Add test suite for scrapers, matcher, and API

Tests for RawProduct/ScrapeResult dataclasses, product name
normalizer, cross-store matcher (EAN, fuzzy, unit validation),
and FastAPI endpoints with mocked database sessions.
This commit is contained in:
authentik Default Admin 2026-02-11 08:44:19 +00:00
parent 8feea63abe
commit f9c4389f5a
9 changed files with 1063 additions and 0 deletions

0
tests/__init__.py Normal file
View file

174
tests/conftest.py Normal file
View file

@ -0,0 +1,174 @@
"""Shared fixtures for SmartCart tests."""
from __future__ import annotations
from datetime import datetime
from decimal import Decimal
from unittest.mock import AsyncMock, MagicMock
import pytest
from httpx import ASGITransport, AsyncClient
from src.api.main import app
from src.core.database import get_session
from src.core.models import Category, PriceRecord, Product, ScrapeRun, Store, StoreProduct
# ---------------------------------------------------------------------------
# Mock async session
# ---------------------------------------------------------------------------
@pytest.fixture()
def mock_session():
    """Build an ``AsyncMock`` standing in for an ``AsyncSession``.

    The mock starts out with "empty database" defaults.  A test that needs
    specific query results overrides ``session.execute.return_value`` (or
    ``side_effect``) itself.
    """
    # Plain (synchronous) result object handed back by the awaited execute().
    empty_result = MagicMock()
    empty_result.scalar_one.return_value = 0
    empty_result.scalar_one_or_none.return_value = None
    empty_result.scalars.return_value.all.return_value = []

    fake_session = AsyncMock()
    fake_session.get.return_value = None
    fake_session.execute.return_value = empty_result
    return fake_session
# ---------------------------------------------------------------------------
# FastAPI test client
# ---------------------------------------------------------------------------
@pytest.fixture()
async def client(mock_session):
    """Yield an ``httpx.AsyncClient`` bound to the FastAPI app in-process.

    The ``get_session`` dependency is overridden so every request handled
    through this client receives ``mock_session`` instead of a real
    database session.

    The override lives on the module-global ``app``, so it is removed in a
    ``finally`` block: it must not leak into later tests even when client
    construction or shutdown raises.  Only the key installed here is
    removed, leaving overrides registered by other fixtures untouched.
    """

    async def _override_get_session():
        yield mock_session

    app.dependency_overrides[get_session] = _override_get_session
    try:
        transport = ASGITransport(app=app)
        async with AsyncClient(transport=transport, base_url="http://test") as ac:
            yield ac
    finally:
        app.dependency_overrides.pop(get_session, None)
# ---------------------------------------------------------------------------
# Sample domain objects
# ---------------------------------------------------------------------------
@pytest.fixture()
def sample_store() -> Store:
    """Return the "Tesco" store (id 1) with both a base URL and a logo URL."""
    return Store(
        id=1,
        name="Tesco",
        slug="tesco",
        base_url="https://www.tesco.ie",
        logo_url="https://www.tesco.ie/logo.png",
    )
@pytest.fixture()
def sample_store_2() -> Store:
    """Return the "SuperValu" store (id 2), which has no logo URL."""
    return Store(
        id=2,
        name="SuperValu",
        slug="supervalu",
        base_url="https://www.supervalu.ie",
        logo_url=None,
    )
@pytest.fixture()
def sample_category() -> Category:
    """Return the "Dairy" category (id 1)."""
    dairy = Category(id=1, name="Dairy", slug="dairy")
    return dairy
@pytest.fixture()
def sample_product(sample_category) -> Product:
    """Return a fully populated milk product linked to ``sample_category``."""
    fields = {
        "id": 1,
        "name": "Avonmore Full Cream Milk 2L",
        "brand": "Avonmore",
        "ean": "5391516590123",
        "category_id": sample_category.id,
        "unit": "l",
        "unit_size": Decimal("2"),
        "image_url": "https://example.com/milk.jpg",
        "created_at": datetime(2025, 1, 1),
    }
    milk = Product(**fields)
    # Attach the category object itself after construction, in addition to
    # the ``category_id`` set above.
    milk.category = sample_category
    return milk
@pytest.fixture()
def sample_product_no_ean() -> Product:
    """Return a butter product with no EAN, no category and no image."""
    fields = {
        "id": 2,
        "name": "Kerrygold Butter 250g",
        "brand": "Kerrygold",
        "ean": None,
        "category_id": None,
        "unit": "g",
        "unit_size": Decimal("250"),
        "image_url": None,
        "created_at": datetime(2025, 1, 2),
    }
    butter = Product(**fields)
    # Explicitly mark the product as uncategorised on the object attribute too.
    butter.category = None
    return butter
@pytest.fixture()
def sample_store_product(sample_product, sample_store) -> StoreProduct:
    """Return an active listing of ``sample_product`` in ``sample_store``."""
    listing = StoreProduct(
        id=1,
        product_id=sample_product.id,
        store_id=sample_store.id,
        store_sku="TESCO-12345",
        store_name="Avonmore Fresh Milk 2 Litre",
        store_url="https://www.tesco.ie/product/12345",
        is_active=True,
    )
    # Attach the related objects themselves, alongside the foreign-key ids.
    listing.product = sample_product
    listing.store = sample_store
    return listing
@pytest.fixture()
def sample_price_record(sample_store_product) -> PriceRecord:
    """Return an in-stock, promoted price record for ``sample_store_product``."""
    record = PriceRecord(
        id=1,
        store_product_id=sample_store_product.id,
        price=Decimal("2.49"),
        promo_price=Decimal("1.99"),
        promo_label="Save 50c",
        unit_price=Decimal("0.9950"),
        in_stock=True,
        scraped_at=datetime(2025, 6, 1, 10, 0, 0),
    )
    return record
@pytest.fixture()
def sample_scrape_run(sample_store) -> ScrapeRun:
    """Return a finished, error-free 15-minute scrape run for ``sample_store``."""
    began = datetime(2025, 6, 1, 22, 0, 0)
    ended = datetime(2025, 6, 1, 22, 15, 0)
    run = ScrapeRun(
        id=1,
        store_id=sample_store.id,
        started_at=began,
        finished_at=ended,
        status="done",
        products_scraped=150,
        errors=None,
    )
    return run

View file

View file

@ -0,0 +1,241 @@
"""Tests for the SmartCart FastAPI endpoints."""
from __future__ import annotations
from decimal import Decimal
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from httpx import ASGITransport, AsyncClient
from src.api.main import app
from src.api.schemas import CategoryOut, ProductOut, StoreOut
from src.core.database import get_session
from src.core.models import Category, Product, Store
# =========================================================================
# /health
# =========================================================================
class TestHealthCheck:
    """Exercise the ``GET /health`` endpoint."""

    async def test_health_check(self, client):
        """The endpoint answers 200 with the fixed ``{"status": "ok"}`` body."""
        resp = await client.get("/health")
        assert resp.status_code == 200
        payload = resp.json()
        assert payload == {"status": "ok"}
# =========================================================================
# /
# =========================================================================
class TestRoot:
    """Exercise the redirect served at ``GET /``."""

    async def test_root_redirects_to_docs(self, client):
        """Without following redirects, ``/`` answers 307 pointing at ``/docs``."""
        resp = await client.get("/", follow_redirects=False)
        assert resp.status_code == 307
        target = resp.headers["location"]
        assert target == "/docs"
# =========================================================================
# /api/stores
# =========================================================================
class TestListStores:
    """Exercise ``GET /api/stores``."""

    @staticmethod
    def _stub_scalars(session, rows):
        """Make ``session.execute`` return a result whose ``.scalars().all()`` is *rows*."""
        stub = MagicMock()
        stub.scalars.return_value.all.return_value = rows
        session.execute.return_value = stub

    async def test_list_stores_empty(self, client, mock_session):
        """With no stores in the database the endpoint returns ``[]``."""
        self._stub_scalars(mock_session, [])

        resp = await client.get("/api/stores")

        assert resp.status_code == 200
        assert resp.json() == []

    async def test_list_stores(self, client, mock_session, sample_store, sample_store_2):
        """Both stores come back, in the order the query produced them."""
        self._stub_scalars(mock_session, [sample_store, sample_store_2])

        resp = await client.get("/api/stores")

        assert resp.status_code == 200
        stores = resp.json()
        assert len(stores) == 2
        first, second = stores[0], stores[1]
        assert first["name"] == "Tesco"
        assert first["slug"] == "tesco"
        assert second["name"] == "SuperValu"
        assert second["slug"] == "supervalu"

    async def test_list_stores_schema(self, client, mock_session, sample_store):
        """Each returned store exposes the fields checked below."""
        self._stub_scalars(mock_session, [sample_store])

        resp = await client.get("/api/stores")

        assert resp.status_code == 200
        store = resp.json()[0]
        for field in ("id", "name", "slug", "base_url", "logo_url"):
            assert field in store
# =========================================================================
# /api/categories
# =========================================================================
class TestListCategories:
    """Exercise ``GET /api/categories``."""

    @staticmethod
    def _stub_scalars(session, rows):
        """Make ``session.execute`` return a result whose ``.scalars().all()`` is *rows*."""
        stub = MagicMock()
        stub.scalars.return_value.all.return_value = rows
        session.execute.return_value = stub

    async def test_list_categories_empty(self, client, mock_session):
        """With no categories in the database the endpoint returns ``[]``."""
        self._stub_scalars(mock_session, [])

        resp = await client.get("/api/categories")

        assert resp.status_code == 200
        assert resp.json() == []

    async def test_list_categories(self, client, mock_session, sample_category):
        """A single stored category is returned with its name and slug."""
        self._stub_scalars(mock_session, [sample_category])

        resp = await client.get("/api/categories")

        assert resp.status_code == 200
        categories = resp.json()
        assert len(categories) == 1
        only = categories[0]
        assert only["name"] == "Dairy"
        assert only["slug"] == "dairy"
# =========================================================================
# /api/products
# =========================================================================
class TestListProducts:
    """Exercise ``GET /api/products``."""

    @staticmethod
    def _stub_page(session, total, rows):
        """Queue two ``execute`` results: the count first, then the page rows.

        The tests rely on the endpoint calling ``execute`` twice in that
        order, hence ``side_effect`` rather than ``return_value``.
        """
        count_stub = MagicMock()
        count_stub.scalar_one.return_value = total
        rows_stub = MagicMock()
        rows_stub.scalars.return_value.all.return_value = rows
        session.execute.side_effect = [count_stub, rows_stub]

    async def test_list_products_empty(self, client, mock_session):
        """An empty database yields no items and a total of zero."""
        self._stub_page(mock_session, 0, [])

        resp = await client.get("/api/products")

        assert resp.status_code == 200
        page = resp.json()
        assert page["items"] == []
        assert page["total"] == 0

    async def test_list_products(self, client, mock_session, sample_product):
        """One stored product is returned inside the paginated envelope."""
        self._stub_page(mock_session, 1, [sample_product])

        resp = await client.get("/api/products")

        assert resp.status_code == 200
        page = resp.json()
        assert page["total"] == 1
        items = page["items"]
        assert len(items) == 1
        assert items[0]["name"] == "Avonmore Full Cream Milk 2L"

    async def test_list_products_pagination_params(self, client, mock_session):
        """The ``page`` and ``limit`` query parameters are accepted."""
        self._stub_page(mock_session, 0, [])

        resp = await client.get("/api/products?page=2&limit=10")

        assert resp.status_code == 200

    async def test_list_products_search_param(self, client, mock_session):
        """The ``search`` query parameter is accepted."""
        self._stub_page(mock_session, 0, [])

        resp = await client.get("/api/products?search=milk")

        assert resp.status_code == 200
# =========================================================================
# /api/products/{product_id}
# =========================================================================
class TestGetProduct:
    """Exercise ``GET /api/products/{product_id}``."""

    @staticmethod
    def _stub_lookup(session, found):
        """Make ``session.execute`` return a result whose ``scalar_one_or_none()`` is *found*."""
        stub = MagicMock()
        stub.scalar_one_or_none.return_value = found
        session.execute.return_value = stub

    async def test_get_product_not_found(self, client, mock_session):
        """An unknown id answers 404 with the "Product not found" detail."""
        self._stub_lookup(mock_session, None)

        resp = await client.get("/api/products/99999")

        assert resp.status_code == 404
        assert resp.json()["detail"] == "Product not found"

    async def test_get_product_found(self, client, mock_session, sample_product):
        """A known id answers 200 with the product and its nested category."""
        self._stub_lookup(mock_session, sample_product)

        resp = await client.get("/api/products/1")

        assert resp.status_code == 200
        body = resp.json()
        assert body["id"] == 1
        assert body["name"] == "Avonmore Full Cream Milk 2L"
        assert body["brand"] == "Avonmore"
        assert body["ean"] == "5391516590123"
        assert body["unit"] == "l"
        category = body["category"]
        assert category is not None
        assert category["name"] == "Dairy"

    async def test_get_product_no_category(self, client, mock_session, sample_product_no_ean):
        """A product lacking category and EAN serialises both as null."""
        self._stub_lookup(mock_session, sample_product_no_ean)

        resp = await client.get("/api/products/2")

        assert resp.status_code == 200
        body = resp.json()
        assert body["id"] == 2
        assert body["category"] is None
        assert body["ean"] is None

View file

View file

@ -0,0 +1,223 @@
"""Tests for src.matcher.matcher."""
from __future__ import annotations
from decimal import Decimal
from unittest.mock import MagicMock
import pytest
from src.core.models import Product
from src.matcher.matcher import RawProduct, ean_match, find_match, fuzzy_match
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _make_product(
    id: int,
    name: str,
    ean: str | None = None,
    brand: str | None = None,
    unit: str | None = None,
    unit_size: Decimal | None = None,
) -> Product:
    """Build a bare ``Product`` for matcher tests without touching a database.

    The instance is allocated with ``Product.__new__`` so ``__init__`` is
    never run; the attributes are then assigned one by one.
    ``category_id`` and ``image_url`` are always ``None``.

    NOTE(review): the ``id`` parameter shadows the builtin, but callers pass
    it positionally, so the signature is left as is.
    """
    attributes = {
        "id": id,
        "name": name,
        "ean": ean,
        "brand": brand,
        "unit": unit,
        "unit_size": unit_size,
        "category_id": None,
        "image_url": None,
    }
    product = Product.__new__(Product)
    for attr_name, attr_value in attributes.items():
        setattr(product, attr_name, attr_value)
    return product
# =========================================================================
# ean_match
# =========================================================================
class TestEanMatch:
    """Exercise ``ean_match``."""

    def test_ean_match_found(self):
        """The candidate carrying the same EAN is returned."""
        needle = _make_product(1, "Milk 2L", ean="5391516590123")
        same_ean = _make_product(2, "Full Cream Milk 2L", ean="5391516590123")
        other_ean = _make_product(3, "Skimmed Milk 1L", ean="5391516590456")

        hit = ean_match(needle, [same_ean, other_ean])

        assert hit is not None
        assert hit.id == 2
        assert hit.ean == "5391516590123"

    def test_ean_match_not_found(self):
        """No candidate shares the EAN, so nothing is returned."""
        needle = _make_product(1, "Milk 2L", ean="0000000000000")
        pool = [
            _make_product(2, "Full Cream Milk 2L", ean="5391516590123"),
            _make_product(3, "Skimmed Milk 1L", ean="5391516590456"),
        ]

        assert ean_match(needle, pool) is None

    def test_ean_match_no_ean_on_product(self):
        """A product without an EAN never matches."""
        needle = _make_product(1, "Milk 2L", ean=None)
        pool = [_make_product(2, "Full Cream Milk 2L", ean="5391516590123")]

        assert ean_match(needle, pool) is None

    def test_ean_match_skips_self(self):
        """A product is not matched against itself."""
        needle = _make_product(1, "Milk 2L", ean="5391516590123")

        assert ean_match(needle, [needle]) is None

    def test_ean_match_empty_candidates(self):
        """An empty candidate list yields no match."""
        needle = _make_product(1, "Milk 2L", ean="5391516590123")

        assert ean_match(needle, []) is None

    def test_ean_match_candidate_no_ean(self):
        """Candidates lacking an EAN are skipped."""
        needle = _make_product(1, "Milk 2L", ean="5391516590123")
        pool = [_make_product(2, "Milk 2L", ean=None)]

        assert ean_match(needle, pool) is None
# =========================================================================
# fuzzy_match
# =========================================================================
class TestFuzzyMatch:
    """Exercise ``fuzzy_match``."""

    def test_fuzzy_match_above_threshold(self):
        """Near-identical names match at the default threshold."""
        pool = [_make_product(1, "Avonmore Full Cream Milk 2L")]

        hit = fuzzy_match("Avonmore Fresh Milk Full Cream 2L", pool)

        assert hit is not None
        assert hit.id == 1

    def test_fuzzy_match_below_threshold(self):
        """Unrelated names do not match."""
        pool = [_make_product(1, "Heinz Baked Beans 415g")]

        assert fuzzy_match("Avonmore Full Cream Milk 2L", pool) is None

    def test_fuzzy_match_picks_best(self):
        """With several candidates the closest one wins."""
        white = _make_product(1, "Brennans White Bread 800g")
        wholemeal = _make_product(2, "Brennans Wholemeal Bread 800g")

        hit = fuzzy_match("Brennans White Sliced Pan 800g", [white, wholemeal])

        assert hit is not None
        # The white loaf is expected to score higher than the wholemeal one.
        assert hit.id == 1

    def test_fuzzy_match_custom_threshold(self):
        """A threshold of 99 rejects a merely moderate match."""
        pool = [_make_product(1, "Avonmore Milk 2L")]

        hit = fuzzy_match("Avonmore Super Milk 1L", pool, threshold=99.0)

        assert hit is None

    def test_fuzzy_match_empty_name(self):
        """An empty query name yields no match."""
        pool = [_make_product(1, "Milk 2L")]

        assert fuzzy_match("", pool) is None

    def test_fuzzy_match_empty_candidates(self):
        """An empty candidate list yields no match."""
        assert fuzzy_match("Avonmore Milk 2L", []) is None

    def test_fuzzy_match_word_order_invariant(self):
        """Reordering the words of a name still matches."""
        pool = [_make_product(1, "Kerrygold Irish Butter 250g")]

        hit = fuzzy_match("Irish Butter Kerrygold 250g", pool)

        assert hit is not None
        assert hit.id == 1
# =========================================================================
# find_match
# =========================================================================
class TestFindMatch:
    """Tests for ``find_match``."""

    @staticmethod
    def _raw(name: str, ean: str | None = None) -> RawProduct:
        """Build a ``RawProduct`` with placeholder values for its required fields.

        NOTE(review): assumes ``src.matcher.matcher.RawProduct`` is the
        scraper dataclass, whose required fields are ``store_sku``, ``name``
        and ``price`` -- confirm.  Constructing it with only ``name`` and
        ``ean`` would then raise ``TypeError`` before ``find_match`` is ever
        reached.  The SKU and price are arbitrary placeholders.
        """
        return RawProduct(
            store_sku="TEST-SKU",
            name=name,
            price=Decimal("1.00"),
            ean=ean,
        )

    def test_find_match_prefers_ean(self):
        """When EAN matches, it should be returned even if names differ."""
        raw = self._raw("Completely Different Name", ean="5391516590123")
        existing = [
            _make_product(1, "Avonmore Milk 2L", ean="5391516590123"),
            _make_product(2, "Something Else 500ml", ean="9999999999999"),
        ]
        result = find_match(raw, existing)
        assert result is not None
        assert result.id == 1

    def test_find_match_falls_back_to_fuzzy(self):
        """With no EAN on the raw product, find_match should use fuzzy matching."""
        raw = self._raw("Avonmore Full Cream Milk 2L")
        existing = [
            _make_product(1, "Avonmore Fresh Full Cream Milk 2L", ean="5391516590123"),
        ]
        result = find_match(raw, existing)
        assert result is not None
        assert result.id == 1

    def test_find_match_no_match(self):
        """Completely unrelated products should not match."""
        raw = self._raw("Heinz Baked Beans 415g")
        existing = [
            _make_product(1, "Avonmore Milk 2L", ean="5391516590123"),
        ]
        result = find_match(raw, existing)
        assert result is None

    def test_find_match_rejects_unit_mismatch(self):
        """If names are similar but unit info differs, find_match should
        reject the match to avoid merging different sizes."""
        raw = self._raw("Avonmore Milk 1L")
        existing = [
            _make_product(1, "Avonmore Milk 2L"),
        ]
        result = find_match(raw, existing)
        assert result is None

    def test_find_match_ean_no_candidates(self):
        """An empty list of existing products yields no match."""
        raw = self._raw("Milk", ean="5391516590123")
        result = find_match(raw, [])
        assert result is None

    def test_find_match_accepts_matching_units(self):
        """When names and units both match, the product should be returned."""
        raw = self._raw("Avonmore Milk 2L")
        existing = [
            _make_product(1, "Avonmore Fresh Milk 2L"),
        ]
        result = find_match(raw, existing)
        assert result is not None
        assert result.id == 1

View file

@ -0,0 +1,225 @@
"""Tests for src.matcher.normalizer."""
from decimal import Decimal
import pytest
from src.matcher.normalizer import extract_brand, extract_unit_info, normalize_name
# =========================================================================
# normalize_name
# =========================================================================
class TestNormalizeName:
    """Tests for ``normalize_name``."""

    def test_empty_string(self):
        """An empty input stays empty."""
        assert normalize_name("") == ""

    def test_none_like_empty(self):
        """An empty/whitespace-only input should yield an empty string."""
        assert normalize_name(" ") == ""

    def test_lowercases(self):
        """The result contains no uppercase characters."""
        result = normalize_name("AVONMORE MILK")
        assert result == result.lower()

    def test_strips_extra_whitespace(self):
        """Runs of spaces are collapsed and the ends are trimmed.

        The input deliberately contains leading, trailing and repeated
        inner spaces.  The check is for a *double* space: single spaces
        between words are expected to survive, so asserting that no space
        at all remains could never pass for a multi-word name.
        """
        result = normalize_name("  Avonmore   Milk  2L  ")
        assert "  " not in result
        assert not result.startswith(" ")
        assert not result.endswith(" ")

    def test_litre_to_l(self):
        """'1 Litre' should collapse to '1l'."""
        result = normalize_name("Milk 1 Litre")
        assert "1l" in result
        assert "litre" not in result

    def test_litres_to_l(self):
        """'2 Litres' should collapse to '2l'."""
        result = normalize_name("Juice 2 Litres")
        assert "2l" in result

    def test_ltr_to_l(self):
        """'5Ltr' should collapse to '5l'."""
        result = normalize_name("Water 5Ltr")
        assert "5l" in result

    def test_millilitres_to_ml(self):
        """'500 Millilitres' should collapse to '500ml'."""
        result = normalize_name("Cream 500 Millilitres")
        assert "500ml" in result

    def test_grams_to_g(self):
        """'200 Grams' should collapse to '200g'."""
        result = normalize_name("Cheese 200 Grams")
        assert "200g" in result

    def test_kilograms_to_kg(self):
        """'2 Kilograms' should collapse to '2kg'."""
        result = normalize_name("Potatoes 2 Kilograms")
        assert "2kg" in result

    def test_kilo_to_kg(self):
        """'1 Kilo' should collapse to '1kg'."""
        result = normalize_name("Rice 1 Kilo")
        assert "1kg" in result

    def test_number_unit_space_collapsed(self):
        """Spaces between a number and its unit should be removed."""
        result = normalize_name("Milk 2 L")
        assert "2l" in result
        # No space between the number and unit
        assert "2 l" not in result

    def test_removes_noise_words(self):
        """The listed filler words do not survive as standalone tokens."""
        tokens = normalize_name("The Fresh Premium Irish Milk").split()
        assert "the" not in tokens
        assert "fresh" not in tokens
        assert "premium" not in tokens
        assert "irish" not in tokens

    def test_preserves_meaningful_words(self):
        """Brand and product words are kept."""
        result = normalize_name("Avonmore Milk 2L")
        assert "avonmore" in result
        assert "milk" in result

    def test_comma_decimal_normalised(self):
        """European-style comma decimal ('1,5l') should become '1.5l'."""
        result = normalize_name("Juice 1,5 Litres")
        assert "1.5l" in result

    def test_multiple_units_in_name(self):
        """A name with a second quantity ('x 6 Pack') still keeps '750ml' intact."""
        result = normalize_name("Bottle 750ml x 6 Pack")
        assert "750ml" in result
# =========================================================================
# extract_brand
# =========================================================================
class TestExtractBrand:
    """Exercise ``extract_brand``."""

    def test_empty_string(self):
        """An empty name has no brand."""
        brand = extract_brand("")
        assert brand is None

    def test_none_input(self):
        """``None`` is tolerated and has no brand."""
        brand = extract_brand(None)
        assert brand is None

    def test_known_brand_avonmore(self):
        """'Avonmore' is recognised at the start of the name."""
        brand = extract_brand("Avonmore Full Cream Milk 2L")
        assert brand == "Avonmore"

    def test_known_brand_kerrygold(self):
        """'Kerrygold' is recognised at the start of the name."""
        brand = extract_brand("Kerrygold Pure Irish Butter 250g")
        assert brand == "Kerrygold"

    def test_known_brand_brennans(self):
        """'Brennans' is recognised at the start of the name."""
        brand = extract_brand("Brennans Family Pan 800g")
        assert brand == "Brennans"

    def test_known_brand_case_insensitive(self):
        """A lowercase name still yields the capitalised brand."""
        brand = extract_brand("avonmore milk 2l")
        assert brand == "Avonmore"

    def test_known_brand_mid_string(self):
        """A known brand is found even when it is not the first word."""
        brand = extract_brand("Fresh Irish Kerrygold Butter")
        assert brand == "Kerrygold"

    def test_known_brand_barrys(self):
        """A brand containing an apostrophe is returned with it."""
        brand = extract_brand("Barry's Gold Blend Tea 80s")
        assert brand == "Barry's"

    def test_known_brand_heinz(self):
        """'Heinz' is recognised at the start of the name."""
        brand = extract_brand("Heinz Baked Beans 415g")
        assert brand == "Heinz"

    def test_heuristic_capitalised_first_word(self):
        """With no known brand, a capitalised first word still yields a brand.

        Only the presence of a result is checked, not its exact value.
        """
        brand = extract_brand("Glenilen Farm Clotted Cream 140g")
        # "Glenilen" is the first capitalised token and not a noise word
        assert brand is not None

    def test_no_brand_generic_name(self):
        """An all-lowercase generic name has no brand."""
        brand = extract_brand("whole milk 2l")
        assert brand is None

    def test_all_uppercase_first_word_returns_none(self):
        """An all-uppercase first token is rejected by the heuristic
        (``not candidate.isupper()`` guard)."""
        # "AA" is all-uppercase and only 2 chars; the heuristic rejects it
        brand = extract_brand("AA batteries 4 pack")
        assert brand is None
# =========================================================================
# extract_unit_info
# =========================================================================
class TestExtractUnitInfo:
    """Exercise ``extract_unit_info``."""

    @staticmethod
    def _assert_parsed(text, expected_unit, expected_size):
        """Check that *text* parses to ``(expected_unit, Decimal(expected_size))``."""
        unit, size = extract_unit_info(text)
        assert unit == expected_unit
        assert size == Decimal(expected_size)

    @staticmethod
    def _assert_nothing(text):
        """Check that *text* parses to ``(None, None)``."""
        unit, size = extract_unit_info(text)
        assert unit is None
        assert size is None

    def test_empty_string(self):
        """An empty name carries no unit information."""
        self._assert_nothing("")

    def test_none_input(self):
        """``None`` is tolerated and carries no unit information."""
        self._assert_nothing(None)

    def test_litres(self):
        """'2L' parses as 2 litres."""
        self._assert_parsed("Milk 2L", "l", "2")

    def test_litres_word(self):
        """'1.5 Litres' parses as 1.5 litres."""
        self._assert_parsed("Juice 1.5 Litres", "l", "1.5")

    def test_millilitres(self):
        """'500ml' parses as 500 millilitres."""
        self._assert_parsed("Cream 500ml", "ml", "500")

    def test_grams(self):
        """'800g' parses as 800 grams."""
        self._assert_parsed("Bread 800g", "g", "800")

    def test_kilograms(self):
        """'1kg' parses as 1 kilogram."""
        self._assert_parsed("Rice 1kg", "kg", "1")

    def test_grams_word(self):
        """'200 Grams' parses as 200 grams."""
        self._assert_parsed("Cheese 200 Grams", "g", "200")

    def test_centilitres(self):
        """'75cl' parses as 75 centilitres."""
        self._assert_parsed("Wine 75cl", "cl", "75")

    def test_comma_decimal(self):
        """A comma decimal separator is read as a decimal point."""
        self._assert_parsed("Juice 1,5L", "l", "1.5")

    def test_no_unit(self):
        """A name without any quantity carries no unit information."""
        self._assert_nothing("Bananas Loose")

    def test_decimal_size(self):
        """A size below one is parsed as a decimal."""
        self._assert_parsed("Oil 0.5L", "l", "0.5")

    def test_tablets(self):
        """'24 Tablets' parses to the 'tab' unit."""
        self._assert_parsed("Paracetamol 24 Tablets", "tab", "24")

    def test_capsules(self):
        """'30 Capsules' parses to the 'cap' unit."""
        self._assert_parsed("Vitamin D 30 Capsules", "cap", "30")

View file

View file

@ -0,0 +1,200 @@
"""Tests for src.scrapers.base data structures and utilities."""
from __future__ import annotations
from datetime import datetime, timedelta
from decimal import Decimal
import pytest
from src.scrapers.base import (
DEFAULT_HEADERS,
USER_AGENTS,
RawProduct,
ScrapeResult,
random_user_agent,
)
# =========================================================================
# RawProduct
# =========================================================================
class TestRawProduct:
    """Exercise the ``RawProduct`` dataclass."""

    def test_raw_product_creation_minimal(self):
        """Only the required fields are given; everything else takes its default."""
        minimal = RawProduct(
            store_sku="SKU-001",
            name="Avonmore Milk 2L",
            price=Decimal("2.49"),
        )

        assert minimal.store_sku == "SKU-001"
        assert minimal.name == "Avonmore Milk 2L"
        assert minimal.price == Decimal("2.49")

        # Every optional field defaults to None ...
        optional_fields = (
            "promo_price",
            "promo_label",
            "unit_price",
            "unit",
            "unit_size",
            "brand",
            "ean",
            "category",
            "image_url",
            "product_url",
        )
        for field_name in optional_fields:
            assert getattr(minimal, field_name) is None
        # ... except the stock flag, which defaults to True.
        assert minimal.in_stock is True

    def test_raw_product_creation_full(self):
        """Every field given explicitly is stored unchanged."""
        given = {
            "store_sku": "SKU-002",
            "name": "Kerrygold Butter 250g",
            "price": Decimal("3.99"),
            "promo_price": Decimal("2.99"),
            "promo_label": "Save 1 Euro",
            "unit_price": Decimal("11.96"),
            "unit": "g",
            "unit_size": Decimal("250"),
            "brand": "Kerrygold",
            "ean": "5011038123456",
            "category": "Dairy",
            "image_url": "https://example.com/butter.jpg",
            "product_url": "https://store.com/butter",
        }

        full = RawProduct(in_stock=False, **given)

        for field_name, expected in given.items():
            assert getattr(full, field_name) == expected
        assert full.in_stock is False

    def test_raw_product_default_in_stock_is_true(self):
        """Omitting ``in_stock`` leaves the product marked as in stock."""
        bare = RawProduct(store_sku="X", name="Y", price=Decimal("1"))
        assert bare.in_stock is True
# =========================================================================
# ScrapeResult
# =========================================================================
class TestScrapeResult:
    """Exercise the ``ScrapeResult`` dataclass and its derived properties."""

    @staticmethod
    def _one_product():
        """Return a single-item product list used by the status tests."""
        return [RawProduct(store_sku="A", name="A", price=Decimal("1"))]

    def test_status_success(self):
        """Products and no errors give 'success'."""
        outcome = ScrapeResult(
            store_slug="tesco",
            products=self._one_product(),
            errors=[],
        )
        assert outcome.status == "success"

    def test_status_failed(self):
        """Errors and no products give 'failed'."""
        outcome = ScrapeResult(
            store_slug="tesco",
            products=[],
            errors=["Connection timeout"],
        )
        assert outcome.status == "failed"

    def test_status_partial(self):
        """Products together with errors give 'partial'."""
        outcome = ScrapeResult(
            store_slug="tesco",
            products=self._one_product(),
            errors=["One category failed"],
        )
        assert outcome.status == "partial"

    def test_status_success_no_products_no_errors(self):
        """Neither products nor errors still counts as 'success'."""
        outcome = ScrapeResult(store_slug="tesco", products=[], errors=[])
        assert outcome.status == "success"

    def test_duration_seconds(self):
        """A run lasting 5 min 30 s reports 330 seconds."""
        began = datetime(2025, 6, 1, 10, 0, 0)
        ended = began + timedelta(minutes=5, seconds=30)
        outcome = ScrapeResult(
            store_slug="tesco",
            started_at=began,
            finished_at=ended,
        )
        assert outcome.duration_seconds == 330.0

    def test_duration_zero(self):
        """Identical start and finish times report zero seconds."""
        instant = datetime(2025, 6, 1, 10, 0, 0)
        outcome = ScrapeResult(
            store_slug="tesco",
            started_at=instant,
            finished_at=instant,
        )
        assert outcome.duration_seconds == 0.0

    def test_default_factory_products(self):
        """``products`` and ``errors`` default to empty lists."""
        outcome = ScrapeResult(store_slug="supervalu")
        assert outcome.products == []
        assert outcome.errors == []

    def test_store_slug_stored(self):
        """The slug passed in is kept as given."""
        outcome = ScrapeResult(store_slug="dunnes")
        assert outcome.store_slug == "dunnes"
# =========================================================================
# random_user_agent
# =========================================================================
class TestRandomUserAgent:
    """Exercise ``random_user_agent``."""

    def test_returns_string(self):
        """The picked agent is a ``str``."""
        agent = random_user_agent()
        assert isinstance(agent, str)

    def test_returns_non_empty(self):
        """The picked agent is not the empty string."""
        agent = random_user_agent()
        assert len(agent) > 0

    def test_returns_from_user_agents_list(self):
        """The picked agent is one of ``USER_AGENTS``."""
        agent = random_user_agent()
        assert agent in USER_AGENTS

    def test_returns_vary(self):
        """Fifty draws should produce at least two distinct agents
        (with very high probability given 5 agents)."""
        seen = set()
        for _ in range(50):
            seen.add(random_user_agent())
        assert len(seen) > 1
# =========================================================================
# Module-level constants
# =========================================================================
class TestConstants:
    """Sanity-check the module-level constants of ``src.scrapers.base``."""

    def test_user_agents_not_empty(self):
        """At least one user agent is configured."""
        agent_count = len(USER_AGENTS)
        assert agent_count > 0

    def test_default_headers_has_accept(self):
        """The default headers include ``Accept``."""
        header = "Accept"
        assert header in DEFAULT_HEADERS

    def test_default_headers_has_accept_language(self):
        """The default headers include ``Accept-Language``."""
        header = "Accept-Language"
        assert header in DEFAULT_HEADERS