Add test suite for scrapers, matcher, and API
Tests for RawProduct/ScrapeResult dataclasses, product name normalizer, cross-store matcher (EAN, fuzzy, unit validation), and FastAPI endpoints with mocked database sessions.
This commit is contained in:
parent
8feea63abe
commit
f9c4389f5a
9 changed files with 1063 additions and 0 deletions
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
174
tests/conftest.py
Normal file
174
tests/conftest.py
Normal file
|
|
@ -0,0 +1,174 @@
|
|||
"""Shared fixtures for SmartCart tests."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from decimal import Decimal
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
|
||||
import pytest
|
||||
from httpx import ASGITransport, AsyncClient
|
||||
|
||||
from src.api.main import app
|
||||
from src.core.database import get_session
|
||||
from src.core.models import Category, PriceRecord, Product, ScrapeRun, Store, StoreProduct
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Mock async session
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture()
def mock_session():
    """Build an ``AsyncMock`` stand-in for an ``AsyncSession``.

    With no further configuration every query looks like it hit an empty
    database. Tests that need specific rows replace
    ``session.execute.return_value`` with their own result mock.
    """
    # The result object is a plain MagicMock: its accessors are called
    # synchronously on whatever ``await session.execute(...)`` hands back.
    empty_result = MagicMock()
    empty_result.scalars.return_value.all.return_value = []
    empty_result.scalar_one_or_none.return_value = None
    empty_result.scalar_one.return_value = 0

    session = AsyncMock()
    session.execute.return_value = empty_result
    session.get.return_value = None
    return session
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# FastAPI test client
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture()
async def client(mock_session):
    """Yield an ``httpx.AsyncClient`` that talks to the FastAPI app in-process.

    The ``get_session`` dependency is overridden for the lifetime of the
    fixture so every request handled by the app receives ``mock_session``
    instead of a real database session. All dependency overrides are cleared
    again on teardown.
    """

    async def _override_get_session():
        yield mock_session

    app.dependency_overrides[get_session] = _override_get_session
    try:
        transport = ASGITransport(app=app)
        async with AsyncClient(transport=transport, base_url="http://test") as ac:
            yield ac
    finally:
        # ``app`` is a module-level singleton. Without ``finally`` a failure
        # while building or entering the client (i.e. before the ``yield``)
        # would skip the cleanup and leave the override installed.
        app.dependency_overrides.clear()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Sample domain objects
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture()
def sample_store() -> Store:
    """Return the first sample ``Store`` (Tesco, id 1) with a logo URL set."""
    return Store(
        id=1,
        name="Tesco",
        slug="tesco",
        base_url="https://www.tesco.ie",
        logo_url="https://www.tesco.ie/logo.png",
    )
|
||||
|
||||
|
||||
@pytest.fixture()
def sample_store_2() -> Store:
    """Return the second sample ``Store`` (SuperValu, id 2) without a logo."""
    return Store(
        id=2,
        name="SuperValu",
        slug="supervalu",
        base_url="https://www.supervalu.ie",
        logo_url=None,
    )
|
||||
|
||||
|
||||
@pytest.fixture()
def sample_category() -> Category:
    """Return the sample "Dairy" ``Category`` (id 1)."""
    dairy = Category(id=1, name="Dairy", slug="dairy")
    return dairy
|
||||
|
||||
|
||||
@pytest.fixture()
def sample_product(sample_category) -> Product:
    """Return a fully populated ``Product`` (2L milk with an EAN).

    The product's ``category`` attribute is set to ``sample_category`` so
    code that reads ``product.category`` works without a database.
    """
    column_values = {
        "id": 1,
        "name": "Avonmore Full Cream Milk 2L",
        "brand": "Avonmore",
        "ean": "5391516590123",
        "category_id": sample_category.id,
        "unit": "l",
        "unit_size": Decimal("2"),
        "image_url": "https://example.com/milk.jpg",
        "created_at": datetime(2025, 1, 1),
    }
    milk = Product(**column_values)
    milk.category = sample_category
    return milk
|
||||
|
||||
|
||||
@pytest.fixture()
def sample_product_no_ean() -> Product:
    """Return a ``Product`` (250g butter) with no EAN, category or image."""
    column_values = {
        "id": 2,
        "name": "Kerrygold Butter 250g",
        "brand": "Kerrygold",
        "ean": None,
        "category_id": None,
        "unit": "g",
        "unit_size": Decimal("250"),
        "image_url": None,
        "created_at": datetime(2025, 1, 2),
    }
    butter = Product(**column_values)
    # Set explicitly so ``product.category`` can be read without a database.
    butter.category = None
    return butter
|
||||
|
||||
|
||||
@pytest.fixture()
def sample_store_product(sample_product, sample_store) -> StoreProduct:
    """Return an active ``StoreProduct`` linking ``sample_product`` to ``sample_store``.

    Both the foreign-key ids and the ``product`` / ``store`` attributes are
    populated from the two fixtures.
    """
    listing = StoreProduct(
        id=1,
        product_id=sample_product.id,
        store_id=sample_store.id,
        store_sku="TESCO-12345",
        store_name="Avonmore Fresh Milk 2 Litre",
        store_url="https://www.tesco.ie/product/12345",
        is_active=True,
    )
    listing.product = sample_product
    listing.store = sample_store
    return listing
|
||||
|
||||
|
||||
@pytest.fixture()
def sample_price_record(sample_store_product) -> PriceRecord:
    """Return an in-stock ``PriceRecord`` for ``sample_store_product`` with a promo price."""
    record = PriceRecord(
        id=1,
        store_product_id=sample_store_product.id,
        price=Decimal("2.49"),
        promo_price=Decimal("1.99"),
        promo_label="Save 50c",
        unit_price=Decimal("0.9950"),
        in_stock=True,
        scraped_at=datetime(2025, 6, 1, 10, 0, 0),
    )
    return record
|
||||
|
||||
|
||||
@pytest.fixture()
def sample_scrape_run(sample_store) -> ScrapeRun:
    """Return a finished ``ScrapeRun`` for ``sample_store`` (status "done", no errors)."""
    run = ScrapeRun(
        id=1,
        store_id=sample_store.id,
        started_at=datetime(2025, 6, 1, 22, 0, 0),
        finished_at=datetime(2025, 6, 1, 22, 15, 0),
        status="done",
        products_scraped=150,
        errors=None,
    )
    return run
|
||||
0
tests/test_api/__init__.py
Normal file
0
tests/test_api/__init__.py
Normal file
241
tests/test_api/test_endpoints.py
Normal file
241
tests/test_api/test_endpoints.py
Normal file
|
|
@ -0,0 +1,241 @@
|
|||
"""Tests for the SmartCart FastAPI endpoints."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from decimal import Decimal
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from httpx import ASGITransport, AsyncClient
|
||||
|
||||
from src.api.main import app
|
||||
from src.api.schemas import CategoryOut, ProductOut, StoreOut
|
||||
from src.core.database import get_session
|
||||
from src.core.models import Category, Product, Store
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# /health
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestHealthCheck:
    """``GET /health`` probe."""

    async def test_health_check(self, client):
        """The probe answers 200 with the body ``{"status": "ok"}``."""
        resp = await client.get("/health")
        assert resp.status_code == 200
        assert resp.json() == {"status": "ok"}
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# /
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestRoot:
    """``GET /`` redirect behaviour."""

    async def test_root_redirects_to_docs(self, client):
        """The root path issues a 307 redirect pointing at ``/docs``."""
        # Redirect following is disabled so the redirect response itself is inspected.
        resp = await client.get("/", follow_redirects=False)
        assert resp.status_code == 307
        assert resp.headers["location"] == "/docs"
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# /api/stores
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestListStores:
    """``GET /api/stores`` listing behaviour."""

    @staticmethod
    def _stub_rows(session, rows):
        """Make ``session.execute`` return a result whose ``.scalars().all()`` is *rows*."""
        stub = MagicMock()
        stub.scalars.return_value.all.return_value = rows
        session.execute.return_value = stub

    async def test_list_stores_empty(self, client, mock_session):
        """With no stores in the database the endpoint returns ``[]``."""
        self._stub_rows(mock_session, [])

        resp = await client.get("/api/stores")

        assert resp.status_code == 200
        assert resp.json() == []

    async def test_list_stores(self, client, mock_session, sample_store, sample_store_2):
        """Stores come back in the order the query produced them."""
        self._stub_rows(mock_session, [sample_store, sample_store_2])

        resp = await client.get("/api/stores")

        assert resp.status_code == 200
        payload = resp.json()
        assert len(payload) == 2
        tesco, supervalu = payload[0], payload[1]
        assert tesco["name"] == "Tesco"
        assert tesco["slug"] == "tesco"
        assert supervalu["name"] == "SuperValu"
        assert supervalu["slug"] == "supervalu"

    async def test_list_stores_schema(self, client, mock_session, sample_store):
        """Each store object exposes the fields expected of the StoreOut schema."""
        self._stub_rows(mock_session, [sample_store])

        resp = await client.get("/api/stores")

        assert resp.status_code == 200
        first_store = resp.json()[0]
        for field in ("id", "name", "slug", "base_url", "logo_url"):
            assert field in first_store
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# /api/categories
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestListCategories:
    """``GET /api/categories`` listing behaviour."""

    @staticmethod
    def _stub_rows(session, rows):
        """Make ``session.execute`` return a result whose ``.scalars().all()`` is *rows*."""
        stub = MagicMock()
        stub.scalars.return_value.all.return_value = rows
        session.execute.return_value = stub

    async def test_list_categories_empty(self, client, mock_session):
        """With no categories in the database the endpoint returns ``[]``."""
        self._stub_rows(mock_session, [])

        resp = await client.get("/api/categories")

        assert resp.status_code == 200
        assert resp.json() == []

    async def test_list_categories(self, client, mock_session, sample_category):
        """A single stored category is returned with its name and slug."""
        self._stub_rows(mock_session, [sample_category])

        resp = await client.get("/api/categories")

        assert resp.status_code == 200
        payload = resp.json()
        assert len(payload) == 1
        only = payload[0]
        assert only["name"] == "Dairy"
        assert only["slug"] == "dairy"
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# /api/products
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestListProducts:
    """``GET /api/products`` listing behaviour."""

    @staticmethod
    def _stub_page(session, total, rows):
        """Queue two ``execute`` results: the count query, then the row query.

        The tests rely on the endpoint calling ``execute`` twice in that
        order, so the results are supplied through ``side_effect``.
        """
        count_stub = MagicMock()
        count_stub.scalar_one.return_value = total

        rows_stub = MagicMock()
        rows_stub.scalars.return_value.all.return_value = rows

        session.execute.side_effect = [count_stub, rows_stub]

    async def test_list_products_empty(self, client, mock_session):
        """An empty database yields no items and a total of zero."""
        self._stub_page(mock_session, 0, [])

        resp = await client.get("/api/products")

        assert resp.status_code == 200
        payload = resp.json()
        assert payload["items"] == []
        assert payload["total"] == 0

    async def test_list_products(self, client, mock_session, sample_product):
        """One stored product appears in the page alongside a total of one."""
        self._stub_page(mock_session, 1, [sample_product])

        resp = await client.get("/api/products")

        assert resp.status_code == 200
        payload = resp.json()
        assert payload["total"] == 1
        items = payload["items"]
        assert len(items) == 1
        assert items[0]["name"] == "Avonmore Full Cream Milk 2L"

    async def test_list_products_pagination_params(self, client, mock_session):
        """``page`` and ``limit`` query parameters are accepted."""
        self._stub_page(mock_session, 0, [])

        resp = await client.get("/api/products?page=2&limit=10")

        assert resp.status_code == 200

    async def test_list_products_search_param(self, client, mock_session):
        """The ``search`` query parameter is accepted."""
        self._stub_page(mock_session, 0, [])

        resp = await client.get("/api/products?search=milk")

        assert resp.status_code == 200
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# /api/products/{product_id}
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestGetProduct:
    """``GET /api/products/{product_id}`` lookup behaviour."""

    @staticmethod
    def _stub_lookup(session, row):
        """Make ``session.execute`` return a result whose ``scalar_one_or_none()`` is *row*."""
        stub = MagicMock()
        stub.scalar_one_or_none.return_value = row
        session.execute.return_value = stub

    async def test_get_product_not_found(self, client, mock_session):
        """An unknown id produces a 404 with the "Product not found" detail."""
        self._stub_lookup(mock_session, None)

        resp = await client.get("/api/products/99999")

        assert resp.status_code == 404
        assert resp.json()["detail"] == "Product not found"

    async def test_get_product_found(self, client, mock_session, sample_product):
        """A known product is serialised with its scalar fields and nested category."""
        self._stub_lookup(mock_session, sample_product)

        resp = await client.get("/api/products/1")

        assert resp.status_code == 200
        payload = resp.json()
        assert payload["id"] == 1
        assert payload["name"] == "Avonmore Full Cream Milk 2L"
        assert payload["brand"] == "Avonmore"
        assert payload["ean"] == "5391516590123"
        assert payload["unit"] == "l"
        category = payload["category"]
        assert category is not None
        assert category["name"] == "Dairy"

    async def test_get_product_no_category(self, client, mock_session, sample_product_no_ean):
        """A product lacking category and EAN serialises both as ``null``."""
        self._stub_lookup(mock_session, sample_product_no_ean)

        resp = await client.get("/api/products/2")

        assert resp.status_code == 200
        payload = resp.json()
        assert payload["id"] == 2
        assert payload["category"] is None
        assert payload["ean"] is None
|
||||
0
tests/test_matcher/__init__.py
Normal file
0
tests/test_matcher/__init__.py
Normal file
223
tests/test_matcher/test_matcher.py
Normal file
223
tests/test_matcher/test_matcher.py
Normal file
|
|
@ -0,0 +1,223 @@
|
|||
"""Tests for src.matcher.matcher."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from decimal import Decimal
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from src.core.models import Product
|
||||
from src.matcher.matcher import RawProduct, ean_match, find_match, fuzzy_match
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _make_product(
    id: int,
    name: str,
    ean: str | None = None,
    brand: str | None = None,
    unit: str | None = None,
    unit_size: Decimal | None = None,
) -> Product:
    """Build a bare ``Product`` for matcher tests without touching a database.

    The instance is allocated with ``Product.__new__`` (so ``__init__`` never
    runs) and the attributes below are assigned directly. ``category_id``
    and ``image_url`` are always ``None``.
    """
    # NOTE(review): ``__init__`` is bypassed on purpose. If ``Product`` is an
    # ORM-mapped class, confirm that plain attribute assignment works on an
    # instance whose constructor never ran.
    product = Product.__new__(Product)
    attributes = {
        "id": id,
        "name": name,
        "ean": ean,
        "brand": brand,
        "unit": unit,
        "unit_size": unit_size,
        "category_id": None,
        "image_url": None,
    }
    for attr_name, value in attributes.items():
        setattr(product, attr_name, value)
    return product
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# ean_match
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestEanMatch:
    """Behaviour of ``ean_match``."""

    def test_ean_match_found(self):
        """The candidate sharing the product's EAN is returned."""
        needle = _make_product(1, "Milk 2L", ean="5391516590123")
        same_ean = _make_product(2, "Full Cream Milk 2L", ean="5391516590123")
        other_ean = _make_product(3, "Skimmed Milk 1L", ean="5391516590456")

        match = ean_match(needle, [same_ean, other_ean])

        assert match is not None
        assert match.id == 2
        assert match.ean == "5391516590123"

    def test_ean_match_not_found(self):
        """No candidate carries the product's EAN, so nothing is returned."""
        needle = _make_product(1, "Milk 2L", ean="0000000000000")
        pool = [
            _make_product(2, "Full Cream Milk 2L", ean="5391516590123"),
            _make_product(3, "Skimmed Milk 1L", ean="5391516590456"),
        ]

        assert ean_match(needle, pool) is None

    def test_ean_match_no_ean_on_product(self):
        """A product without an EAN never matches."""
        needle = _make_product(1, "Milk 2L", ean=None)
        pool = [_make_product(2, "Full Cream Milk 2L", ean="5391516590123")]

        assert ean_match(needle, pool) is None

    def test_ean_match_skips_self(self):
        """A product appearing in its own candidate list is not matched to itself."""
        needle = _make_product(1, "Milk 2L", ean="5391516590123")

        assert ean_match(needle, [needle]) is None

    def test_ean_match_empty_candidates(self):
        """An empty candidate list yields no match."""
        needle = _make_product(1, "Milk 2L", ean="5391516590123")

        assert ean_match(needle, []) is None

    def test_ean_match_candidate_no_ean(self):
        """Candidates lacking an EAN are not matched."""
        needle = _make_product(1, "Milk 2L", ean="5391516590123")
        pool = [_make_product(2, "Milk 2L", ean=None)]

        assert ean_match(needle, pool) is None
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# fuzzy_match
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestFuzzyMatch:
    """Behaviour of ``fuzzy_match``."""

    def test_fuzzy_match_above_threshold(self):
        """Near-identical names clear the default threshold."""
        pool = [_make_product(1, "Avonmore Full Cream Milk 2L")]

        match = fuzzy_match("Avonmore Fresh Milk Full Cream 2L", pool)

        assert match is not None
        assert match.id == 1

    def test_fuzzy_match_below_threshold(self):
        """Unrelated names do not match."""
        pool = [_make_product(1, "Heinz Baked Beans 415g")]

        assert fuzzy_match("Avonmore Full Cream Milk 2L", pool) is None

    def test_fuzzy_match_picks_best(self):
        """Among several candidates the closest name wins."""
        white = _make_product(1, "Brennans White Bread 800g")
        wholemeal = _make_product(2, "Brennans Wholemeal Bread 800g")

        match = fuzzy_match("Brennans White Sliced Pan 800g", [white, wholemeal])

        assert match is not None
        # "White Bread" is expected to score above "Wholemeal Bread".
        assert match.id == 1

    def test_fuzzy_match_custom_threshold(self):
        """A near-perfect threshold rejects a merely moderate match."""
        pool = [_make_product(1, "Avonmore Milk 2L")]

        match = fuzzy_match("Avonmore Super Milk 1L", pool, threshold=99.0)

        assert match is None

    def test_fuzzy_match_empty_name(self):
        """An empty query name never matches."""
        pool = [_make_product(1, "Milk 2L")]

        assert fuzzy_match("", pool) is None

    def test_fuzzy_match_empty_candidates(self):
        """An empty candidate list yields no match."""
        assert fuzzy_match("Avonmore Milk 2L", []) is None

    def test_fuzzy_match_word_order_invariant(self):
        """The same words in a different order should still match."""
        pool = [_make_product(1, "Kerrygold Irish Butter 250g")]

        match = fuzzy_match("Irish Butter Kerrygold 250g", pool)

        assert match is not None
        assert match.id == 1
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# find_match
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestFindMatch:
    """Tests for ``find_match``."""

    @staticmethod
    def _raw(name: str, ean: str | None = None) -> RawProduct:
        """Build a ``RawProduct`` for matching, filling the non-matching fields.

        The scraper data-structure tests construct ``RawProduct`` with
        ``store_sku``, ``name`` and ``price`` as its required fields, so
        placeholder values are supplied here rather than relying on
        ``name``/``ean`` alone being accepted by the constructor.
        """
        # NOTE(review): assumes the RawProduct imported from the matcher
        # module accepts the same fields as the scraper one. Confirm.
        return RawProduct(
            store_sku="TEST-SKU",
            name=name,
            price=Decimal("1.00"),
            ean=ean,
        )

    def test_find_match_prefers_ean(self):
        """When EAN matches, it should be returned even if names differ."""
        raw = self._raw("Completely Different Name", ean="5391516590123")
        existing = [
            _make_product(1, "Avonmore Milk 2L", ean="5391516590123"),
            _make_product(2, "Something Else 500ml", ean="9999999999999"),
        ]

        result = find_match(raw, existing)

        assert result is not None
        assert result.id == 1

    def test_find_match_falls_back_to_fuzzy(self):
        """With no EAN on the raw product, find_match should use fuzzy matching."""
        raw = self._raw("Avonmore Full Cream Milk 2L")
        existing = [
            _make_product(1, "Avonmore Fresh Full Cream Milk 2L", ean="5391516590123"),
        ]

        result = find_match(raw, existing)

        assert result is not None
        assert result.id == 1

    def test_find_match_no_match(self):
        """Completely unrelated products should not match."""
        raw = self._raw("Heinz Baked Beans 415g")
        existing = [
            _make_product(1, "Avonmore Milk 2L", ean="5391516590123"),
        ]

        assert find_match(raw, existing) is None

    def test_find_match_rejects_unit_mismatch(self):
        """Similar names with different sizes (1L vs 2L) must not be merged."""
        raw = self._raw("Avonmore Milk 1L")
        existing = [
            _make_product(1, "Avonmore Milk 2L"),
        ]

        assert find_match(raw, existing) is None

    def test_find_match_ean_no_candidates(self):
        """With nothing to compare against, even a product with an EAN has no match."""
        raw = self._raw("Milk", ean="5391516590123")

        assert find_match(raw, []) is None

    def test_find_match_accepts_matching_units(self):
        """When names and units both match, the product should be returned."""
        raw = self._raw("Avonmore Milk 2L")
        existing = [
            _make_product(1, "Avonmore Fresh Milk 2L"),
        ]

        result = find_match(raw, existing)

        assert result is not None
        assert result.id == 1
|
||||
225
tests/test_matcher/test_normalizer.py
Normal file
225
tests/test_matcher/test_normalizer.py
Normal file
|
|
@ -0,0 +1,225 @@
|
|||
"""Tests for src.matcher.normalizer."""
|
||||
|
||||
from decimal import Decimal
|
||||
|
||||
import pytest
|
||||
|
||||
from src.matcher.normalizer import extract_brand, extract_unit_info, normalize_name
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# normalize_name
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestNormalizeName:
    """Tests for ``normalize_name``."""

    def test_empty_string(self):
        """An empty input stays empty."""
        assert normalize_name("") == ""

    def test_none_like_empty(self):
        """An empty/whitespace-only input should yield an empty string."""
        assert normalize_name(" ") == ""

    def test_lowercases(self):
        """The normalised name contains no uppercase characters."""
        result = normalize_name("AVONMORE MILK")
        assert result == result.lower()

    def test_strips_extra_whitespace(self):
        """Leading, trailing and repeated whitespace is removed.

        Words are still expected to be separated by single spaces, so the
        check targets *runs* of spaces rather than any space at all.
        """
        result = normalize_name("  Avonmore   Milk  2L  ")
        assert "  " not in result
        assert not result.startswith(" ")
        assert not result.endswith(" ")

    def test_litre_to_l(self):
        """'1 Litre' should collapse to '1l'."""
        result = normalize_name("Milk 1 Litre")
        assert "1l" in result
        assert "litre" not in result

    def test_litres_to_l(self):
        """'2 Litres' should collapse to '2l'."""
        result = normalize_name("Juice 2 Litres")
        assert "2l" in result

    def test_ltr_to_l(self):
        """'5Ltr' should collapse to '5l'."""
        result = normalize_name("Water 5Ltr")
        assert "5l" in result

    def test_millilitres_to_ml(self):
        """'500 Millilitres' should collapse to '500ml'."""
        result = normalize_name("Cream 500 Millilitres")
        assert "500ml" in result

    def test_grams_to_g(self):
        """'200 Grams' should collapse to '200g'."""
        result = normalize_name("Cheese 200 Grams")
        assert "200g" in result

    def test_kilograms_to_kg(self):
        """'2 Kilograms' should collapse to '2kg'."""
        result = normalize_name("Potatoes 2 Kilograms")
        assert "2kg" in result

    def test_kilo_to_kg(self):
        """'1 Kilo' should collapse to '1kg'."""
        result = normalize_name("Rice 1 Kilo")
        assert "1kg" in result

    def test_number_unit_space_collapsed(self):
        """Spaces between a number and its unit should be removed."""
        result = normalize_name("Milk 2 L")
        assert "2l" in result
        # No space between the number and unit
        assert "2 l" not in result

    def test_removes_noise_words(self):
        """Filler words (the/fresh/premium/irish) do not survive as tokens."""
        tokens = normalize_name("The Fresh Premium Irish Milk").split()
        for noise_word in ("the", "fresh", "premium", "irish"):
            assert noise_word not in tokens

    def test_preserves_meaningful_words(self):
        """Brand and product words are kept."""
        result = normalize_name("Avonmore Milk 2L")
        assert "avonmore" in result
        assert "milk" in result

    def test_comma_decimal_normalised(self):
        """European-style comma decimal ('1,5l') should become '1.5l'."""
        result = normalize_name("Juice 1,5 Litres")
        assert "1.5l" in result

    def test_multiple_units_in_name(self):
        """The volume token survives when the name also carries a pack count.

        Only the '750ml' part is asserted; how 'x 6 Pack' is rendered is not
        pinned down by this test.
        """
        result = normalize_name("Bottle 750ml x 6 Pack")
        assert "750ml" in result
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# extract_brand
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestExtractBrand:
    """Behaviour of ``extract_brand``."""

    def test_empty_string(self):
        """An empty name has no brand."""
        brand = extract_brand("")
        assert brand is None

    def test_none_input(self):
        """``None`` is tolerated and yields no brand."""
        brand = extract_brand(None)
        assert brand is None

    def test_known_brand_avonmore(self):
        """Avonmore is recognised at the start of the name."""
        brand = extract_brand("Avonmore Full Cream Milk 2L")
        assert brand == "Avonmore"

    def test_known_brand_kerrygold(self):
        """Kerrygold is recognised at the start of the name."""
        brand = extract_brand("Kerrygold Pure Irish Butter 250g")
        assert brand == "Kerrygold"

    def test_known_brand_brennans(self):
        """Brennans is recognised at the start of the name."""
        brand = extract_brand("Brennans Family Pan 800g")
        assert brand == "Brennans"

    def test_known_brand_case_insensitive(self):
        """A lowercase spelling still yields the canonical capitalisation."""
        brand = extract_brand("avonmore milk 2l")
        assert brand == "Avonmore"

    def test_known_brand_mid_string(self):
        """A brand that is not the first word is still found."""
        brand = extract_brand("Fresh Irish Kerrygold Butter")
        assert brand == "Kerrygold"

    def test_known_brand_barrys(self):
        """A brand containing an apostrophe is returned intact."""
        brand = extract_brand("Barry's Gold Blend Tea 80s")
        assert brand == "Barry's"

    def test_known_brand_heinz(self):
        """Heinz is recognised at the start of the name."""
        brand = extract_brand("Heinz Baked Beans 415g")
        assert brand == "Heinz"

    def test_heuristic_capitalised_first_word(self):
        """With no known brand present, a capitalised leading word still
        produces some brand guess."""
        brand = extract_brand("Glenilen Farm Clotted Cream 140g")
        # Only the presence of a guess is checked, not its exact value.
        assert brand is not None

    def test_no_brand_generic_name(self):
        """An all-lowercase generic name yields no brand."""
        brand = extract_brand("whole milk 2l")
        assert brand is None

    def test_all_uppercase_first_word_returns_none(self):
        """An all-caps leading token ("AA") is not taken as a brand."""
        brand = extract_brand("AA batteries 4 pack")
        assert brand is None
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# extract_unit_info
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestExtractUnitInfo:
    """Behaviour of ``extract_unit_info``."""

    @staticmethod
    def _expect(text, expected_unit, expected_size):
        """Unpack ``extract_unit_info(text)`` and compare both parts by equality."""
        found_unit, found_size = extract_unit_info(text)
        assert found_unit == expected_unit
        assert found_size == expected_size

    @staticmethod
    def _expect_nothing(text):
        """Assert that both parts of ``extract_unit_info(text)`` are ``None``."""
        found_unit, found_size = extract_unit_info(text)
        assert found_unit is None
        assert found_size is None

    def test_empty_string(self):
        """An empty name carries no unit information."""
        self._expect_nothing("")

    def test_none_input(self):
        """``None`` is tolerated and carries no unit information."""
        self._expect_nothing(None)

    def test_litres(self):
        """'2L' parses as 2 litres."""
        self._expect("Milk 2L", "l", Decimal("2"))

    def test_litres_word(self):
        """A spelled-out 'Litres' with a decimal size is parsed."""
        self._expect("Juice 1.5 Litres", "l", Decimal("1.5"))

    def test_millilitres(self):
        """'500ml' parses as 500 millilitres."""
        self._expect("Cream 500ml", "ml", Decimal("500"))

    def test_grams(self):
        """'800g' parses as 800 grams."""
        self._expect("Bread 800g", "g", Decimal("800"))

    def test_kilograms(self):
        """'1kg' parses as 1 kilogram."""
        self._expect("Rice 1kg", "kg", Decimal("1"))

    def test_grams_word(self):
        """A spelled-out 'Grams' is parsed."""
        self._expect("Cheese 200 Grams", "g", Decimal("200"))

    def test_centilitres(self):
        """'75cl' parses as 75 centilitres."""
        self._expect("Wine 75cl", "cl", Decimal("75"))

    def test_comma_decimal(self):
        """A comma decimal separator is read as a decimal point."""
        self._expect("Juice 1,5L", "l", Decimal("1.5"))

    def test_no_unit(self):
        """A name without a quantity carries no unit information."""
        self._expect_nothing("Bananas Loose")

    def test_decimal_size(self):
        """A size below one is preserved exactly."""
        self._expect("Oil 0.5L", "l", Decimal("0.5"))

    def test_tablets(self):
        """A tablet count is reported with the 'tab' unit."""
        self._expect("Paracetamol 24 Tablets", "tab", Decimal("24"))

    def test_capsules(self):
        """A capsule count is reported with the 'cap' unit."""
        self._expect("Vitamin D 30 Capsules", "cap", Decimal("30"))
|
||||
0
tests/test_scrapers/__init__.py
Normal file
0
tests/test_scrapers/__init__.py
Normal file
200
tests/test_scrapers/test_base.py
Normal file
200
tests/test_scrapers/test_base.py
Normal file
|
|
@ -0,0 +1,200 @@
|
|||
"""Tests for src.scrapers.base data structures and utilities."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
from decimal import Decimal
|
||||
|
||||
import pytest
|
||||
|
||||
from src.scrapers.base import (
|
||||
DEFAULT_HEADERS,
|
||||
USER_AGENTS,
|
||||
RawProduct,
|
||||
ScrapeResult,
|
||||
random_user_agent,
|
||||
)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# RawProduct
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestRawProduct:
    """Exercise construction and default values of ``RawProduct``."""

    def test_raw_product_creation_minimal(self):
        """Supply only the three required fields and check the defaults."""
        product = RawProduct(
            store_sku="SKU-001",
            name="Avonmore Milk 2L",
            price=Decimal("2.49"),
        )
        assert product.store_sku == "SKU-001"
        assert product.name == "Avonmore Milk 2L"
        assert product.price == Decimal("2.49")

        # Every optional field is expected to fall back to None ...
        optional_fields = (
            "promo_price",
            "promo_label",
            "unit_price",
            "unit",
            "unit_size",
            "brand",
            "ean",
            "category",
            "image_url",
            "product_url",
        )
        for field_name in optional_fields:
            assert getattr(product, field_name) is None
        # ... while the stock flag is expected to default to True.
        assert product.in_stock is True

    def test_raw_product_creation_full(self):
        """Supply every field and check each one reads back unchanged."""
        # Insertion order matches the order in which the fields are verified.
        supplied = {
            "store_sku": "SKU-002",
            "name": "Kerrygold Butter 250g",
            "price": Decimal("3.99"),
            "promo_price": Decimal("2.99"),
            "promo_label": "Save 1 Euro",
            "unit_price": Decimal("11.96"),
            "unit": "g",
            "unit_size": Decimal("250"),
            "brand": "Kerrygold",
            "ean": "5011038123456",
            "category": "Dairy",
            "image_url": "https://example.com/butter.jpg",
            "product_url": "https://store.com/butter",
        }
        product = RawProduct(**supplied, in_stock=False)
        for field_name, expected in supplied.items():
            assert getattr(product, field_name) == expected
        # The boolean flag is checked by identity rather than equality.
        assert product.in_stock is False

    def test_raw_product_default_in_stock_is_true(self):
        """``in_stock`` is expected to be True when it is not passed."""
        minimal = RawProduct(
            store_sku="X",
            name="Y",
            price=Decimal("1"),
        )
        assert minimal.in_stock is True
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# ScrapeResult
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestScrapeResult:
    """Exercise the ``ScrapeResult`` dataclass and its derived properties."""

    def test_status_success(self):
        """At least one product and an empty error list -> 'success'."""
        scraped = [RawProduct(store_sku="A", name="A", price=Decimal("1"))]
        outcome = ScrapeResult(store_slug="tesco", products=scraped, errors=[])
        assert outcome.status == "success"

    def test_status_failed(self):
        """Empty product list and at least one error -> 'failed'."""
        problems = ["Connection timeout"]
        outcome = ScrapeResult(store_slug="tesco", products=[], errors=problems)
        assert outcome.status == "failed"

    def test_status_partial(self):
        """Both products and errors present -> 'partial'."""
        scraped = [RawProduct(store_sku="A", name="A", price=Decimal("1"))]
        problems = ["One category failed"]
        outcome = ScrapeResult(
            store_slug="tesco",
            products=scraped,
            errors=problems,
        )
        assert outcome.status == "partial"

    def test_status_success_no_products_no_errors(self):
        """Nothing scraped and nothing failed -> still 'success'."""
        outcome = ScrapeResult(
            store_slug="tesco",
            products=[],
            errors=[],
        )
        assert outcome.status == "success"

    def test_duration_seconds(self):
        """A run lasting 5 minutes 30 seconds reports 330.0 seconds."""
        began = datetime(2025, 6, 1, 10, 0, 0)
        ended = began + timedelta(minutes=5, seconds=30)  # 10:05:30 same day
        outcome = ScrapeResult(
            store_slug="tesco",
            started_at=began,
            finished_at=ended,
        )
        assert outcome.duration_seconds == 330.0

    def test_duration_zero(self):
        """Identical start and finish timestamps report 0.0 seconds."""
        instant = datetime(2025, 6, 1, 10, 0, 0)
        outcome = ScrapeResult(
            store_slug="tesco",
            started_at=instant,
            finished_at=instant,
        )
        assert outcome.duration_seconds == 0.0

    def test_default_factory_products(self):
        """``products`` and ``errors`` are expected to default to empty lists."""
        outcome = ScrapeResult(store_slug="supervalu")
        assert outcome.products == []
        assert outcome.errors == []

    def test_store_slug_stored(self):
        """The slug passed to the constructor is readable back unchanged."""
        slug = "dunnes"
        outcome = ScrapeResult(store_slug=slug)
        assert outcome.store_slug == "dunnes"
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# random_user_agent
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestRandomUserAgent:
    """Exercise the ``random_user_agent`` helper."""

    def test_returns_string(self):
        """The returned value is a ``str``."""
        assert isinstance(random_user_agent(), str)

    def test_returns_non_empty(self):
        """The returned value has non-zero length."""
        agent = random_user_agent()
        assert len(agent) > 0

    def test_returns_from_user_agents_list(self):
        """The returned value is a member of ``USER_AGENTS``."""
        agent = random_user_agent()
        assert agent in USER_AGENTS

    def test_returns_vary(self):
        """Fifty draws are expected to produce more than one distinct value.

        This is probabilistic: it assumes ``USER_AGENTS`` holds several
        entries, in which case fifty identical draws are vanishingly unlikely.
        """
        seen = set()
        for _ in range(50):
            seen.add(random_user_agent())
        assert len(seen) > 1
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Module-level constants
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestConstants:
    """Basic sanity checks on the scraper module's constants."""

    def test_user_agents_not_empty(self):
        """``USER_AGENTS`` contains at least one entry."""
        agent_count = len(USER_AGENTS)
        assert agent_count > 0

    def test_default_headers_has_accept(self):
        """``DEFAULT_HEADERS`` contains an ``Accept`` entry."""
        header_name = "Accept"
        assert header_name in DEFAULT_HEADERS

    def test_default_headers_has_accept_language(self):
        """``DEFAULT_HEADERS`` contains an ``Accept-Language`` entry."""
        header_name = "Accept-Language"
        assert header_name in DEFAULT_HEADERS
|
||||
Loading…
Add table
Add a link
Reference in a new issue