Skip to content

Commit e2104a2

Browse files
committed
refactor: enhance docstrings for mock classes and unit tests in retriever module
1 parent 79af726 commit e2104a2

File tree

4 files changed

+93
-16
lines changed

4 files changed

+93
-16
lines changed

libs/rag-core-api/tests/composite_retriever_test.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
"""Unit tests for internal helper methods of ``CompositeRetriever``.
1+
"""Test internal helper methods of ``CompositeRetriever``.
22
33
The goal of these tests is to verify the transformation semantics of:
44
- _use_summaries
@@ -40,6 +40,10 @@ def _mk_doc(
4040

4141
@pytest.mark.asyncio
4242
async def test_use_summaries_expands_and_removes_summary():
43+
"""Expand a summary into its related documents.
44+
45+
Verify that summary documents are removed and replaced by their related underlying documents.
46+
"""
4347
# Summary references an underlying doc not in initial results.
4448
underlying = _mk_doc("doc1", score=0.9)
4549
summary = _mk_doc("sum1", doc_type=ContentType.SUMMARY, related=["doc1"]) # type: ignore[arg-type]
@@ -57,6 +61,10 @@ async def test_use_summaries_expands_and_removes_summary():
5761

5862

5963
def test_use_summaries_only_summary_no_related():
64+
"""Drop a summary document that has no related documents.
65+
66+
Verify that the returned result is empty when no related ids are present.
67+
"""
6068
summary = _mk_doc("sum1", doc_type=ContentType.SUMMARY, related=[]) # type: ignore[arg-type]
6169
retriever = MockRetrieverQuark([summary])
6270
cr = CompositeRetriever(retrievers=[retriever], reranker=None, reranker_enabled=False)
@@ -66,6 +74,10 @@ def test_use_summaries_only_summary_no_related():
6674

6775

6876
def test_remove_duplicates_preserves_first_occurrence():
77+
"""Preserve the first occurrence when duplicate ids are present.
78+
79+
Verify that duplicate documents are removed while maintaining the original order.
80+
"""
6981
d1a = _mk_doc("a")
7082
d1b = _mk_doc("a") # duplicate id
7183
d2 = _mk_doc("b")
@@ -76,6 +88,10 @@ def test_remove_duplicates_preserves_first_occurrence():
7688

7789

7890
def test_early_pruning_sorts_by_score_when_all_have_score():
91+
"""Sort by score and keep only the top-k documents.
92+
93+
Verify that documents are sorted descending by score when all documents include scores.
94+
"""
7995
docs = [_mk_doc("a", score=0.7), _mk_doc("b", score=0.9), _mk_doc("c", score=0.8)]
8096
retriever = MockRetrieverQuark(docs)
8197
cr = CompositeRetriever(
@@ -87,6 +103,10 @@ def test_early_pruning_sorts_by_score_when_all_have_score():
87103

88104

89105
def test_early_pruning_preserves_order_without_scores():
106+
"""Preserve input order when pruning without score metadata.
107+
108+
Verify that pruning keeps the original order when scores are absent.
109+
"""
90110
docs = [_mk_doc("a"), _mk_doc("b"), _mk_doc("c")] # no scores
91111
retriever = MockRetrieverQuark(docs)
92112
cr = CompositeRetriever(
@@ -98,6 +118,10 @@ def test_early_pruning_preserves_order_without_scores():
98118

99119
@pytest.mark.asyncio
100120
async def test_arerank_pruning_invokes_reranker_when_needed():
121+
"""Invoke the reranker when more than k documents are retrieved.
122+
123+
Verify that the reranker is called and that the returned list is trimmed to ``reranker_k_documents``.
124+
"""
101125
docs = [_mk_doc("a", score=0.5), _mk_doc("b", score=0.7), _mk_doc("c", score=0.9)]
102126
retriever = MockRetrieverQuark(docs)
103127
reranker = MockReranker()
@@ -116,6 +140,10 @@ async def test_arerank_pruning_invokes_reranker_when_needed():
116140

117141
@pytest.mark.asyncio
118142
async def test_arerank_pruning_skips_when_not_needed():
143+
"""Skip reranking when the retrieved docs are already within k.
144+
145+
Verify that the reranker is not invoked when no pruning is required.
146+
"""
119147
docs = [_mk_doc("a", score=0.5), _mk_doc("b", score=0.7)] # already <= k
120148
retriever = MockRetrieverQuark(docs)
121149
reranker = MockReranker()

libs/rag-core-api/tests/mocks/mock_reranker.py

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,32 @@
1-
"""Mock reranker used by CompositeRetriever unit tests."""
2-
3-
from langchain_core.documents import Document
1+
"""Provide a mock reranker for CompositeRetriever unit tests."""
42

53
__all__ = ["MockReranker"]
64

75

86
class MockReranker:
7+
"""Provide a simple reranker test double.
8+
9+
The mock records whether it was invoked and returns a deterministic top-2 subset.
10+
"""
11+
912
def __init__(self):
1013
self.invoked = False
1114

12-
async def ainvoke(self, payload, config=None): # noqa: D401
15+
async def ainvoke(self, payload, config=None):
16+
"""Return a reranked subset of the provided documents.
17+
18+
Parameters
19+
----------
20+
payload : tuple
21+
A ``(documents, query)`` tuple.
22+
config : Any, optional
23+
Optional runtime config passed through by the caller.
24+
25+
Returns
26+
-------
27+
list
28+
The top two documents by score when scores are present; otherwise the first two, reversed.
29+
"""
1330
self.invoked = True
1431
documents, _query = payload
1532
# Emulate reranker selecting top 2 with highest 'score' if present; else first 2 reversed
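libs/rag-core-api/tests/mocks/mock_retriever_quark.py (file path inferred; header missing from capture)

Lines changed: 19 additions & 7 deletions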
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
"""Mock retriever quark for CompositeRetriever unit tests."""
1+
"""Provide a mock retriever quark for CompositeRetriever unit tests."""
22

3-
from typing import List
43
from langchain_core.documents import Document
54

65
from .mock_vector_db import MockVectorDB
@@ -9,18 +8,31 @@
98

109

1110
class MockRetrieverQuark:
12-
"""Minimal stand-in for a RetrieverQuark.
11+
"""Provide a minimal stand-in for a RetrieverQuark.
1312
1413
Exposes an ``ainvoke`` returning pre-seeded documents and a ``_vector_database`` attribute
1514
referenced by summary expansion logic.
1615
"""
1716

18-
def __init__(self, documents: List[Document], vector_database: MockVectorDB | None = None):
17+
def __init__(self, documents: list[Document], vector_database: MockVectorDB | None = None):
1918
self._documents = documents
2019
self._vector_database = vector_database or MockVectorDB()
2120

2221
def verify_readiness(self): # pragma: no cover - trivial
23-
return None
24-
25-
async def ainvoke(self, *_args, **_kwargs): # noqa: D401 - simple passthrough
22+
"""Verify that the retriever is ready.
23+
24+
Returns
25+
-------
26+
None
27+
Always returns ``None``.
28+
"""
29+
30+
async def ainvoke(self, *_args, **_kwargs):
31+
"""Return the pre-seeded documents.
32+
33+
Returns
34+
-------
35+
list[Document]
36+
The documents passed to the constructor.
37+
"""
2638
return self._documents
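libs/rag-core-api/tests/mocks/mock_vector_db.py (file path inferred from the `.mock_vector_db` import; header missing from capture)

Lines changed: 24 additions & 4 deletions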
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,43 @@
1-
"""Mock implementation of a minimal vector database interface for tests.
1+
"""Provide a minimal vector database interface for tests.
22
33
Provides only the methods required by the CompositeRetriever unit tests:
44
- get_documents_by_ids: Used during summary expansion
55
- asearch: (async) provided as a defensive stub
66
"""
77

8-
from typing import Dict, List
98
from langchain_core.documents import Document
109

1110
__all__ = ["MockVectorDB"]
1211

1312

1413
class MockVectorDB:
15-
def __init__(self, docs_by_id: Dict[str, Document] | None = None):
14+
"""Provide a minimal in-memory vector database test double."""
15+
16+
def __init__(self, docs_by_id: dict[str, Document] | None = None):
1617
self.collection_available = True
1718
self._docs_by_id = docs_by_id or {}
1819

19-
def get_documents_by_ids(self, ids: List[str]) -> List[Document]: # pragma: no cover - simple mapping
20+
def get_documents_by_ids(self, ids: list[str]) -> list[Document]: # pragma: no cover - simple mapping
21+
"""Return documents for the provided ids.
22+
23+
Parameters
24+
----------
25+
ids : list[str]
26+
Document ids to look up.
27+
28+
Returns
29+
-------
30+
list[Document]
31+
Documents that exist in the in-memory mapping.
32+
"""
2033
return [self._docs_by_id[i] for i in ids if i in self._docs_by_id]
2134

2235
async def asearch(self, *_, **__): # pragma: no cover - defensive stub
36+
"""Return an empty result for async search.
37+
38+
Returns
39+
-------
40+
list
41+
Always returns an empty list.
42+
"""
2343
return []

0 commit comments

Comments
 (0)