■ BaseRetriever 클래스를 사용해 커스텀 검색기를 만드는 방법을 보여준다.
▶ main.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
import asyncio
from typing import List

from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever


class CustomRetriever(BaseRetriever):
    """A toy retriever that returns the first k documents containing the query.

    Matching is a case-insensitive substring test of the query against each
    document's ``page_content``. Documents are scanned, and returned, in
    ``documentList`` order; no relevance ranking is performed.

    This retriever only implements the sync method _get_relevant_documents.

    If the retriever were to involve file access or network access, it could
    benefit from a native async implementation of `_aget_relevant_documents`.

    As usual, with Runnables, there's a default async implementation that's
    provided that delegates to the sync implementation running on another
    thread.
    """

    documentList: List[Document]
    """List of documents to retrieve from."""
    k: int
    """Number of top results to return"""

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Return at most ``self.k`` documents whose text contains ``query``.

        Args:
            query: Substring to look for (compared case-insensitively).
            run_manager: Callback manager passed by the caller; not used by
                this implementation.

        Returns:
            The first ``self.k`` matching documents in ``documentList`` order.
            The list is empty when nothing matches or when ``self.k <= 0``.
        """
        # Lower-case the query once instead of on every iteration.
        needle = query.lower()
        matching_documents: List[Document] = []
        for document in self.documentList:
            # ``>=`` (not ``>``) so that the result never exceeds k items.
            if len(matching_documents) >= self.k:
                break
            if needle in document.page_content.lower():
                matching_documents.append(document)
        return matching_documents


# Optional: override _aget_relevant_documents in the retriever class to provide
# a more efficient native async implementation.
# async def _aget_relevant_documents(
#     self, query: str, *, run_manager: AsyncCallbackManagerForRetrieverRun
# ) -> List[Document]:
#     """Asynchronously get documents relevant to a query.
#
#     Args:
#         query: String to find relevant documents for
#         run_manager: The callbacks handler to use
#     Returns:
#         List of relevant documents
#     """


# Sample corpus used by the demo calls below.
documentList = [
    Document(
        page_content="Dogs are great companions, known for their loyalty and friendliness.",
        metadata={"type": "dog", "trait": "loyalty"},
    ),
    Document(
        page_content="Cats are independent pets that often enjoy their own space.",
        metadata={"type": "cat", "trait": "independence"},
    ),
    Document(
        page_content="Goldfish are popular pets for beginners, requiring relatively simple care.",
        metadata={"type": "fish", "trait": "low maintenance"},
    ),
    Document(
        page_content="Parrots are intelligent birds capable of mimicking human speech.",
        metadata={"type": "bird", "trait": "intelligence"},
    ),
    Document(
        page_content="Rabbits are social animals that need plenty of space to hop around.",
        metadata={"type": "rabbit", "trait": "social"},
    ),
]

customRetriever = CustomRetriever(documentList=documentList, k=3)

# Synchronous single query.
responseDocumentList = customRetriever.invoke("that")
print(responseDocumentList)
print("-" * 50)

# Synchronous batch: one result list per input query.
responseDocumentListList = customRetriever.batch(["dog", "cat"])
print(responseDocumentListList)
print("-" * 50)


async def main():
    """Run the async demo: one ``ainvoke`` call, then stream retriever events."""
    responseDocumentList = await customRetriever.ainvoke("that")
    print(responseDocumentList)
    print("-" * 50)

    # "bar" matches no document, so the end event carries an empty output list.
    async for eventDictionary in customRetriever.astream_events("bar", version="v2"):
        print(eventDictionary)
    print("-" * 50)


asyncio.run(main())

# Sample output. The run_id values are generated per run and will differ.
"""
[Document(metadata={'type': 'cat', 'trait': 'independence'}, page_content='Cats are independent pets that often enjoy their own space.'), Document(metadata={'type': 'rabbit', 'trait': 'social'}, page_content='Rabbits are social animals that need plenty of space to hop around.')]
--------------------------------------------------
[[Document(metadata={'type': 'dog', 'trait': 'loyalty'}, page_content='Dogs are great companions, known for their loyalty and friendliness.')], [Document(metadata={'type': 'cat', 'trait': 'independence'}, page_content='Cats are independent pets that often enjoy their own space.')]]
--------------------------------------------------
[Document(metadata={'type': 'cat', 'trait': 'independence'}, page_content='Cats are independent pets that often enjoy their own space.'), Document(metadata={'type': 'rabbit', 'trait': 'social'}, page_content='Rabbits are social animals that need plenty of space to hop around.')]
--------------------------------------------------
{'event': 'on_retriever_start', 'data': {'input': 'bar'}, 'name': 'CustomRetriever', 'tags': [], 'run_id': '359f2805-45a0-422a-b8d9-ccc9067ea7de', 'metadata': {'ls_retriever_name': 'custom'}, 'parent_ids': []}
{'event': 'on_retriever_end', 'data': {'output': []}, 'run_id': '359f2805-45a0-422a-b8d9-ccc9067ea7de', 'name': 'CustomRetriever', 'tags': [], 'metadata': {'ls_retriever_name': 'custom'}, 'parent_ids': []}
--------------------------------------------------
"""
▶ requirements.txt
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
aiohappyeyeballs==2.4.3
aiohttp==3.11.7
aiosignal==1.3.1
annotated-types==0.7.0
anyio==4.6.2.post1
attrs==24.2.0
certifi==2024.8.30
charset-normalizer==3.4.0
frozenlist==1.5.0
greenlet==3.1.1
h11==0.14.0
httpcore==1.0.7
httpx==0.27.2
idna==3.10
jsonpatch==1.33
jsonpointer==3.0.0
langchain==0.3.7
langchain-core==0.3.19
langchain-text-splitters==0.3.2
langsmith==0.1.144
multidict==6.1.0
numpy==1.26.4
orjson==3.10.11
packaging==24.2
propcache==0.2.0
pydantic==2.10.1
pydantic_core==2.27.1
PyYAML==6.0.2
requests==2.32.3
requests-toolbelt==1.0.0
sniffio==1.3.1
SQLAlchemy==2.0.36
tenacity==9.0.0
typing_extensions==4.12.2
urllib3==2.2.3
yarl==1.18.0
※ pip install langchain 명령을 실행했다.