[PYTHON/LANGCHAIN] BaseRetriever 클래스 : astream_events 메소드 사용하기

■ BaseRetriever 클래스의 astream_events 메소드를 사용하는 방법을 보여준다.

▶ main.py


import asyncio

from langchain_core.retrievers import BaseRetriever
from typing                    import List
from langchain_core.documents  import Document
from langchain_core.callbacks  import CallbackManagerForRetrieverRun
from langchain                 import globals

class CustomRetriever(BaseRetriever):
    """Toy retriever that returns at most ``k`` documents containing the query text.

    Only the synchronous ``_get_relevant_documents`` hook is implemented here.
    A retriever that performs file or network access could benefit from a
    native asynchronous ``_aget_relevant_documents`` implementation instead.
    """

    # Documents to search through, in priority order.
    documents : List[Document]
    # Maximum number of matching documents to return.
    k : int

    def _get_relevant_documents(self, query : str, *, run_manager : CallbackManagerForRetrieverRun) -> List[Document]:
        """Return the first ``k`` documents whose content contains ``query``.

        Matching is a case-insensitive substring test against each document's
        ``page_content``; documents are scanned in their stored order.

        Args:
            query: Text to look for inside each document.
            run_manager: Callback manager for this retriever run (unused here).

        Returns:
            A list of at most ``k`` matching documents (empty when nothing
            matches or when ``k`` is not positive).
        """
        # Lower-case the query once instead of on every loop iteration.
        needle = query.lower()
        matchingDocumentList = []
        for document in self.documents:
            # Stop as soon as k results are collected. The comparison must be
            # ">=": with ">" the loop would collect k + 1 documents.
            if len(matchingDocumentList) >= self.k:
                break

            if needle in document.page_content.lower():
                matchingDocumentList.append(document)
        return matchingDocumentList

# Sample corpus for the demo: one short pet fact per Document, with the animal
# type and its characteristic trait stored as metadata.
documentList = [
    Document(page_content=content, metadata={"type": kind, "trait": trait})
    for content, kind, trait in (
        ("Dogs are great companions, known for their loyalty and friendliness.", "dog", "loyalty"),
        ("Cats are independent pets that often enjoy their own space.", "cat", "independence"),
        ("Goldfish are popular pets for beginners, requiring relatively simple care.", "fish", "low maintenance"),
        ("Parrots are intelligent birds capable of mimicking human speech.", "bird", "intelligence"),
        ("Rabbits are social animals that need plenty of space to hop around.", "rabbit", "social"),
    )
]

async def main():
    """Stream the retriever's events for the query "bar" and print each one."""
    # Turn LangChain's global debug flag off before running the retriever.
    globals.set_debug(False)

    retriever = CustomRetriever(documents=documentList, k=3)
    event_stream = retriever.astream_events("bar", version="v1")

    async for streamed_event in event_stream:
        print(streamed_event)

# Script entry point: run the async demo coroutine to completion.
asyncio.run(main())

import asyncio

from langchain_core.retrievers import BaseRetriever

from typing import List

from langchain_core.documents import Document

from langchain_core.callbacks import CallbackManagerForRetrieverRun

from langchain import globals

class CustomRetriever(BaseRetriever):
    """Toy retriever that returns at most ``k`` documents containing the query text.

    Only the synchronous ``_get_relevant_documents`` hook is implemented here.
    A retriever that performs file or network access could benefit from a
    native asynchronous ``_aget_relevant_documents`` implementation instead.
    """

    # Documents to search through, in priority order.
    documents : List[Document]
    # Maximum number of matching documents to return.
    k : int

    def _get_relevant_documents(self, query : str, *, run_manager : CallbackManagerForRetrieverRun) -> List[Document]:
        """Return the first ``k`` documents whose content contains ``query``.

        Matching is a case-insensitive substring test against each document's
        ``page_content``; documents are scanned in their stored order.

        Args:
            query: Text to look for inside each document.
            run_manager: Callback manager for this retriever run (unused here).

        Returns:
            A list of at most ``k`` matching documents (empty when nothing
            matches or when ``k`` is not positive).
        """
        # Lower-case the query once instead of on every loop iteration.
        needle = query.lower()
        matchingDocumentList = []
        for document in self.documents:
            # Stop as soon as k results are collected. The comparison must be
            # ">=": with ">" the loop would collect k + 1 documents.
            if len(matchingDocumentList) >= self.k:
                break

            if needle in document.page_content.lower():
                matchingDocumentList.append(document)
        return matchingDocumentList

# Sample corpus for the demo: one short pet fact per Document, with the animal
# type and its characteristic trait stored as metadata.
documentList = [
    Document(
        page_content = "Dogs are great companions, known for their loyalty and friendliness.",
        metadata     = {"type" : "dog", "trait" : "loyalty"}
    ),
    Document(
        page_content = "Cats are independent pets that often enjoy their own space.",
        metadata     = {"type" : "cat", "trait" : "independence"}
    ),
    Document(
        page_content = "Goldfish are popular pets for beginners, requiring relatively simple care.",
        metadata     = {"type" : "fish", "trait" : "low maintenance"}
    ),
    Document(
        page_content = "Parrots are intelligent birds capable of mimicking human speech.",
        metadata     = {"type" : "bird", "trait" : "intelligence"}
    ),
    Document(
        page_content = "Rabbits are social animals that need plenty of space to hop around.",
        metadata     = {"type" : "rabbit", "trait" : "social"}
    )
]

async def main():
    """Stream the retriever's events for the query "bar" and print each one."""
    # Turn LangChain's global debug flag off before running the retriever.
    globals.set_debug(False)

    customRetriever = CustomRetriever(documents = documentList, k = 3)

    async for event in customRetriever.astream_events("bar", version = "v1"):
        print(event)

# Script entry point: run the async demo coroutine to completion.
asyncio.run(main())

▶ requirements.txt


aiohappyeyeballs==2.4.0
aiohttp==3.10.5
aiosignal==1.3.1
annotated-types==0.7.0
anyio==4.4.0
attrs==24.2.0
certifi==2024.8.30
charset-normalizer==3.3.2
frozenlist==1.4.1
greenlet==3.1.0
h11==0.14.0
httpcore==1.0.5
httpx==0.27.2
idna==3.8
jsonpatch==1.33
jsonpointer==3.0.0
langchain==0.2.16
langchain-core==0.2.39
langchain-text-splitters==0.2.4
langsmith==0.1.118
multidict==6.1.0
numpy==1.26.4
orjson==3.10.7
packaging==24.1
pydantic==2.9.1
pydantic_core==2.23.3
PyYAML==6.0.2
requests==2.32.3
sniffio==1.3.1
SQLAlchemy==2.0.34
tenacity==8.5.0
typing_extensions==4.12.2
urllib3==2.2.3
yarl==1.11.1

aiohappyeyeballs==2.4.0

aiohttp==3.10.5

aiosignal==1.3.1

annotated-types==0.7.0

anyio==4.4.0

attrs==24.2.0

certifi==2024.8.30

charset-normalizer==3.3.2

frozenlist==1.4.1

greenlet==3.1.0

h11==0.14.0

httpcore==1.0.5

httpx==0.27.2

idna==3.8

jsonpatch==1.33

jsonpointer==3.0.0

langchain==0.2.16

langchain-core==0.2.39

langchain-text-splitters==0.2.4

langsmith==0.1.118

multidict==6.1.0

numpy==1.26.4

orjson==3.10.7

packaging==24.1

pydantic==2.9.1

pydantic_core==2.23.3

PyYAML==6.0.2

requests==2.32.3

sniffio==1.3.1

SQLAlchemy==2.0.34

tenacity==8.5.0

typing_extensions==4.12.2

urllib3==2.2.3

yarl==1.11.1

※ pip install langchain 명령을 실행했다.