[PYTHON/LANGCHAIN] BaseRetriever 클래스 : 커스텀 검색기 만들기

■ BaseRetriever 클래스를 사용해 커스텀 검색기를 만드는 방법을 보여준다.

▶ main.py


import asyncio

from langchain_core.retrievers import BaseRetriever
from typing                    import List
from langchain_core.documents  import Document
from langchain_core.callbacks  import CallbackManagerForRetrieverRun

class CustomRetriever(BaseRetriever):
    """A toy retriever that returns up to k documents containing the user query.

    A document matches when the lower-cased query is a substring of its
    lower-cased ``page_content``. Documents are scanned in list order and the
    scan stops as soon as ``k`` matches have been collected.

    This retriever only implements the sync method _get_relevant_documents.
    If the retriever were to involve file access or network access, it could benefit from a native async implementation of `_aget_relevant_documents`.
    As usual, with Runnables, there's a default async implementation that's provided that delegates to the sync implementation running on another thread.
    """

    documentList : List[Document]
    """List of documents to retrieve from."""

    k : int
    """Maximum number of results to return"""

    def _get_relevant_documents(self, query : str, *, run_manager : CallbackManagerForRetrieverRun) -> List[Document]:
        """Return the first ``k`` documents whose text contains ``query``.

        Args :
            query       : String to look for (case-insensitive substring match)
            run_manager : The callbacks handler; not used by this implementation

        Returns :
            At most ``k`` matching documents, in ``documentList`` order.
            An empty list when ``k`` is zero or negative.
        """
        # A non-positive limit can never be satisfied by appending, so bail out first.
        if self.k <= 0:
            return []

        loweredQuery = query.lower()  # invariant across the loop

        matchingDocumentList = []
        for document in self.documentList:
            if loweredQuery in document.page_content.lower():
                matchingDocumentList.append(document)
                # Check the limit right after appending so exactly k (not k + 1) are returned.
                if len(matchingDocumentList) >= self.k:
                    break
        return matchingDocumentList

    # Optional : override _aget_relevant_documents to provide a more efficient native implementation.
    # (The stub below would also need AsyncCallbackManagerForRetrieverRun to be imported.)
    # async def _aget_relevant_documents(self, query : str, *, run_manager : AsyncCallbackManagerForRetrieverRun) -> List[Document]:
    #     """Asynchronously get documents relevant to a query.

    #     Args :
    #         query       : String to find relevant documents for
    #         run_manager : The callbacks handler to use

    #     Returns :
    #         List of relevant documents
    #     """

# Sample corpus : each (text, type, trait) triple becomes one Document with matching metadata.
documentList = [
    Document(page_content = text, metadata = {"type" : kind, "trait" : trait})
    for text, kind, trait in (
        ("Dogs are great companions, known for their loyalty and friendliness.", "dog", "loyalty"),
        ("Cats are independent pets that often enjoy their own space.", "cat", "independence"),
        ("Goldfish are popular pets for beginners, requiring relatively simple care.", "fish", "low maintenance"),
        ("Parrots are intelligent birds capable of mimicking human speech.", "bird", "intelligence"),
        ("Rabbits are social animals that need plenty of space to hop around.", "rabbit", "social"),
    )
]

# Retriever over the sample corpus, limited to three results per query.
customRetriever = CustomRetriever(documentList = documentList, k = 3)

# Single synchronous query, followed by a separator line.
responseDocumentList = customRetriever.invoke("that")
print(responseDocumentList, "-" * 50, sep = "\n")

# Several queries in one call; the result holds one document list per query.
responseDocumentListList = customRetriever.batch(["dog", "cat"])
print(responseDocumentListList, "-" * 50, sep = "\n")

async def main():
    """Run the async demos : one ainvoke query, then print every event streamed for a second query."""
    print(await customRetriever.ainvoke("that"))
    print("-" * 50)

    # Stream the run events (v2 schema) for the query "bar" and print each one as it arrives.
    eventStream = customRetriever.astream_events("bar", version = "v2")
    async for eventDictionary in eventStream:
        print(eventDictionary)
    print("-" * 50)

asyncio.run(main())

"""
[Document(metadata={'type': 'cat', 'trait': 'independence'}, page_content='Cats are independent pets that often enjoy their own space.'), Document(metadata={'type': 'rabbit', 'trait': 'social'}, page_content='Rabbits are social animals that need plenty of space to hop around.')]
---------------------------------------------------
[[Document(metadata={'type': 'dog', 'trait': 'loyalty'}, page_content='Dogs are great companions, known for their loyalty and friendliness.')], [Document(metadata={'type': 'cat', 'trait': 'independence'}, page_content='Cats are independent pets that often enjoy their own space.')]]
---------------------------------------------------
[Document(metadata={'type': 'cat', 'trait': 'independence'}, page_content='Cats are independent pets that often enjoy their own space.'), Document(metadata={'type': 'rabbit', 'trait': 'social'}, page_content='Rabbits are social animals that need plenty of space to hop around.')]
---------------------------------------------------
{'event': 'on_retriever_start', 'data': {'input': 'bar'}, 'name': 'CustomRetriever', 'tags': [], 'run_id': '359f2805-45a0-422a-b8d9-ccc9067ea7de', 'metadata': {'ls_retriever_name': 'custom'}, 'parent_ids': []}
{'event': 'on_retriever_end', 'data': {'output': []}, 'run_id': '359f2805-45a0-422a-b8d9-ccc9067ea7de', 'name': 'CustomRetriever', 'tags': [], 'metadata': {'ls_retriever_name': 'custom'}, 'parent_ids': []}
---------------------------------------------------
"""

100

101

102

103

104

import asyncio

from langchain_core.retrievers import BaseRetriever

from typing import List

from langchain_core.documents import Document

from langchain_core.callbacks import CallbackManagerForRetrieverRun

class CustomRetriever(BaseRetriever):
    """A toy retriever that returns up to k documents containing the user query.

    A document matches when the lower-cased query is a substring of its
    lower-cased ``page_content``. Documents are scanned in list order and the
    scan stops as soon as ``k`` matches have been collected.

    This retriever only implements the sync method _get_relevant_documents.
    If the retriever were to involve file access or network access, it could benefit from a native async implementation of `_aget_relevant_documents`.
    As usual, with Runnables, there's a default async implementation that's provided that delegates to the sync implementation running on another thread.
    """

    documentList : List[Document]
    """List of documents to retrieve from."""

    k : int
    """Maximum number of results to return"""

    def _get_relevant_documents(self, query : str, *, run_manager : CallbackManagerForRetrieverRun) -> List[Document]:
        """Return the first ``k`` documents whose text contains ``query``.

        Args :
            query       : String to look for (case-insensitive substring match)
            run_manager : The callbacks handler; not used by this implementation

        Returns :
            At most ``k`` matching documents, in ``documentList`` order.
            An empty list when ``k`` is zero or negative.
        """
        # A non-positive limit can never be satisfied by appending, so bail out first.
        if self.k <= 0:
            return []

        loweredQuery = query.lower()  # invariant across the loop

        matchingDocumentList = []
        for document in self.documentList:
            if loweredQuery in document.page_content.lower():
                matchingDocumentList.append(document)
                # Check the limit right after appending so exactly k (not k + 1) are returned.
                if len(matchingDocumentList) >= self.k:
                    break
        return matchingDocumentList

    # Optional : override _aget_relevant_documents to provide a more efficient native implementation.
    # (The stub below would also need AsyncCallbackManagerForRetrieverRun to be imported.)
    # async def _aget_relevant_documents(self, query : str, *, run_manager : AsyncCallbackManagerForRetrieverRun) -> List[Document]:
    #     """Asynchronously get documents relevant to a query.

    #     Args :
    #         query       : String to find relevant documents for
    #         run_manager : The callbacks handler to use

    #     Returns :
    #         List of relevant documents
    #     """

# Sample corpus : five short pet descriptions, each tagged with a type and a trait.
documentList = [
    Document(
        page_content = "Dogs are great companions, known for their loyalty and friendliness.",
        metadata     = {"type" : "dog", "trait" : "loyalty"}
    ),
    Document(
        page_content = "Cats are independent pets that often enjoy their own space.",
        metadata     = {"type" : "cat", "trait" : "independence"}
    ),
    Document(
        page_content = "Goldfish are popular pets for beginners, requiring relatively simple care.",
        metadata     = {"type": "fish", "trait": "low maintenance"}
    ),
    Document(
        page_content = "Parrots are intelligent birds capable of mimicking human speech.",
        metadata     = {"type": "bird", "trait": "intelligence"}
    ),
    Document(
        page_content = "Rabbits are social animals that need plenty of space to hop around.",
        metadata     = {"type": "rabbit", "trait": "social"}
    ),
]

# Retriever over the sample corpus, limited to three results per query.
customRetriever = CustomRetriever(documentList = documentList, k = 3)

# Single synchronous query.
responseDocumentList = customRetriever.invoke("that")

print(responseDocumentList)
print("-" * 50)

# Several queries in one call; the result holds one document list per query.
responseDocumentListList = customRetriever.batch(["dog", "cat"])

print(responseDocumentListList)
print("-" * 50)

async def main():
    """Run the async demos : one ainvoke query, then print every event streamed for a second query."""
    responseDocumentList = await customRetriever.ainvoke("that")

    print(responseDocumentList)
    print("-" * 50)

    # Stream the run events (v2 schema) for the query "bar" and print each one as it arrives.
    async for eventDictionary in customRetriever.astream_events("bar", version = "v2"):
        print(eventDictionary)
    print("-" * 50)

asyncio.run(main())

"""

[Document(metadata={'type': 'cat', 'trait': 'independence'}, page_content='Cats are independent pets that often enjoy their own space.'), Document(metadata={'type': 'rabbit', 'trait': 'social'}, page_content='Rabbits are social animals that need plenty of space to hop around.')]

---------------------------------------------------

[[Document(metadata={'type': 'dog', 'trait': 'loyalty'}, page_content='Dogs are great companions, known for their loyalty and friendliness.')], [Document(metadata={'type': 'cat', 'trait': 'independence'}, page_content='Cats are independent pets that often enjoy their own space.')]]

---------------------------------------------------

{'event': 'on_retriever_start', 'data': {'input': 'bar'}, 'name': 'CustomRetriever', 'tags': [], 'run_id': '359f2805-45a0-422a-b8d9-ccc9067ea7de', 'metadata': {'ls_retriever_name': 'custom'}, 'parent_ids': []}

{'event': 'on_retriever_end', 'data': {'output': []}, 'run_id': '359f2805-45a0-422a-b8d9-ccc9067ea7de', 'name': 'CustomRetriever', 'tags': [], 'metadata': {'ls_retriever_name': 'custom'}, 'parent_ids': []}

---------------------------------------------------

"""

▶ requirements.txt


aiohappyeyeballs==2.4.3
aiohttp==3.11.7
aiosignal==1.3.1
annotated-types==0.7.0
anyio==4.6.2.post1
attrs==24.2.0
certifi==2024.8.30
charset-normalizer==3.4.0
frozenlist==1.5.0
greenlet==3.1.1
h11==0.14.0
httpcore==1.0.7
httpx==0.27.2
idna==3.10
jsonpatch==1.33
jsonpointer==3.0.0
langchain==0.3.7
langchain-core==0.3.19
langchain-text-splitters==0.3.2
langsmith==0.1.144
multidict==6.1.0
numpy==1.26.4
orjson==3.10.11
packaging==24.2
propcache==0.2.0
pydantic==2.10.1
pydantic_core==2.27.1
PyYAML==6.0.2
requests==2.32.3
requests-toolbelt==1.0.0
sniffio==1.3.1
SQLAlchemy==2.0.36
tenacity==9.0.0
typing_extensions==4.12.2
urllib3==2.2.3
yarl==1.18.0

aiohappyeyeballs==2.4.3

aiohttp==3.11.7

aiosignal==1.3.1

annotated-types==0.7.0

anyio==4.6.2.post1

attrs==24.2.0

certifi==2024.8.30

charset-normalizer==3.4.0

frozenlist==1.5.0

greenlet==3.1.1

h11==0.14.0

httpcore==1.0.7

httpx==0.27.2

idna==3.10

jsonpatch==1.33

jsonpointer==3.0.0

langchain==0.3.7

langchain-core==0.3.19

langchain-text-splitters==0.3.2

langsmith==0.1.144

multidict==6.1.0

numpy==1.26.4

orjson==3.10.11

packaging==24.2

propcache==0.2.0

pydantic==2.10.1

pydantic_core==2.27.1

PyYAML==6.0.2

requests==2.32.3

requests-toolbelt==1.0.0

sniffio==1.3.1

SQLAlchemy==2.0.36

tenacity==9.0.0

typing_extensions==4.12.2

urllib3==2.2.3

yarl==1.18.0

※ pip install langchain 명령을 실행했다.

Post Views: 0

AI LANGCHAIN LLM PYTHON RETRIEVER

icodebroker

[PYTHON/LANGCHAIN] BaseRetriever 클래스 : 커스텀 검색기 만들기

분류

보관함