■ SelfQueryRetriever 클래스를 사용해 자체 쿼리 검색기를 만드는 방법을 보여준다.
※ OPENAI_API_KEY 환경 변수 값은 .env 파일에 정의한다.
▶ main.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
"""Build a self-querying retriever over a small movie collection and query it.

The script embeds a handful of movie summaries into a Chroma vector store,
describes the available metadata attributes to the LLM, and then runs several
natural-language queries through a SelfQueryRetriever, printing the page
content of every document returned for each query.

The OPENAI_API_KEY environment variable is expected to be defined in a .env
file, which load_dotenv() reads before any OpenAI client is created.
"""

from dotenv import load_dotenv
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings

# Load environment variables (e.g. OPENAI_API_KEY) from the .env file.
load_dotenv()

# Sample corpus: each document is a short movie summary plus metadata.
# Not every document carries every metadata key (e.g. some have no "genre",
# "director" or "rating").
documentList = [
    Document(
        page_content="A bunch of scientists bring back dinosaurs and mayhem breaks loose",
        metadata={"year": 1993, "rating": 7.7, "genre": "science fiction"},
    ),
    Document(
        page_content="Leo DiCaprio gets lost in a dream within a dream within a dream within a ...",
        metadata={"year": 2010, "director": "Christopher Nolan", "rating": 8.2},
    ),
    Document(
        page_content="A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea",
        metadata={"year": 2006, "director": "Satoshi Kon", "rating": 8.6},
    ),
    Document(
        page_content="A bunch of normal-sized women are supremely wholesome and some men pine after them",
        metadata={"year": 2019, "director": "Greta Gerwig", "rating": 8.3},
    ),
    Document(
        page_content="Toys come alive and have a blast doing so",
        metadata={"year": 1995, "genre": "animated"},
    ),
    Document(
        page_content="Three men walk into the Zone, three men walk out of the Zone",
        metadata={
            "year": 1979,
            "director": "Andrei Tarkovsky",
            "genre": "thriller",
            "rating": 9.9,
        },
    ),
]

# Embed the documents and index them in a Chroma vector store.
openAIEmbeddings = OpenAIEmbeddings()
chroma = Chroma.from_documents(documentList, openAIEmbeddings)

# Metadata schema handed to the retriever alongside the content description.
attributeInfoList = [
    AttributeInfo(
        name="genre",
        # Adjacent literals are concatenated into the original single string.
        description=(
            "The genre of the movie. "
            "One of ['science fiction', 'comedy', 'drama', 'thriller', 'romance', 'action', 'animated']"
        ),
        type="string",
    ),
    AttributeInfo(
        name="year",
        description="The year the movie was released",
        type="integer",
    ),
    AttributeInfo(
        name="director",
        description="The name of the movie director",
        type="string",
    ),
    AttributeInfo(
        name="rating",
        description="A 1-10 rating for the movie",
        type="float",
    ),
]

documentContentDescription = "Brief summary of a movie"

chatOpenAI = ChatOpenAI(temperature=0)

selfQueryRetriever = SelfQueryRetriever.from_llm(
    chatOpenAI,
    chroma,
    documentContentDescription,
    attributeInfoList,
)

# Sample questions, in the order they are run.
queryList = [
    # Specifies a filter only.
    "I want to watch a movie rated higher than 8.5",
    # Specifies a query and a filter.
    "Has Greta Gerwig directed any movies about women",
    # Specifies a composite filter.
    "What's a highly rated (above 8.5) science fiction film?",
    # Specifies a query and a composite filter.
    "What's a movie after 1990 but before 2005 that's all about toys, and preferably is animated",
]

for queryIndex, query in enumerate(queryList):
    resultDocumentList = selfQueryRetriever.invoke(query)

    # Separate consecutive result groups with one blank line; nothing is
    # printed before the first group or after the last one.
    if queryIndex > 0:
        print()

    for resultDocument in resultDocumentList:
        print(resultDocument.page_content)
▶ requirements.txt
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 |
aiohappyeyeballs==2.4.0 aiohttp==3.10.5 aiosignal==1.3.1 annotated-types==0.7.0 anyio==4.4.0 asgiref==3.8.1 attrs==24.2.0 backoff==2.2.1 bcrypt==4.2.0 build==1.2.2 cachetools==5.5.0 certifi==2024.8.30 charset-normalizer==3.3.2 chroma-hnswlib==0.7.3 chromadb==0.5.3 click==8.1.7 colorama==0.4.6 coloredlogs==15.0.1 dataclasses-json==0.6.7 Deprecated==1.2.14 distro==1.9.0 fastapi==0.114.2 filelock==3.16.0 flatbuffers==24.3.25 frozenlist==1.4.1 fsspec==2024.9.0 google-auth==2.34.0 googleapis-common-protos==1.65.0 greenlet==3.1.0 grpcio==1.66.1 h11==0.14.0 httpcore==1.0.5 httptools==0.6.1 httpx==0.27.2 huggingface-hub==0.24.7 humanfriendly==10.0 idna==3.9 importlib_metadata==8.4.0 importlib_resources==6.4.5 jiter==0.5.0 jsonpatch==1.33 jsonpointer==3.0.0 kubernetes==30.1.0 langchain==0.3.0 langchain-chroma==0.1.4 langchain-community==0.3.0 langchain-core==0.3.0 langchain-openai==0.2.0 langchain-text-splitters==0.3.0 langsmith==0.1.120 lark==1.2.2 markdown-it-py==3.0.0 marshmallow==3.22.0 mdurl==0.1.2 mmh3==4.1.0 monotonic==1.6 mpmath==1.3.0 multidict==6.1.0 mypy-extensions==1.0.0 numpy==1.26.4 oauthlib==3.2.2 onnxruntime==1.19.2 openai==1.45.0 opentelemetry-api==1.27.0 opentelemetry-exporter-otlp-proto-common==1.27.0 opentelemetry-exporter-otlp-proto-grpc==1.27.0 opentelemetry-instrumentation==0.48b0 opentelemetry-instrumentation-asgi==0.48b0 opentelemetry-instrumentation-fastapi==0.48b0 opentelemetry-proto==1.27.0 opentelemetry-sdk==1.27.0 opentelemetry-semantic-conventions==0.48b0 opentelemetry-util-http==0.48b0 orjson==3.10.7 overrides==7.7.0 packaging==24.1 posthog==3.6.5 protobuf==4.25.4 pyasn1==0.6.1 pyasn1_modules==0.4.1 pydantic==2.9.1 pydantic-settings==2.5.2 pydantic_core==2.23.3 Pygments==2.18.0 PyPika==0.48.9 pyproject_hooks==1.1.0 pyreadline3==3.5.2 python-dateutil==2.9.0.post0 python-dotenv==1.0.1 PyYAML==6.0.2 regex==2024.9.11 requests==2.32.3 requests-oauthlib==2.0.0 rich==13.8.1 rsa==4.9 setuptools==74.1.2 shellingham==1.5.4 six==1.16.0 sniffio==1.3.1 
SQLAlchemy==2.0.34 starlette==0.38.5 sympy==1.13.2 tenacity==8.5.0 tiktoken==0.7.0 tokenizers==0.20.0 tqdm==4.66.5 typer==0.12.5 typing-inspect==0.9.0 typing_extensions==4.12.2 urllib3==2.2.3 uvicorn==0.30.6 watchfiles==0.24.0 websocket-client==1.8.0 websockets==13.0.1 wrapt==1.16.0 yarl==1.11.1 zipp==3.20.2 |
※ pip install python-dotenv langchain langchain-community langchain-chroma langchain-openai lark 명령을 실행했다.