■ Chroma 클래스의 from_documents 클래스 메소드에 persist_directory/collection_name 인자를 전달해 영구 저장소를 만드는 방법을 보여준다.
※ OPENAI_API_KEY 환경 변수 값은 .env 파일에 정의한다.
▶ main.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 |
"""Build a persistent Chroma collection from a text file split into chunks."""

from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_chroma import Chroma

# Read the .env file into the process environment before anything else runs.
load_dotenv()

# Splitter configured for chunks of up to 600 characters with no overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=0)

# Load the source text file and split it into documents in a single step.
split_documents = TextLoader("nlp-keywords.txt").load_and_split(text_splitter)

embedding_model = OpenAIEmbeddings()

# Passing persist_directory and collection_name selects the on-disk location
# ("chroma_db") and the collection ("test1") that receive the documents.
vector_store = Chroma.from_documents(
    documents=split_documents,
    embedding=embedding_model,
    persist_directory="chroma_db",
    collection_name="test1",
)
▶ requirements.txt
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 |
aiohappyeyeballs==2.4.4 aiohttp==3.11.11 aiosignal==1.3.2 annotated-types==0.7.0 anyio==4.8.0 asgiref==3.8.1 async-timeout==4.0.3 attrs==24.3.0 backoff==2.2.1 bcrypt==4.2.1 build==1.2.2.post1 cachetools==5.5.0 certifi==2024.12.14 charset-normalizer==3.4.1 chroma-hnswlib==0.7.6 chromadb==0.5.23 click==8.1.8 coloredlogs==15.0.1 dataclasses-json==0.6.7 Deprecated==1.2.15 distro==1.9.0 durationpy==0.9 exceptiongroup==1.2.2 fastapi==0.115.6 filelock==3.16.1 flatbuffers==24.12.23 frozenlist==1.5.0 fsspec==2024.12.0 google-auth==2.37.0 googleapis-common-protos==1.66.0 greenlet==3.1.1 grpcio==1.69.0 h11==0.14.0 httpcore==1.0.7 httptools==0.6.4 httpx==0.28.1 httpx-sse==0.4.0 huggingface-hub==0.27.1 humanfriendly==10.0 idna==3.10 importlib_metadata==8.5.0 importlib_resources==6.5.2 jiter==0.8.2 jsonpatch==1.33 jsonpointer==3.0.0 kubernetes==31.0.0 langchain==0.3.14 langchain-chroma==0.2.0 langchain-community==0.3.14 langchain-core==0.3.29 langchain-openai==0.3.0 langchain-text-splitters==0.3.5 langsmith==0.2.10 markdown-it-py==3.0.0 marshmallow==3.25.1 mdurl==0.1.2 mmh3==5.0.1 monotonic==1.6 mpmath==1.3.0 multidict==6.1.0 mypy-extensions==1.0.0 numpy==1.26.4 oauthlib==3.2.2 onnxruntime==1.20.1 openai==1.59.7 opentelemetry-api==1.29.0 opentelemetry-exporter-otlp-proto-common==1.29.0 opentelemetry-exporter-otlp-proto-grpc==1.29.0 opentelemetry-instrumentation==0.50b0 opentelemetry-instrumentation-asgi==0.50b0 opentelemetry-instrumentation-fastapi==0.50b0 opentelemetry-proto==1.29.0 opentelemetry-sdk==1.29.0 opentelemetry-semantic-conventions==0.50b0 opentelemetry-util-http==0.50b0 orjson==3.10.14 overrides==7.7.0 packaging==24.2 posthog==3.8.3 propcache==0.2.1 protobuf==5.29.3 pyasn1==0.6.1 pyasn1_modules==0.4.1 pydantic==2.10.5 pydantic-settings==2.7.1 pydantic_core==2.27.2 Pygments==2.19.1 PyPika==0.48.9 pyproject_hooks==1.2.0 python-dateutil==2.9.0.post0 python-dotenv==1.0.1 PyYAML==6.0.2 regex==2024.11.6 requests==2.32.3 requests-oauthlib==2.0.0 requests-toolbelt==1.0.0 
rich==13.9.4 rsa==4.9 shellingham==1.5.4 six==1.17.0 sniffio==1.3.1 SQLAlchemy==2.0.37 starlette==0.41.3 sympy==1.13.3 tenacity==9.0.0 tiktoken==0.8.0 tokenizers==0.20.3 tomli==2.2.1 tqdm==4.67.1 typer==0.15.1 typing-inspect==0.9.0 typing_extensions==4.12.2 urllib3==2.3.0 uvicorn==0.34.0 uvloop==0.21.0 watchfiles==1.0.4 websocket-client==1.8.0 websockets==14.1 wrapt==1.17.2 yarl==1.18.3 zipp==3.21.0 |
※ pip install python-dotenv langchain_community langchain_openai langchain_chroma 명령을 실행했다.