■ FAISS 클래스의 index_to_docstore_id 속성을 사용해 문서 인덱스와 ID를 구하는 방법을 보여준다.
※ OPENAI_API_KEY 환경 변수 값은 .env 파일에 정의한다.
▶ main.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
"""Print every FAISS index position with the docstore ID mapped to it.

The script splits ``nlp-keywords.txt`` into chunks, embeds the chunks with
OpenAI embeddings, builds a LangChain FAISS vector store from them, and then
walks the store's ``index_to_docstore_id`` mapping.
"""

# NOTE: the ``faiss`` module is not referenced directly below. The import is
# kept from the original script, and the vector store is bound to a different
# name so that it no longer overwrites this module binding.
import faiss
from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS

# Read the .env file so that OPENAI_API_KEY is available in the environment.
load_dotenv()

# Split the source text into chunks of at most 600 characters, no overlap.
recursiveCharacterTextSplitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=0)

textLoader = TextLoader("nlp-keywords.txt")

splitDocumentList = textLoader.load_and_split(recursiveCharacterTextSplitter)

openAIEmbeddings = OpenAIEmbeddings()

# Embed every chunk and build the vector store from the results.
vectorStore = FAISS.from_documents(documents=splitDocumentList, embedding=openAIEmbeddings)

# Mapping of index position -> docstore ID kept by the vector store.
indexDictionary = vectorStore.index_to_docstore_id

# Unpack each (index, ID) pair directly instead of indexing into the tuple.
for index, documentID in indexDictionary.items():
    print(f"{index:2d} : {documentID}")

# Sample output recorded by the author (IDs differ on every run):
"""
0 f0044147-05df-43a3-a221-bbc138d1b275
0 : 3e4001c6-8afd-45f6-b32d-d08f53197f26
1 : 962f6282-ce08-43dd-8de7-27df84bc26f6
2 : cded9dac-fa71-48e9-b184-0072851a663f
3 : b71200c6-4c9d-45e2-91f4-56bed8692fd5
4 : 6898f09d-fe13-48d9-ab52-40d2f72c6e71
5 : 888093d0-7c5e-482e-a990-70852b5c49ce
6 : b7156aee-ffe6-4155-8d15-ccfc5d8640c0
7 : 4fd0cfe9-9b8b-41d0-98a4-87150303886a
8 : b3b4d6dc-3f37-4b8b-a1bd-8eac3206d17c
9 : 794a3047-cfb4-4fb0-a1d7-bba2892f0232
10 : 57871b97-6b41-44f2-973c-afc528c64397
"""
▶ requirements.txt
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
aiohappyeyeballs==2.4.4
aiohttp==3.11.11
aiosignal==1.3.2
annotated-types==0.7.0
anyio==4.8.0
async-timeout==4.0.3
attrs==24.3.0
certifi==2024.12.14
charset-normalizer==3.4.1
dataclasses-json==0.6.7
distro==1.9.0
exceptiongroup==1.2.2
faiss-gpu==1.7.2
frozenlist==1.5.0
greenlet==3.1.1
h11==0.14.0
httpcore==1.0.7
httpx==0.28.1
httpx-sse==0.4.0
idna==3.10
jiter==0.8.2
jsonpatch==1.33
jsonpointer==3.0.0
langchain==0.3.14
langchain-community==0.3.14
langchain-core==0.3.30
langchain-openai==0.3.0
langchain-text-splitters==0.3.5
langsmith==0.2.11
marshmallow==3.25.1
multidict==6.1.0
mypy-extensions==1.0.0
numpy==1.26.4
openai==1.59.8
orjson==3.10.14
packaging==24.2
propcache==0.2.1
pydantic==2.10.5
pydantic-settings==2.7.1
pydantic_core==2.27.2
python-dotenv==1.0.1
PyYAML==6.0.2
regex==2024.11.6
requests==2.32.3
requests-toolbelt==1.0.0
sniffio==1.3.1
SQLAlchemy==2.0.37
tenacity==9.0.0
tiktoken==0.8.0
tqdm==4.67.1
typing-inspect==0.9.0
typing_extensions==4.12.2
urllib3==2.3.0
yarl==1.18.3
※ pip install python-dotenv langchain-community langchain-openai faiss-gpu 명령을 실행했다.