■ BaseLoader 클래스를 사용해 커스텀 문서 로더를 만드는 방법을 보여준다.
▶ main.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
import asyncio
from typing import AsyncIterator, Iterator

import aiofiles
from langchain_core.document_loaders import BaseLoader
from langchain_core.documents import Document


class CustomDocumentLoader(BaseLoader):
    """An example document loader that reads a file line by line."""

    def __init__(self, filePath: str) -> None:
        """Initialize the loader with a file path.

        Args:
            filePath: The path to the file to load.
        """
        self.filePath = filePath

    def lazy_load(self) -> Iterator[Document]:
        """Lazily yield one Document per line of the file.

        The file is opened as UTF-8 text and read line by line through a
        generator, so documents are produced one at a time.

        Yields:
            A Document whose ``page_content`` is the line with surrounding
            whitespace stripped, and whose metadata holds the zero-based
            ``line_number`` and the ``source`` path.
        """
        with open(self.filePath, encoding="utf-8") as text_file:
            # enumerate supplies the zero-based line number directly.
            for line_number, line in enumerate(text_file):
                yield Document(
                    page_content=line.strip(),
                    metadata={"line_number": line_number, "source": self.filePath},
                )

    # alazy_load is optional.
    # If it is omitted, a default implementation that delegates to lazy_load is used.
    async def alazy_load(self) -> AsyncIterator[Document]:
        """Asynchronously yield one Document per line of the file.

        Same output as ``lazy_load``, but the file is opened and iterated
        through ``aiofiles``.

        Yields:
            A Document whose ``page_content`` is the stripped line, and whose
            metadata holds the zero-based ``line_number`` and the ``source``
            path.
        """
        async with aiofiles.open(self.filePath, encoding="utf-8") as async_text_file:
            # enumerate() cannot drive an ``async for``, so count by hand here.
            line_number = 0
            async for line in async_text_file:
                yield Document(
                    page_content=line.strip(),
                    metadata={"line_number": line_number, "source": self.filePath},
                )
                line_number += 1


# Create a small three-line sample file for the loader to read.
with open("./meow.txt", "w", encoding="utf-8") as textIOWrapper:
    fileContent = "meow meow1\nmeow meow2\nmeow meow3"
    textIOWrapper.write(fileContent)

customDocumentLoader = CustomDocumentLoader("./meow.txt")

# 1) Stream documents one at a time through the synchronous generator.
for document in customDocumentLoader.lazy_load():
    print(document)

print("-" * 50)

# 2) load() is not defined in this class; it comes from BaseLoader and
#    returns the documents as a collection that can be iterated.
documentList = customDocumentLoader.load()

for document in documentList:
    print(document)

print("-" * 50)


async def main():
    """Print every document produced by the asynchronous loader."""
    async for document in customDocumentLoader.alazy_load():
        print(document)

    print("-" * 50)


asyncio.run(main())

"""
page_content='meow meow1' metadata={'line_number': 0, 'source': './meow.txt'}
page_content='meow meow2' metadata={'line_number': 1, 'source': './meow.txt'}
page_content='meow meow3' metadata={'line_number': 2, 'source': './meow.txt'}
--------------------------------------------------
page_content='meow meow1' metadata={'line_number': 0, 'source': './meow.txt'}
page_content='meow meow2' metadata={'line_number': 1, 'source': './meow.txt'}
page_content='meow meow3' metadata={'line_number': 2, 'source': './meow.txt'}
--------------------------------------------------
page_content='meow meow1' metadata={'line_number': 0, 'source': './meow.txt'}
page_content='meow meow2' metadata={'line_number': 1, 'source': './meow.txt'}
page_content='meow meow3' metadata={'line_number': 2, 'source': './meow.txt'}
--------------------------------------------------
"""
▶ requirements.txt
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
aiofiles==24.1.0
aiohappyeyeballs==2.4.3
aiohttp==3.11.7
aiosignal==1.3.1
annotated-types==0.7.0
anyio==4.6.2.post1
attrs==24.2.0
certifi==2024.8.30
charset-normalizer==3.4.0
frozenlist==1.5.0
greenlet==3.1.1
h11==0.14.0
httpcore==1.0.7
httpx==0.27.2
idna==3.10
jsonpatch==1.33
jsonpointer==3.0.0
langchain==0.3.8
langchain-core==0.3.21
langchain-text-splitters==0.3.2
langsmith==0.1.145
multidict==6.1.0
numpy==1.26.4
orjson==3.10.11
packaging==24.2
propcache==0.2.0
pydantic==2.10.1
pydantic_core==2.27.1
PyYAML==6.0.2
requests==2.32.3
requests-toolbelt==1.0.0
sniffio==1.3.1
SQLAlchemy==2.0.36
tenacity==9.0.0
typing_extensions==4.12.2
urllib3==2.2.3
yarl==1.18.0
※ pip install aiofiles langchain 명령을 실행했다.