from langchain_community.document_loaders import TextLoader
from langchain_community.document_loaders import DirectoryLoader
# Load every "*.txt" file in the current directory through TextLoader.
# silent_errors=False makes the loader raise on the first file it cannot
# read instead of skipping it; the traceback recorded at the bottom of this
# file shows a file that is not valid UTF-8 aborting the whole load.
# NOTE(review): to tolerate such files, either pass
# loader_kwargs={"autodetect_encoding": True} (presumably needs the optional
# `chardet` package - confirm) or set silent_errors=True to skip them.
directoryLoader = DirectoryLoader(
    ".",
    glob="*.txt",
    loader_cls=TextLoader,
    silent_errors=False,
)
documentList = directoryLoader.load()

# Print each loaded document followed by a blank separator line.
for document in documentList:
    print(document)
    print()
"""
Error loading file test-non-utf8.txt
Traceback (most recent call last):
  File "/home/king/testproject/env/lib/python3.10/site-packages/langchain_community/document_loaders/text.py", line 43, in lazy_load
    text = f.read()
  File "/usr/lib/python3.10/codecs.py", line 322, in decode
    (result, consumed) = self._buffer_decode(data, self.errors, final)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xc5 in position 0: invalid continuation byte

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/home/king/testproject/main.py", line 6, in <module>
    documentList = directoryLoader.load()
  File "/home/king/testproject/env/lib/python3.10/site-packages/langchain_community/document_loaders/directory.py", line 117, in load
    return list(self.lazy_load())
  File "/home/king/testproject/env/lib/python3.10/site-packages/langchain_community/document_loaders/directory.py", line 195, in lazy_load
    yield from self._lazy_load_file(i, p, pbar)
  File "/home/king/testproject/env/lib/python3.10/site-packages/langchain_community/document_loaders/directory.py", line 233, in _lazy_load_file
    raise e
  File "/home/king/testproject/env/lib/python3.10/site-packages/langchain_community/document_loaders/directory.py", line 223, in _lazy_load_file
    for subdoc in loader.lazy_load():
  File "/home/king/testproject/env/lib/python3.10/site-packages/langchain_community/document_loaders/text.py", line 56, in lazy_load
    raise RuntimeError(f"Error loading {self.file_path}") from e
RuntimeError: Error loading test-non-utf8.txt
"""