■ Shows how to build a summarization chain by passing the retrieved documents and a prompt pulled from the LangChain Hub to a Llama 2 model.
▶ main.py
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_community.llms import LlamaCpp
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnablePick

# Load the web document.
webBaseLoader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
documentList = webBaseLoader.load()

# Split the document into chunks.
recursiveCharacterTextSplitter = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap = 0)
splitDocumentList = recursiveCharacterTextSplitter.split_documents(documentList)

# Set up the vector store for embedding generation and similarity search.
chroma = Chroma.from_documents(documents = splitDocumentList, embedding = HuggingFaceEmbeddings())

# Run a similarity search against the vector store.
resultDocumentList = chroma.similarity_search("What are the approaches to Task Decomposition?")

# Configure the LLM.
gpuLayerCount = 1
batchSize = 512  # Must be between 1 and n_ctx.

llamaCpp = LlamaCpp(
    model_path   = "./llama-2-13b-chat.Q4_0.gguf",  # Path to the llama-2-13b-chat.Q4_0.gguf file.
    n_gpu_layers = gpuLayerCount,
    n_batch      = batchSize,
    n_ctx        = 2048,
    f16_kv       = True,   # Must be set to True; otherwise problems occur after a few calls.
    verbose      = False
)

# Pull the prompt template from the LangChain Hub.
promptTemplate = hub.pull("rlm/rag-prompt")

# Define a function that merges the page contents of the documents.
def mergeDocumentPageContent(documentList):
    return "\n\n".join(doc.page_content for doc in documentList)

# Build the runnable chain.
runnableSequence = (
    RunnablePassthrough.assign(context = RunnablePick("context") | mergeDocumentPageContent)
    | promptTemplate
    | llamaCpp
    | StrOutputParser()
)

# Run the question answering.
resultString = runnableSequence.invoke({"context" : resultDocumentList, "question" : "Summarize the main themes in these retrieved docs"})

print(resultString)

"""
Summarizing the main themes in these retrieved documents reveals that task decomposition can be achieved
through different methods like LLM prompting, using task-specific instructions, and human inputs. The
challenges in long-term planning and task decomposition include the difficulty of adjusting plans when
faced with unexpected errors. The agent breaks down large tasks into smaller subgoals, reflects on past
actions, and refines future steps to improve final results.
"""
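※ In main.py the documents are retrieved up front with similarity_search and passed in through the "context" key. As a variation, the vector store can be wrapped in a retriever so the chain fetches the context itself. The sketch below is not part of the original listing; it reuses the chroma, mergeDocumentPageContent, promptTemplate, and llamaCpp objects defined in main.py and assumes they already exist.

# Minimal sketch (assumes the objects from main.py are already defined):
# let the chain retrieve its own context instead of passing resultDocumentList in.
chromaRetriever = chroma.as_retriever()  # wrap the vector store as a Runnable retriever

retrievalSequence = (
    {"context": chromaRetriever | mergeDocumentPageContent, "question": RunnablePassthrough()}
    | promptTemplate
    | llamaCpp
    | StrOutputParser()
)

print(retrievalSequence.invoke("What are the approaches to Task Decomposition?"))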
▶ requirements.txt
aiohttp==3.9.5
aiosignal==1.3.1
annotated-types==0.7.0
anyio==4.4.0
asgiref==3.8.1
async-timeout==4.0.3
attrs==23.2.0
backoff==2.2.1
bcrypt==4.1.3
beautifulsoup4==4.12.3
bs4==0.0.2
build==1.2.1
cachetools==5.3.3
certifi==2024.6.2
charset-normalizer==3.3.2
chroma-hnswlib==0.7.3
chromadb==0.5.0
click==8.1.7
coloredlogs==15.0.1
dataclasses-json==0.6.7
Deprecated==1.2.14
diskcache==5.6.3
dnspython==2.6.1
email_validator==2.1.1
exceptiongroup==1.2.1
fastapi==0.111.0
fastapi-cli==0.0.4
filelock==3.15.1
flatbuffers==24.3.25
frozenlist==1.4.1
fsspec==2024.6.0
google-auth==2.30.0
googleapis-common-protos==1.63.1
greenlet==3.0.3
grpcio==1.64.1
h11==0.14.0
httpcore==1.0.5
httptools==0.6.1
httpx==0.27.0
huggingface-hub==0.23.3
humanfriendly==10.0
idna==3.7
importlib_metadata==7.1.0
importlib_resources==6.4.0
Jinja2==3.1.4
joblib==1.4.2
jsonpatch==1.33
jsonpointer==3.0.0
kubernetes==30.1.0
langchain==0.2.4
langchain-chroma==0.1.1
langchain-community==0.2.4
langchain-core==0.2.6
langchain-huggingface==0.0.3
langchain-text-splitters==0.2.1
langsmith==0.1.77
llama_cpp_python==0.2.78
markdown-it-py==3.0.0
MarkupSafe==2.1.5
marshmallow==3.21.3
mdurl==0.1.2
mmh3==4.1.0
monotonic==1.6
mpmath==1.3.0
multidict==6.0.5
mypy-extensions==1.0.0
networkx==3.3
numpy==1.26.4
nvidia-cublas-cu12==12.1.3.1
nvidia-cuda-cupti-cu12==12.1.105
nvidia-cuda-nvrtc-cu12==12.1.105
nvidia-cuda-runtime-cu12==12.1.105
nvidia-cudnn-cu12==8.9.2.26
nvidia-cufft-cu12==11.0.2.54
nvidia-curand-cu12==10.3.2.106
nvidia-cusolver-cu12==11.4.5.107
nvidia-cusparse-cu12==12.1.0.106
nvidia-nccl-cu12==2.20.5
nvidia-nvjitlink-cu12==12.5.40
nvidia-nvtx-cu12==12.1.105
oauthlib==3.2.2
onnxruntime==1.18.0
opentelemetry-api==1.25.0
opentelemetry-exporter-otlp-proto-common==1.25.0
opentelemetry-exporter-otlp-proto-grpc==1.25.0
opentelemetry-instrumentation==0.46b0
opentelemetry-instrumentation-asgi==0.46b0
opentelemetry-instrumentation-fastapi==0.46b0
opentelemetry-proto==1.25.0
opentelemetry-sdk==1.25.0
opentelemetry-semantic-conventions==0.46b0
opentelemetry-util-http==0.46b0
orjson==3.10.5
overrides==7.7.0
packaging==24.1
pillow==10.3.0
posthog==3.5.0
protobuf==4.25.3
pyasn1==0.6.0
pyasn1_modules==0.4.0
pydantic==2.7.4
pydantic_core==2.18.4
Pygments==2.18.0
PyPika==0.48.9
pyproject_hooks==1.1.0
python-dateutil==2.9.0.post0
python-dotenv==1.0.1
python-multipart==0.0.9
PyYAML==6.0.1
regex==2024.5.15
requests==2.32.3
requests-oauthlib==2.0.0
rich==13.7.1
rsa==4.9
safetensors==0.4.3
scikit-learn==1.5.0
scipy==1.13.1
sentence-transformers==3.0.1
shellingham==1.5.4
six==1.16.0
sniffio==1.3.1
soupsieve==2.5
SQLAlchemy==2.0.30
starlette==0.37.2
sympy==1.12.1
tenacity==8.3.0
threadpoolctl==3.5.0
tokenizers==0.19.1
tomli==2.0.1
torch==2.3.1
tqdm==4.66.4
transformers==4.41.2
triton==2.3.1
typer==0.12.3
typing-inspect==0.9.0
typing_extensions==4.12.2
ujson==5.10.0
urllib3==2.2.1
uvicorn==0.30.1
uvloop==0.19.0
watchfiles==0.22.0
websocket-client==1.8.0
websockets==12.0
wrapt==1.16.0
yarl==1.9.4
zipp==3.19.2
※ The packages were installed with the command: pip install langchain langchain_chroma langchain-community langchainhub langchain-huggingface bs4 sentence-transformers llama-cpp-python
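※ The listing assumes the llama-2-13b-chat.Q4_0.gguf file is already present in the working directory. One way to obtain it is through the huggingface_hub download API, as sketched below; the TheBloke/Llama-2-13B-chat-GGUF repository name is an assumption and is not part of the original code, so substitute whichever source you actually use.

from huggingface_hub import hf_hub_download

# Download the quantized Llama 2 13B chat model into the current directory.
hf_hub_download(
    repo_id   = "TheBloke/Llama-2-13B-chat-GGUF",  # assumed Hugging Face repository
    filename  = "llama-2-13b-chat.Q4_0.gguf",
    local_dir = "."
)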