■ CompiledStateGraph 클래스의 astream 메소드를 사용해 병렬화를 통해 텍스트를 요약하는 방법을 보여준다.
※ OPENAI_API_KEY 환경 변수 값은 .env 파일에 정의한다.
▶ main.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
import operator
import asyncio
from typing import TypedDict, List, Annotated, Literal

from dotenv import load_dotenv
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.documents import Document
from langchain_openai import ChatOpenAI
from langchain.chains.combine_documents.reduce import split_list_of_docs, acollapse_docs
from langgraph.constants import Send
from langgraph.graph import END, START, StateGraph

# Read OPENAI_API_KEY (and any other settings) from the .env file.
load_dotenv()

# Fetch the source article and split it into ~1000-token chunks for the "map" step.
webBaseLoader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
documentList = webBaseLoader.load()
characterTextSplitter = CharacterTextSplitter.from_tiktoken_encoder(chunk_size = 1000, chunk_overlap = 0)
splitDocumentList = characterTextSplitter.split_documents(documentList)

# "Map" chain: summarize a single chunk of text.
mapChatPromptTemplate = ChatPromptTemplate.from_messages([("human", "Write a concise summary of the following :\\n\\n{context}")])
chatOpenAI = ChatOpenAI(model = "gpt-4o-mini")
mapRunnableSequence = mapChatPromptTemplate | chatOpenAI | StrOutputParser()

# "Reduce" chain: distill a set of summaries into one consolidated summary.
reduceChatPromptTemplateString = """
The following is a set of summaries :
{docs}
Take these and distill it into a final, consolidated summary of the main themes.
"""
reduceChatPromptTemplate = ChatPromptTemplate([("human", reduceChatPromptTemplateString)])
reduceRunnableSequence = reduceChatPromptTemplate | chatOpenAI | StrOutputParser()


class SummaryState(TypedDict):
    # State handed to each parallel "generate_summary" node: one chunk of text.
    content : str


class OverallState(TypedDict):
    # contentList            : chunks to summarize (graph input)
    # summaryList            : per-chunk summaries; operator.add merges the
    #                          results of the parallel map nodes
    # collapsedSummaryDocumentList : summaries wrapped as Documents, possibly
    #                          collapsed in rounds until they fit the token budget
    # finalSummary           : consolidated summary (graph output)
    contentList : List[str]
    summaryList : Annotated[list, operator.add]
    collapsedSummaryDocumentList : List[Document]
    finalSummary : str


async def generateSummary(summaryState : SummaryState):
    """Map node: summarize one chunk and append it to summaryList."""
    responseString = await mapRunnableSequence.ainvoke(summaryState["content"])
    return {"summaryList" : [responseString]}


def collectSummaryDocumentList(overallState : OverallState):
    """Wrap every per-chunk summary string in a Document for the collapse step."""
    collectedDocumentList = []
    for summary in overallState["summaryList"]:
        collectedDocumentList.append(Document(summary))
    return {"collapsedSummaryDocumentList" : collectedDocumentList}


def getLength(documentList : List[Document]) -> int:
    """Total token count of the given documents, as counted by the chat model."""
    totalTokenCount = 0
    for document in documentList:
        totalTokenCount += chatOpenAI.get_num_tokens(document.page_content)
    return totalTokenCount


maximumTokenCount = 1000


async def collapseSummaryDocumentList(overallState : OverallState):
    """Collapse node: group documents under the token budget, then reduce each group."""
    documentListList = split_list_of_docs(overallState["collapsedSummaryDocumentList"], getLength, maximumTokenCount)
    resultDocumentList = [
        await acollapse_docs(groupedDocumentList, reduceRunnableSequence.ainvoke)
        for groupedDocumentList in documentListList
    ]
    return {"collapsedSummaryDocumentList" : resultDocumentList}


async def generateFinalSummary(overallState : OverallState):
    """Reduce node: merge the (collapsed) summaries into the final summary."""
    responseString = await reduceRunnableSequence.ainvoke(overallState["collapsedSummaryDocumentList"])
    return {"finalSummary" : responseString}


def mapSummaryList(overallState : OverallState):
    """Fan out: send one Send per chunk so generate_summary runs in parallel."""
    return [Send("generate_summary", {"content" : content}) for content in overallState["contentList"]]


def shouldCollapse(overallState : OverallState) -> Literal["collapse_summary_document_list", "generate_final_summary"]:
    """Keep collapsing while the combined summaries exceed the token budget."""
    tokenCount = getLength(overallState["collapsedSummaryDocumentList"])
    if tokenCount > maximumTokenCount:
        return "collapse_summary_document_list"
    return "generate_final_summary"


stateGraph = StateGraph(OverallState)
# Wire the map-reduce summarization graph:
#   START -(fan-out via Send)-> generate_summary -> collect_summary_document_list
#   -> (collapse_summary_document_list, looping while over the token budget)
#   -> generate_final_summary -> END
stateGraph.add_node("generate_summary", generateSummary)
stateGraph.add_node("collect_summary_document_list", collectSummaryDocumentList)
stateGraph.add_node("collapse_summary_document_list", collapseSummaryDocumentList)
stateGraph.add_node("generate_final_summary", generateFinalSummary)
stateGraph.add_conditional_edges(START, mapSummaryList, ["generate_summary"])
stateGraph.add_edge("generate_summary", "collect_summary_document_list")
stateGraph.add_conditional_edges("collect_summary_document_list", shouldCollapse)
stateGraph.add_conditional_edges("collapse_summary_document_list", shouldCollapse)
stateGraph.add_edge("generate_final_summary", END)
compiledStateGraph = stateGraph.compile()


async def main():
    """Stream graph updates and print only the final-summary node's update.

    astream yields one update dict per completed node; every other node's
    output is skipped so only the consolidated summary is shown.
    """
    inputState = {"contentList": [document.page_content for document in splitDocumentList]}
    async for addableUpdatesDict in compiledStateGraph.astream(inputState, {"recursion_limit": 10}):
        if "generate_final_summary" in addableUpdatesDict:
            print(addableUpdatesDict)


# Guard the entry point: without this, merely importing the module would kick
# off the event loop and make network calls to OpenAI.
if __name__ == "__main__":
    asyncio.run(main())

# Sample output:
"""
{'generate_final_summary': {'finalSummary': "The consolidated summary of the main themes from the documents highlights advancements in artificial intelligence, particularly focusing on large language models (LLMs) and their integration into autonomous systems and software architecture development. Key themes include:\n\n1. **Integration and Application of LLMs**: LLMs are increasingly utilized in autonomous agents, enhancing their problem-solving abilities through sophisticated planning techniques (e.g., Chain of Thought, Tree of Thoughts) and classical methods (e.g., Planning Domain Definition Language). They are also applied in game development, emphasizing structured methodologies for task management and software architecture.\n\n2. **Enhanced Decision-Making and Self-Reflection**: The incorporation of self-reflective mechanisms enables agents to learn from past actions, using frameworks like ReAct and Reflexion to refine decision-making processes through structured cycles of thought and observation.\n\n3. **Memory Management**: Various memory types (sensory, short-term, long-term) are employed to improve decision-making and information retrieval, with techniques such as Algorithm Distillation and Maximum Inner Product Search enhancing functionality.\n\n4. **External Tool Integration**: Agents' ability to interact with external APIs and resources through frameworks like MRKL, TALM, and Toolformer expands their capabilities, allowing for effective task planning and execution beyond pre-trained knowledge.\n\n5. **Algorithmic Innovations**: The exploration of approximate nearest neighbor search algorithms, such as Locality-Sensitive Hashing and FAISS, showcases their importance in efficiently managing high-dimensional data.\n\n6. **Challenges and Limitations**: Despite the advancements, LLMs face challenges, including accuracy issues in technical applications, context length constraints, and difficulties with long-term planning, raising concerns about reliability and adaptability.\n\n7. **Generative Agents**: The emergence of generative agents, powered by LLMs, highlights their ability to simulate human-like interactions and exhibit social behaviors in controlled environments.\n\nOverall, the themes underscore the potential of LLMs in enhancing autonomous systems and software development while acknowledging the complexities and challenges inherent in their application across various domains."}}
"""
▶ requirements.txt
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
aiohappyeyeballs==2.4.4 aiohttp==3.11.11 aiosignal==1.3.2 annotated-types==0.7.0 anyio==4.7.0 attrs==24.3.0 beautifulsoup4==4.12.3 bs4==0.0.2 certifi==2024.12.14 charset-normalizer==3.4.0 colorama==0.4.6 dataclasses-json==0.6.7 distro==1.9.0 frozenlist==1.5.0 greenlet==3.1.1 h11==0.14.0 httpcore==1.0.7 httpx==0.28.1 httpx-sse==0.4.0 idna==3.10 jiter==0.8.2 jsonpatch==1.33 jsonpointer==3.0.0 langchain==0.3.13 langchain-community==0.3.13 langchain-core==0.3.28 langchain-openai==0.2.14 langchain-text-splitters==0.3.4 langgraph==0.2.60 langgraph-checkpoint==2.0.9 langgraph-sdk==0.1.48 langsmith==0.2.4 marshmallow==3.23.2 msgpack==1.1.0 multidict==6.1.0 mypy-extensions==1.0.0 numpy==2.2.1 openai==1.58.1 orjson==3.10.12 packaging==24.2 propcache==0.2.1 pydantic==2.10.4 pydantic-settings==2.7.0 pydantic_core==2.27.2 python-dotenv==1.0.1 PyYAML==6.0.2 regex==2024.11.6 requests==2.32.3 requests-toolbelt==1.0.0 sniffio==1.3.1 soupsieve==2.6 SQLAlchemy==2.0.36 tenacity==9.0.0 tiktoken==0.8.0 tqdm==4.67.1 typing-inspect==0.9.0 typing_extensions==4.12.2 urllib3==2.3.0 yarl==1.18.3 |
※ pip install python-dotenv langchain langchain-community langchain-openai langgraph bs4 명령을 실행했다.