■ 라마 3.1 한국어 모델과 채팅하는 서버/클라이언트 프로그램을 만드는 방법을 보여준다.
※ Ollama 설치/실행과 llama3.1:8b-instruct-q2_K 모델이 다운로드 되어 있어야 한다.
▶ server.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
import json
from typing import Dict, List

import uvicorn
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from langchain_community.chat_models import ChatOllama
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from pydantic import BaseModel


class ChatBotServer:
    """Streams chat completions from a local Ollama model via LangChain."""

    def __init__(self, modelName : str):
        # NOTE: Ollama must already be running locally and `modelName` pulled.
        self.llm = ChatOllama(
            base_url = "http://localhost:11434",
            model = modelName,
            temperature = 0.7,         # randomness of generated text (0.0 ~ 1.0)
            top_p = 0.9,               # nucleus-sampling probability threshold
            num_ctx = 4096,            # context window length
            repeat_penalty = 1.1,      # penalty against repeated tokens
            stop = ["Human:", "AI:"],  # stop generation at these tokens
            seed = 42,                 # RNG seed for reproducibility
            verbose = False
        )
        self.prompt = ChatPromptTemplate.from_messages(
            [
                ("system", "{system_message}"),
                MessagesPlaceholder(variable_name = "chat_history"),
                ("human", "{human_message}")
            ]
        )

    def _parse_messages(self, messageList : List[Dict[str, str]]):
        """Split an OpenAI-style message list into (system, history, current turn).

        BUG FIX: the original only appended a user message to the history when
        `humanMessageString` was still empty — but it had just been assigned
        that same message, so the branch was effectively dead and every earlier
        user turn was silently dropped from the chat history.  Now every user
        turn except the LAST becomes history, preserving conversation order.
        """
        systemMessageString = ""
        chatHistoryList = []
        humanMessageString = ""
        # Index of the last "user" entry; that one becomes the current turn.
        lastUserIndex = max(
            (i for i, m in enumerate(messageList) if m["role"] == "user"),
            default = -1
        )
        for index, message in enumerate(messageList):
            role = message["role"]
            content = message["content"]
            if role == "system":
                systemMessageString = content
            elif role == "assistant":
                chatHistoryList.append(AIMessage(content = content))
            elif role == "user":
                if index == lastUserIndex:
                    humanMessageString = content
                else:
                    chatHistoryList.append(HumanMessage(content = content))
        return systemMessageString, chatHistoryList, humanMessageString

    async def chat(self, messageList : List[Dict[str, str]]):
        """Asynchronously yield AI message chunks for the given conversation."""
        systemMessageString, chatHistoryList, humanMessageString = self._parse_messages(messageList)
        runnableSequence = self.prompt | self.llm
        inputDictionary = {
            "system_message" : systemMessageString,
            "chat_history" : chatHistoryList,
            "human_message" : humanMessageString
        }
        async for aiChunkMessage in runnableSequence.astream(inputDictionary):
            yield aiChunkMessage


class ChatRequest(BaseModel):
    # OpenAI-style message dicts: {"role": ..., "content": ...}
    messages : List[Dict[str, str]]


chatbotServer = ChatBotServer("llama3.1:8b-instruct-q2_K")
fastAPI = FastAPI()


@fastAPI.post("/chat")
async def processChat(chatRequest : ChatRequest):
    """Stream chat chunks back to the client as server-sent events."""
    async def generate():
        async for aiChunkMessage in chatbotServer.chat(chatRequest.messages):
            yield f"data: {json.dumps(aiChunkMessage.dict())}\n\n"
        yield "data: [DONE]\n\n"
    return StreamingResponse(generate(), media_type = "text/event-stream")


if __name__ == "__main__":
    uvicorn.run(fastAPI, host = "0.0.0.0", port = 8000)
▶ client.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
import requests import sseclient import json class ChatBotClient: def __init__(self, serverURL): self.serverURL = serverURL self.headerDictionary = {"Accept" : "text/event-stream"} def chat(self, messageList, streamCallback = None): response = requests.post(self.serverURL, json = {"messages" : messageList}, headers = self.headerDictionary, stream = True) if response.status_code == 200: sseClient = sseclient.SSEClient(response) fullResponse = "" for event in sseClient.events(): if event.data == "[DONE]": break try: data = json.loads(event.data) content = data.get("content", "") if streamCallback: streamCallback(content) else: print(content, end = "", flush = True) fullResponse += content except json.JSONDecodeError: print(f"Error decoding JSON : {event.data}") print() return fullResponse else: print(f"Error : {response.status_code}") print(response.text) return None def printStream(content): print(content, end = "", flush = True) if __name__ == "__main__": chatBotClient = ChatBotClient("http://localhost:8000/chat") messageList = [ { "role" : "system", "content" : "당신은 Python 전문가 입니다. 설명시 이모지 문자는 출력하지 않습니다." }, { "role" : "user" , "content" : "python으로 현재 시간에 대해 시침과 분침의 각도를 계산하고 출력하는 코드를 작성해주세요."} ] print("Sending request to the chatbot...") responseString = chatBotClient.chat(messageList, streamCallback = printStream) if responseString: print("\nFull response received from the chatbot :") print(responseString) |
▶ requirements.txt
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
aiohappyeyeballs==2.4.0 aiohttp==3.10.5 aiosignal==1.3.1 annotated-types==0.7.0 anyio==4.4.0 attrs==24.2.0 certifi==2024.8.30 charset-normalizer==3.3.2 click==8.1.7 colorama==0.4.6 dataclasses-json==0.6.7 fastapi==0.112.2 frozenlist==1.4.1 greenlet==3.0.3 h11==0.14.0 httpcore==1.0.5 httpx==0.27.2 idna==3.8 jsonpatch==1.33 jsonpointer==3.0.0 langchain==0.2.16 langchain-community==0.2.16 langchain-core==0.2.38 langchain-text-splitters==0.2.4 langsmith==0.1.114 marshmallow==3.22.0 multidict==6.0.5 mypy-extensions==1.0.0 numpy==1.26.4 orjson==3.10.7 packaging==24.1 pydantic==2.8.2 pydantic_core==2.20.1 PyYAML==6.0.2 requests==2.32.3 sniffio==1.3.1 SQLAlchemy==2.0.34 sseclient-py==1.8.0 starlette==0.38.4 tenacity==8.5.0 typing-inspect==0.9.0 typing_extensions==4.12.2 urllib3==2.2.2 uvicorn==0.30.6 yarl==1.9.11 |
※ pip install langchain-community fastapi uvicorn sseclient-py 명령을 실행했다.