■ trim_messages 함수의 token_counter 인자에 tiktoken 기반의 커스텀 토큰 카운터 함수를 전달해 메시지 목록을 잘라내는 방법을 보여준다.
▶ main.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
from typing import List

import tiktoken
from langchain_core.messages import (
    AIMessage,
    BaseMessage,
    HumanMessage,
    SystemMessage,
    ToolMessage,
    trim_messages,
)

# Sample conversation that is trimmed at the bottom of this script.
messageList = [
    SystemMessage("you're a good assistant, you always respond with a joke."),
    HumanMessage("i wonder why it's called langchain"),
    AIMessage('Well, I guess they thought "WordRope" and "SentenceString" just didn\'t have the same ring to it!'),
    HumanMessage("and who is harrison chasing anyways"),
    AIMessage("Hmmm let me think.\n\nWhy, he's probably chasing after the last cup of coffee in the office!"),
    HumanMessage("what do you call a speechless parrot"),
]


def getStringTokenCount(text: str) -> int:
    """Return the number of tokens ``text`` encodes to under ``o200k_base``."""
    tokenIds = tiktoken.get_encoding("o200k_base").encode(text)
    return len(tokenIds)


def tiktokenCounter(messageList: List[BaseMessage]) -> int:
    """Count the tokens of a message list using tiktoken.

    The total starts at a fixed 3 tokens. Each message then adds 3 tokens
    plus the token counts of its role string and of its content. A message
    with a truthy ``name`` adds 1 more token plus the token count of the name.

    Args:
        messageList: Messages to count. ``message.content`` is passed
            straight to the string tokenizer.

    Returns:
        The accumulated token count.

    Raises:
        ValueError: If a message is not a HumanMessage, AIMessage,
            ToolMessage, or SystemMessage instance.
    """
    replyPrimingTokens = 3  # Every reply is primed with <|start|>assistant<|message|>.
    perMessageOverhead = 3
    perNameOverhead = 1

    # Checked top to bottom; the first matching type decides the role.
    roleByMessageType = (
        (HumanMessage, "user"),
        (AIMessage, "assistant"),
        (ToolMessage, "tool"),
        (SystemMessage, "system"),
    )

    total = replyPrimingTokens
    for msg in messageList:
        roleName = next(
            (label for msgType, label in roleByMessageType if isinstance(msg, msgType)),
            None,
        )
        if roleName is None:
            raise ValueError(f"Unsupported messages type {msg.__class__}")

        total += perMessageOverhead
        total += getStringTokenCount(roleName)
        total += getStringTokenCount(msg.content)

        if msg.name:
            total += perNameOverhead
            total += getStringTokenCount(msg.name)

    return total


# Trim with the "last" strategy and a 45-token budget, measuring tokens with
# the custom counter defined above.
trimMessageList = trim_messages(
    messageList,
    max_tokens=45,
    strategy="last",
    token_counter=tiktokenCounter,
)

print(trimMessageList)

# The string literal below is a no-op statement; presumably it records the
# output printed by the line above.
"""
[
    AIMessage(
        content = "Hmmm let me think.\n\nWhy, he's probably chasing after the last cup of coffee in the office!",
        additional_kwargs = {},
        response_metadata = {}
    ),
    HumanMessage(
        content = 'what do you call a speechless parrot',
        additional_kwargs = {},
        response_metadata = {}
    )
]
"""
▶ requirements.txt
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
aiohappyeyeballs==2.4.4 aiohttp==3.11.10 aiosignal==1.3.2 annotated-types==0.7.0 anyio==4.7.0 attrs==24.3.0 certifi==2024.12.14 charset-normalizer==3.4.0 frozenlist==1.5.0 greenlet==3.1.1 h11==0.14.0 httpcore==1.0.7 httpx==0.28.1 idna==3.10 jsonpatch==1.33 jsonpointer==3.0.0 langchain==0.3.12 langchain-core==0.3.25 langchain-text-splitters==0.3.3 langsmith==0.2.3 multidict==6.1.0 numpy==2.2.0 orjson==3.10.12 packaging==24.2 propcache==0.2.1 pydantic==2.10.3 pydantic_core==2.27.1 PyYAML==6.0.2 regex==2024.11.6 requests==2.32.3 requests-toolbelt==1.0.0 sniffio==1.3.1 SQLAlchemy==2.0.36 tenacity==9.0.0 tiktoken==0.8.0 typing_extensions==4.12.2 urllib3==2.2.3 yarl==1.18.3 |
※ 패키지 설치에는 pip install langchain tiktoken 명령을 사용했다.