[PYTHON/LANGCHAIN] trim_messages 함수 : token_counter 인자를 사용해 커스텀 카운터 만들기

■ trim_messages 함수의 token_counter 인자를 사용해 커스텀 카운터를 만드는 방법을 보여준다.

▶ main.py


import tiktoken

from langchain_core.messages import SystemMessage
from langchain_core.messages import HumanMessage
from langchain_core.messages import AIMessage
from typing                  import List
from langchain_core.messages import BaseMessage
from langchain_core.messages import ToolMessage
from langchain_core.messages import trim_messages

# Sample conversation: one system prompt followed by alternating human / AI
# turns, ending on a human question.
messageList = [
    SystemMessage(content = "you're a good assistant, you always respond with a joke."),
    HumanMessage(content = "i wonder why it's called langchain"),
    AIMessage(content = 'Well, I guess they thought "WordRope" and "SentenceString" just didn\'t have the same ring to it!'),
    HumanMessage(content = "and who is harrison chasing anyways"),
    AIMessage(content = "Hmmm let me think.\n\nWhy, he's probably chasing after the last cup of coffee in the office!"),
    HumanMessage(content = "what do you call a speechless parrot"),
]

def getStringTokenCount(text : str) -> int:
    """Return the number of tokens in ``text`` under the ``o200k_base`` encoding.

    Args:
        text: The string to tokenize.

    Returns:
        The length of the token sequence produced by tiktoken for ``text``.
    """
    encoding = tiktoken.get_encoding("o200k_base")
    # By default ``encode`` raises ValueError when the text contains a
    # special-token literal such as "<|endoftext|>". Message text is arbitrary
    # input, so such literals are counted as ordinary text instead of aborting
    # the whole count.
    return len(encoding.encode(text, disallowed_special = ()))

def tiktokenCounter(messageList : List[BaseMessage]) -> int:
    """Count the tokens used by a list of chat messages.

    The total starts at 3 to account for the reply being primed with
    <|start|>assistant<|message|>. Each message then adds a fixed overhead of
    3 tokens plus the tokens of its role string and of its content; a message
    with a truthy ``name`` adds 1 more token plus the tokens of that name.

    Args:
        messageList: The messages to measure.

    Returns:
        The total token count for all messages.

    Raises:
        ValueError: If a message is not a HumanMessage, AIMessage, ToolMessage
            or SystemMessage.
    """
    # Checked in order; the first matching message type decides the role string.
    roleByMessageType = (
        (HumanMessage , "user"     ),
        (AIMessage    , "assistant"),
        (ToolMessage  , "tool"     ),
        (SystemMessage, "system"   ),
    )
    replyPrimingTokenCount = 3
    perMessageOverhead     = 3
    perNameOverhead        = 1

    total = replyPrimingTokenCount
    for currentMessage in messageList:
        roleName = next(
            (name for messageType, name in roleByMessageType if isinstance(currentMessage, messageType)),
            None
        )
        if roleName is None:
            raise ValueError(f"Unsupported messages type {currentMessage.__class__}")
        total += perMessageOverhead + getStringTokenCount(roleName) + getStringTokenCount(currentMessage.content)
        if currentMessage.name:
            total += perNameOverhead + getStringTokenCount(currentMessage.name)
    return total

# Trim the conversation with the "last" strategy and a 45-token budget, using
# the custom tiktoken-based counter to measure the messages.
trimMessageList = trim_messages(messageList, max_tokens = 45, strategy = "last", token_counter = tiktokenCounter)

# Show which messages survived the trim.
print(trimMessageList)

"""
[
    AIMessage(
        content           = "Hmmm let me think.\n\nWhy, he's probably chasing after the last cup of coffee in the office!",
        additional_kwargs = {},
        response_metadata = {}
    ),
    HumanMessage(
        content           = 'what do you call a speechless parrot',
        additional_kwargs = {},
        response_metadata = {}
    )
]
"""

import tiktoken

from langchain_core.messages import SystemMessage

from langchain_core.messages import HumanMessage

from langchain_core.messages import AIMessage

from typing import List

from langchain_core.messages import BaseMessage

from langchain_core.messages import ToolMessage

from langchain_core.messages import trim_messages

# Sample conversation: one system prompt followed by alternating human / AI
# turns, ending on a human question.
messageList = [
    SystemMessage("you're a good assistant, you always respond with a joke."),
    HumanMessage("i wonder why it's called langchain"),
    AIMessage('Well, I guess they thought "WordRope" and "SentenceString" just didn\'t have the same ring to it!'),
    HumanMessage("and who is harrison chasing anyways"),
    AIMessage("Hmmm let me think.\n\nWhy, he's probably chasing after the last cup of coffee in the office!"),
    HumanMessage("what do you call a speechless parrot")
]

def getStringTokenCount(text : str) -> int:
    """Return the number of tokens in ``text`` under the ``o200k_base`` encoding.

    Args:
        text: The string to tokenize.

    Returns:
        The length of the token sequence produced by tiktoken for ``text``.
    """
    encoding = tiktoken.get_encoding("o200k_base")
    # By default ``encode`` raises ValueError when the text contains a
    # special-token literal such as "<|endoftext|>". Message text is arbitrary
    # input, so such literals are counted as ordinary text instead of aborting
    # the whole count.
    return len(encoding.encode(text, disallowed_special = ()))

def tiktokenCounter(messageList : List[BaseMessage]) -> int:
    """Count the tokens used by a list of chat messages.

    The total starts at 3 to account for the reply being primed with
    <|start|>assistant<|message|>. Each message then adds a fixed overhead of
    3 tokens plus the tokens of its role string and of its content; a message
    with a truthy ``name`` adds 1 more token plus the tokens of that name.

    Args:
        messageList: The messages to measure.

    Returns:
        The total token count for all messages.

    Raises:
        ValueError: If a message is not a HumanMessage, AIMessage, ToolMessage
            or SystemMessage.
    """
    tokenCount           = 3  # Every reply is primed with <|start|>assistant<|message|>.
    tokenCountPerMessage = 3
    tokenCountPerName    = 1
    for message in messageList:
        # Map the message class to the role string whose tokens are counted.
        if isinstance(message, HumanMessage):
            role = "user"
        elif isinstance(message, AIMessage):
            role = "assistant"
        elif isinstance(message, ToolMessage):
            role = "tool"
        elif isinstance(message, SystemMessage):
            role = "system"
        else:
            raise ValueError(f"Unsupported messages type {message.__class__}")
        tokenCount += (tokenCountPerMessage + getStringTokenCount(role) + getStringTokenCount(message.content))
        # A named message costs one extra token plus the tokens of the name.
        if message.name:
            tokenCount += tokenCountPerName + getStringTokenCount(message.name)
    return tokenCount

# Trim the conversation with the "last" strategy and a 45-token budget, using
# the custom tiktoken-based counter to measure the messages.
trimMessageList = trim_messages(
    messageList,
    max_tokens    = 45,
    strategy      = "last",
    token_counter = tiktokenCounter
)

# Show which messages survived the trim.
print(trimMessageList)

"""

[

AIMessage(

content = "Hmmm let me think.\n\nWhy, he's probably chasing after the last cup of coffee in the office!",

additional_kwargs = {},

response_metadata = {}

),

HumanMessage(

content = 'what do you call a speechless parrot',

additional_kwargs = {},

response_metadata = {}

)

]

"""

▶ requirements.txt


aiohappyeyeballs==2.4.4
aiohttp==3.11.10
aiosignal==1.3.2
annotated-types==0.7.0
anyio==4.7.0
attrs==24.3.0
certifi==2024.12.14
charset-normalizer==3.4.0
frozenlist==1.5.0
greenlet==3.1.1
h11==0.14.0
httpcore==1.0.7
httpx==0.28.1
idna==3.10
jsonpatch==1.33
jsonpointer==3.0.0
langchain==0.3.12
langchain-core==0.3.25
langchain-text-splitters==0.3.3
langsmith==0.2.3
multidict==6.1.0
numpy==2.2.0
orjson==3.10.12
packaging==24.2
propcache==0.2.1
pydantic==2.10.3
pydantic_core==2.27.1
PyYAML==6.0.2
regex==2024.11.6
requests==2.32.3
requests-toolbelt==1.0.0
sniffio==1.3.1
SQLAlchemy==2.0.36
tenacity==9.0.0
tiktoken==0.8.0
typing_extensions==4.12.2
urllib3==2.2.3
yarl==1.18.3

aiohappyeyeballs==2.4.4

aiohttp==3.11.10

aiosignal==1.3.2

annotated-types==0.7.0

anyio==4.7.0

attrs==24.3.0

certifi==2024.12.14

charset-normalizer==3.4.0

frozenlist==1.5.0

greenlet==3.1.1

h11==0.14.0

httpcore==1.0.7

httpx==0.28.1

idna==3.10

jsonpatch==1.33

jsonpointer==3.0.0

langchain==0.3.12

langchain-core==0.3.25

langchain-text-splitters==0.3.3

langsmith==0.2.3

multidict==6.1.0

numpy==2.2.0

orjson==3.10.12

packaging==24.2

propcache==0.2.1

pydantic==2.10.3

pydantic_core==2.27.1

PyYAML==6.0.2

regex==2024.11.6

requests==2.32.3

requests-toolbelt==1.0.0

sniffio==1.3.1

SQLAlchemy==2.0.36

tenacity==9.0.0

tiktoken==0.8.0

typing_extensions==4.12.2

urllib3==2.2.3

yarl==1.18.3

※ pip install langchain tiktoken 명령을 사용했다.