[PYTHON/LANGCHAIN] 모델 출력을 구문 분석하는 일반 함수를 사용해 LCEL(LangChain Expression Language)로 커스텀 파서 만들기

■ 모델 출력을 구문 분석하는 일반 함수를 사용해 LCEL(LangChain Expression Language)로 커스텀 파서를 만드는 방법을 보여준다.

▶ main.py


import json
import os
import re

from typing import List

from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.messages    import AIMessage
from langchain_core.prompts     import ChatPromptTemplate
from langchain_openai           import ChatOpenAI

# Keep an API key that is already exported in the environment; the placeholder
# below (replace it with a real key) is only used when none is set.
os.environ.setdefault("OPENAI_API_KEY", "<OPENAI_API_KEY>")

class Person(BaseModel):
    """Information about a person."""

    # The class docstring and the Field descriptions show up in the schema
    # produced from this model, so their wording is kept exactly as written.
    name: str = Field(
        ...,
        description="The name of the person",
    )
    height_in_meters: float = Field(
        ...,
        description="The height of the person expressed in meters.",
    )

class People(BaseModel):
    """Identifying information about all people in a text."""

    # One Person entry per person; this is the top-level object whose schema
    # is handed to the prompt.
    people: List[Person]

# Step 1: declare the system/human message pair. {schema} and {query} are
# template placeholders.
chatPromptTemplate = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Answer the user query. Output your answer as JSON that  "
            "matches the given schema: ```json\n{schema}\n```. "
            "Make sure to wrap the answer in ```json and ``` tags",
        ),
        ("human", "{query}"),
    ]
)

# Step 2: pre-fill {schema} with the People schema so that callers only have
# to supply {query}.
chatPromptTemplate = chatPromptTemplate.partial(schema=People.schema())

def extractJSON(message : AIMessage) -> List[dict]:
    """Parse every ```json fenced block found in a model message.

    Parameters:
        message (AIMessage): The model response; its ``content`` is searched
            for text wrapped between ```json and ``` tags.

    Returns:
        list: The decoded JSON value of each fenced block, in the order the
            blocks appear. Empty when the message contains no fenced block.

    Raises:
        ValueError: If any fenced block does not contain valid JSON. The
            underlying ``json.JSONDecodeError`` is attached as the cause.
    """
    # NOTE(review): assumes message.content is a plain string — confirm for
    # models that return structured content parts.
    text = message.content

    # Non-greedy capture of everything between an opening and a closing fence.
    pattern = r"```json(.*?)```"

    # re.DOTALL lets "." cross newlines so multi-line JSON bodies are matched.
    blockList = re.findall(pattern, text, re.DOTALL)

    try:
        # Surrounding whitespace is trimmed before each block is decoded.
        return [json.loads(block.strip()) for block in blockList]
    except json.JSONDecodeError as error:
        # Chain the decoder error so the failing position stays visible.
        raise ValueError(f"Failed to parse : {message}") from error

# Chat model client. No key is passed here — presumably it is picked up from
# the OPENAI_API_KEY environment variable.
chatOpenAI = ChatOpenAI(model = "gpt-3.5-turbo-0125")

# Chain the steps with "|": prompt -> chat model -> extractJSON, where the
# plain function serves as the final parsing step.
runnableSequence = chatPromptTemplate | chatOpenAI | extractJSON

# Run the chain once; the dict supplies the prompt's {query} placeholder.
resultList = runnableSequence.invoke({"query" : "Anna is 23 years old and she is 6 feet tall"})

print(resultList)

"""
[{'title': 'People', 'description': 'Identifying information about all people in a text.', 'type': 'object', 'properties': {'people': {'title': 'People', 'type': 'array', 'items': {'$ref': '#/definitions/Person'}}}, 'required': ['people'], 'definitions': {'Person': {'title': 'Person', 'description': 'Information about a person.', 'type': 'object', 'properties': {'name': {'title': 'Name', 'description': 'The name of the person', 'type': 'string'}, 'height_in_meters': {'title': 'Height In Meters', 'description': 'The height of the person expressed in meters.', 'type': 'number'}}, 'required': ['name', 'height_in_meters']}}}]
"""

import json

import os

import re

from typing import List

from langchain_core.pydantic_v1 import BaseModel, Field

from langchain_core.messages import AIMessage

from langchain_core.prompts import ChatPromptTemplate

from langchain_openai import ChatOpenAI

# Keep an API key that is already exported in the environment; the placeholder
# below (replace it with a real key) is only used when none is set.
os.environ.setdefault("OPENAI_API_KEY", "<OPENAI_API_KEY>")

class Person(BaseModel):
    """Information about a person."""

    # Class body re-indented so the definition is valid Python again. The
    # docstring and Field descriptions show up in the generated schema, so
    # their wording is left untouched.
    name : str = Field(..., description = "The name of the person")
    height_in_meters : float = Field(..., description = "The height of the person expressed in meters.")

class People(BaseModel):
    """Identifying information about all people in a text."""

    # Class body re-indented so the definition is valid Python again.
    # One Person entry per person.
    people : List[Person]

# Two-message prompt: a system instruction carrying the {schema} placeholder
# and the user's {query}. The system tuple is now closed before the human
# tuple starts.
chatPromptTemplate = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Answer the user query. Output your answer as JSON that "
            "matches the given schema: ```json\n{schema}\n```. "
            "Make sure to wrap the answer in ```json and ``` tags"
        ),
        ("human", "{query}")
    ]
).partial(schema = People.schema())  # pre-fill {schema}; callers pass only {query}

def extractJSON(message : AIMessage) -> List[dict]:
    """Parse every ```json fenced block found in a model message.

    Parameters:
        message (AIMessage): The model response; its ``content`` is searched
            for text wrapped between ```json and ``` tags.

    Returns:
        list: The decoded JSON value of each fenced block, in the order the
            blocks appear. Empty when the message contains no fenced block.

    Raises:
        ValueError: If any fenced block does not contain valid JSON. The
            underlying ``json.JSONDecodeError`` is attached as the cause.
    """
    # NOTE(review): assumes message.content is a plain string — confirm for
    # models that return structured content parts.
    text = message.content

    # Non-greedy capture of everything between an opening and a closing fence.
    pattern = r"```json(.*?)```"

    # re.DOTALL lets "." cross newlines so multi-line JSON bodies are matched.
    blockList = re.findall(pattern, text, re.DOTALL)

    try:
        # Surrounding whitespace is trimmed before each block is decoded.
        return [json.loads(block.strip()) for block in blockList]
    except json.JSONDecodeError as error:
        # Chain the decoder error so the failing position stays visible.
        raise ValueError(f"Failed to parse : {message}") from error

# Chat model client. No key is passed here — presumably it is picked up from
# the OPENAI_API_KEY environment variable.
chatOpenAI = ChatOpenAI(model = "gpt-3.5-turbo-0125")

# Chain the steps with "|": prompt -> chat model -> extractJSON, where the
# plain function serves as the final parsing step.
runnableSequence = chatPromptTemplate | chatOpenAI | extractJSON

# Run the chain once; the dict supplies the prompt's {query} placeholder.
resultList = runnableSequence.invoke({"query" : "Anna is 23 years old and she is 6 feet tall"})

print(resultList)

"""

[{'title': 'People', 'description': 'Identifying information about all people in a text.', 'type': 'object', 'properties': {'people': {'title': 'People', 'type': 'array', 'items': {'$ref': '#/definitions/Person'}}}, 'required': ['people'], 'definitions': {'Person': {'title': 'Person', 'description': 'Information about a person.', 'type': 'object', 'properties': {'name': {'title': 'Name', 'description': 'The name of the person', 'type': 'string'}, 'height_in_meters': {'title': 'Height In Meters', 'description': 'The height of the person expressed in meters.', 'type': 'number'}}, 'required': ['name', 'height_in_meters']}}}]

"""

▶ requirements.txt


annotated-types==0.7.0
anyio==4.4.0
certifi==2024.6.2
charset-normalizer==3.3.2
distro==1.9.0
exceptiongroup==1.2.1
h11==0.14.0
httpcore==1.0.5
httpx==0.27.0
idna==3.7
jsonpatch==1.33
jsonpointer==3.0.0
langchain-core==0.2.7
langchain-openai==0.1.8
langsmith==0.1.77
openai==1.34.0
orjson==3.10.5
packaging==24.1
pydantic==2.7.4
pydantic_core==2.18.4
PyYAML==6.0.1
regex==2024.5.15
requests==2.32.3
sniffio==1.3.1
tenacity==8.3.0
tiktoken==0.7.0
tqdm==4.66.4
typing_extensions==4.12.2
urllib3==2.2.1

annotated-types==0.7.0

anyio==4.4.0

certifi==2024.6.2

charset-normalizer==3.3.2

distro==1.9.0

exceptiongroup==1.2.1

h11==0.14.0

httpcore==1.0.5

httpx==0.27.0

idna==3.7

jsonpatch==1.33

jsonpointer==3.0.0

langchain-core==0.2.7

langchain-openai==0.1.8

langsmith==0.1.77

openai==1.34.0

orjson==3.10.5

packaging==24.1

pydantic==2.7.4

pydantic_core==2.18.4

PyYAML==6.0.1

regex==2024.5.15

requests==2.32.3

sniffio==1.3.1

tenacity==8.3.0

tiktoken==0.7.0

tqdm==4.66.4

typing_extensions==4.12.2

urllib3==2.2.1

※ pip install langchain-openai 명령을 실행했다.