■ 모델 출력을 구문 분석하는 일반 함수를 사용해 LCEL(LangChain Expression Language)로 커스텀 파서를 만드는 방법을 보여준다.
▶ main.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
import json
import os
import re
from typing import List

from langchain_core.messages import AIMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai import ChatOpenAI

# NOTE: placeholder value. Replace it with a real key before running; a real
# key should not be committed to source control.
os.environ["OPENAI_API_KEY"] = "<OPENAI_API_KEY>"


# The class docstrings and Field descriptions below are emitted by
# People.schema() and therefore end up in the prompt sent to the model;
# edit them only when the prompt itself should change.
class Person(BaseModel):
    """Information about a person."""

    name: str = Field(..., description="The name of the person")
    height_in_meters: float = Field(
        ..., description="The height of the person expressed in meters."
    )


class People(BaseModel):
    """Identifying information about all people in a text."""

    people: List[Person]


# System prompt that embeds the JSON schema of People and asks the model to
# wrap its answer in a ```json fenced block; the human turn carries the query.
chatPromptTemplate = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Answer the user query. Output your answer as JSON that "
            "matches the given schema: ```json\n{schema}\n```. "
            "Make sure to wrap the answer in ```json and ``` tags",
        ),
        ("human", "{query}"),
    ]
).partial(schema=People.schema())


def extractJSON(message: AIMessage) -> List[dict]:
    """Parse every ```json fenced block found in a model message.

    Args:
        message: Chat model output; its ``content`` is the text scanned for
            blocks delimited by ```json and ```.

    Returns:
        One parsed JSON value per fenced block, in order of appearance.
        The list is empty when the text contains no such block.

    Raises:
        ValueError: If the payload of any fenced block is not valid JSON.
    """
    text = message.content

    # Non-greedy group so each fence pair produces its own match; DOTALL lets
    # the JSON payload span multiple lines.
    pattern = r"```json(.*?)```"
    matches = re.findall(pattern, text, re.DOTALL)

    # Strip the whitespace/newlines that surround the payload inside the fence
    # before decoding it.
    try:
        return [json.loads(match.strip()) for match in matches]
    except json.JSONDecodeError as err:
        raise ValueError(f"Failed to parse : {message}") from err


chatOpenAI = ChatOpenAI(model="gpt-3.5-turbo-0125")

# LCEL pipeline: prompt -> chat model -> plain function used as the parser.
runnableSequence = chatPromptTemplate | chatOpenAI | extractJSON

resultList = runnableSequence.invoke({"query": "Anna is 23 years old and she is 6 feet tall"})

print(resultList)

# Output recorded by the author for the run above.
# NOTE(review): this recorded output looks like the JSON schema echoed back by
# the model rather than the extracted people; confirm against a fresh run.
"""
[{'title': 'People', 'description': 'Identifying information about all people in a text.', 'type': 'object', 'properties': {'people': {'title': 'People', 'type': 'array', 'items': {'$ref': '#/definitions/Person'}}}, 'required': ['people'], 'definitions': {'Person': {'title': 'Person', 'description': 'Information about a person.', 'type': 'object', 'properties': {'name': {'title': 'Name', 'description': 'The name of the person', 'type': 'string'}, 'height_in_meters': {'title': 'Height In Meters', 'description': 'The height of the person expressed in meters.', 'type': 'number'}}, 'required': ['name', 'height_in_meters']}}}]
"""
▶ requirements.txt
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
annotated-types==0.7.0
anyio==4.4.0
certifi==2024.6.2
charset-normalizer==3.3.2
distro==1.9.0
exceptiongroup==1.2.1
h11==0.14.0
httpcore==1.0.5
httpx==0.27.0
idna==3.7
jsonpatch==1.33
jsonpointer==3.0.0
langchain-core==0.2.7
langchain-openai==0.1.8
langsmith==0.1.77
openai==1.34.0
orjson==3.10.5
packaging==24.1
pydantic==2.7.4
pydantic_core==2.18.4
PyYAML==6.0.1
regex==2024.5.15
requests==2.32.3
sniffio==1.3.1
tenacity==8.3.0
tiktoken==0.7.0
tqdm==4.66.4
typing_extensions==4.12.2
urllib3==2.2.1
※ pip install langchain-openai 명령을 실행했다.