[PYTHON/LANGCHAIN] RecursiveCharacterTextSplitter 클래스 : create_documents 메소드를 사용해 C# 소스 코드 문자열에서 문서 리스트 구하기

■ RecursiveCharacterTextSplitter 클래스의 create_documents 메소드를 사용해 C# 소스 코드 문자열에서 문서 리스트를 구하는 방법을 보여준다.

▶ main.py


from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.text_splitter import Language

# C# program used as the splitter input. The literal is kept verbatim because
# the resulting chunks depend on its exact characters (indentation, newlines).
codeString = """
using System;
class Program
{
    static void Main()
    {
        int age = 30; // Change the age value as needed

        // Categorize the age without any console output
        if (age < 18)
        {
            // Age is under 18
        }
        else if (age >= 18 && age < 65)
        {
            // Age is an adult
        }
        else
        {
            // Age is a senior citizen
        }
    }
}
"""

# Build a splitter for C# source with chunk_size 128 and chunk_overlap 0.
recursiveCharacterTextSplitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.CSHARP,
    chunk_size=128,
    chunk_overlap=0,
)

# create_documents receives a list of texts; here it holds the single C# string.
documentList = recursiveCharacterTextSplitter.create_documents([codeString])

# Show the resulting list of documents on standard output.
print(documentList)

"""
[Document(page_content='using System;'), Document(page_content='class Program\n{\n    static void Main()\n    {\n        int age = 30; // Change the age value as needed'), Document(page_content='// Categorize the age without any console output\n        if (age < 18)\n        {\n            // Age is under 18'), Document(page_content='}\n        else if (age >= 18 && age < 65)\n        {\n            // Age is an adult\n        }\n        else\n        {'), Document(page_content='// Age is a senior citizen\n        }\n    }\n}')]
"""

from langchain.text_splitter import RecursiveCharacterTextSplitter

from langchain.text_splitter import Language

# C# program used as the splitter input. The literal must keep its original
# indentation and all closing braces: the chunks produced below depend on the
# exact characters of this text.
codeString = """
using System;
class Program
{
    static void Main()
    {
        int age = 30; // Change the age value as needed

        // Categorize the age without any console output
        if (age < 18)
        {
            // Age is under 18
        }
        else if (age >= 18 && age < 65)
        {
            // Age is an adult
        }
        else
        {
            // Age is a senior citizen
        }
    }
}
"""

# Build a splitter for C# source with chunk_size 128 and chunk_overlap 0.
recursiveCharacterTextSplitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.CSHARP,
    chunk_size=128,
    chunk_overlap=0,
)

# create_documents receives a list of texts; here it holds the single C# string.
documentList = recursiveCharacterTextSplitter.create_documents([codeString])

print(documentList)

# Sample output recorded for the program above (a no-op string expression).
"""
[Document(page_content='using System;'), Document(page_content='class Program\n{\n    static void Main()\n    {\n        int age = 30; // Change the age value as needed'), Document(page_content='// Categorize the age without any console output\n        if (age < 18)\n        {\n            // Age is under 18'), Document(page_content='}\n        else if (age >= 18 && age < 65)\n        {\n            // Age is an adult\n        }\n        else\n        {'), Document(page_content='// Age is a senior citizen\n        }\n    }\n}')]
"""

▶ requirements.txt


aiohttp==3.9.5
aiosignal==1.3.1
annotated-types==0.7.0
async-timeout==4.0.3
attrs==23.2.0
certifi==2024.6.2
charset-normalizer==3.3.2
frozenlist==1.4.1
greenlet==3.0.3
idna==3.7
jsonpatch==1.33
jsonpointer==3.0.0
langchain==0.2.6
langchain-core==0.2.10
langchain-text-splitters==0.2.2
langsmith==0.1.82
multidict==6.0.5
numpy==1.26.4
orjson==3.10.5
packaging==24.1
pydantic==2.7.4
pydantic_core==2.18.4
PyYAML==6.0.1
requests==2.32.3
SQLAlchemy==2.0.31
tenacity==8.4.2
typing_extensions==4.12.2
urllib3==2.2.2
yarl==1.9.4

aiohttp==3.9.5

aiosignal==1.3.1

annotated-types==0.7.0

async-timeout==4.0.3

attrs==23.2.0

certifi==2024.6.2

charset-normalizer==3.3.2

frozenlist==1.4.1

greenlet==3.0.3

idna==3.7

jsonpatch==1.33

jsonpointer==3.0.0

langchain==0.2.6

langchain-core==0.2.10

langchain-text-splitters==0.2.2

langsmith==0.1.82

multidict==6.0.5

numpy==1.26.4

orjson==3.10.5

packaging==24.1

pydantic==2.7.4

pydantic_core==2.18.4

PyYAML==6.0.1

requests==2.32.3

SQLAlchemy==2.0.31

tenacity==8.4.2

typing_extensions==4.12.2

urllib3==2.2.2

yarl==1.9.4

※ pip install langchain langchain-text-splitters 명령을 실행했다.