[PYTHON/LANGCHAIN] ChatOllama 클래스 : 이미지에서 추출한 텍스트를 이미지와 함께 표시하기 (llava:latest 모델)

■ ChatOllama 클래스를 사용해 이미지에서 추출한 텍스트를 이미지와 함께 표시하는 방법을 보여준다. (llava:latest 모델)

▶ main.py


import base64
import os
import matplotlib.pyplot as plt

from datetime                  import datetime
from langchain.schema.messages import HumanMessage
from langchain_ollama          import ChatOllama
from PIL                       import Image

def printMessage(message, *argumentTuple):
    """Print ``message`` to stdout prefixed with the current ``[HH:MM:SS]`` time.

    When extra positional arguments are supplied they are applied to
    ``message`` with the ``%`` formatting operator; without them ``message``
    is printed unchanged, so a literal ``%`` in it is left alone.
    """
    clockPrefix   = datetime.now().strftime("[%H:%M:%S]")
    formattedText = message % argumentTuple if argumentTuple else message
    print(f"{clockPrefix} {formattedText}")

def getBASE64StringFromFile(filePath):
    """Return the contents of the file at ``filePath`` as a BASE64 text string.

    The file is read in binary mode in one go, BASE64-encoded, and the
    encoded bytes are decoded as UTF-8 to produce the returned ``str``.
    """
    with open(filePath, "rb") as binaryFile:
        rawBytes = binaryFile.read()
    return base64.b64encode(rawBytes).decode("utf-8")

def getImageDescription(chatOllama, imageFilePath):
    """Ask the chat model for a detailed description of one image file.

    The file is BASE64-encoded, embedded in a ``data:image/jpeg`` URL and sent
    together with a fixed text prompt as a single human message. The
    ``content`` attribute of the model's reply is returned.
    """
    imageDataURL = f"data:image/jpeg;base64,{getBASE64StringFromFile(imageFilePath)}"
    promptPart   = {"type" : "text"     , "text"      : "Please describe the image in detail."}
    imagePart    = {"type" : "image_url", "image_url" : {"url" : imageDataURL}}
    reply        = chatOllama.invoke([HumanMessage(content = [promptPart, imagePart])])
    return reply.content

# Step 1: collect the ".jpg" files in the "temp" directory, sorted so the
# processing and display order is deterministic.
printMessage("START GET IMAGE FILE PATH LIST")
imageFilePathList = sorted(
    os.path.join("temp", imageFileName)
    for imageFileName in os.listdir("temp")
    if imageFileName.endswith(".jpg")
)
printMessage("END GET IMAGE FILE PATH LIST")
print()

# Step 2: create the chat model client for the llava model.
printMessage("START CREATE CHATOLLAMA")
chatOllama = ChatOllama(model = "llava:latest", temperature = 0)
printMessage("END CREATE CHATOLLAMA")
print()

# Step 3: request a description for every image, keyed by file path, and
# echo each path as it completes.
printMessage("START GET IMAGE DESCRIPTION")
print("-" * 50)
imageDescriptionDictionary = dict()
for imageFilePath in imageFilePathList:
    imageDescription = getImageDescription(chatOllama, imageFilePath)
    imageDescriptionDictionary[imageFilePath] = imageDescription
    print(f"    {imageFilePath}")
print("-" * 50)
printMessage("END GET IMAGE DESCRIPTION")
print()

# Step 4: show every image in a grid, titled with its file name and the
# first 50 characters of its description.
# The grid keeps the 4 x 5 layout for up to 20 images and grows by whole rows
# beyond that; a fixed 4 x 5 grid makes plt.subplot raise ValueError on the
# 21st image. The figure height scales with the row count (10 for 4 rows).
columnCount = 5
rowCount    = max(4, (len(imageFilePathList) + columnCount - 1) // columnCount)

plt.figure(figsize = (20, 2.5 * rowCount))

for index, imageFilePath in enumerate(imageFilePathList):
    # Close the file handle as soon as the pixel data has been converted;
    # convert() returns an independent in-memory image.
    with Image.open(imageFilePath) as sourceImage:
        image = sourceImage.convert("RGB")
    plt.subplot(rowCount, columnCount, index + 1)
    plt.imshow(image)
    plt.title(f"{os.path.basename(imageFilePath)}\n{imageDescriptionDictionary[imageFilePath][:50]}", fontsize = 8)

    plt.xticks([])
    plt.yticks([])

plt.tight_layout()
plt.show()

import base64

import os

import matplotlib.pyplot as plt

from datetime import datetime

from langchain.schema.messages import HumanMessage

from langchain_ollama import ChatOllama

from PIL import Image

def printMessage(message, *argumentTuple):
    """Print ``message`` to stdout prefixed with the current ``[HH:MM:SS]`` time.

    Extra positional arguments, when given, are applied to ``message`` with
    the ``%`` formatting operator; otherwise ``message`` is printed as-is.
    """
    timeStamp = datetime.now().strftime("[%H:%M:%S]")
    if argumentTuple:
        finalMessage = message % argumentTuple
    else:
        # No arguments: skip %-formatting so a literal "%" cannot raise.
        finalMessage = message
    print(f"{timeStamp} {finalMessage}")

def getBASE64StringFromFile(filePath):
    """Return the contents of the file at ``filePath`` as a BASE64 text string.

    The file is opened in binary mode and read completely; the BASE64 bytes
    are decoded as UTF-8 to produce the returned ``str``.
    """
    with open(filePath, "rb") as bufferedReader:
        imageBytes = bufferedReader.read()
    base64Bytes  = base64.b64encode(imageBytes)
    base64String = base64Bytes.decode("utf-8")
    return base64String

def getImageDescription(chatOllama, imageFilePath):
    """Ask the chat model for a detailed description of one image file.

    The image is BASE64-encoded and sent as a ``data:image/jpeg`` URL next to
    a fixed text prompt in a single human message. The ``content`` attribute
    of the model's reply is returned.
    """
    imageBASE64String = getBASE64StringFromFile(imageFilePath)
    humanMessage      = HumanMessage(
        content = [
            {"type" : "text"     , "text"      : "Please describe the image in detail."                 },
            {"type" : "image_url", "image_url" : {"url" : f"data:image/jpeg;base64,{imageBASE64String}"}}
        ]
    )
    responseAIMessage = chatOllama.invoke([humanMessage])
    return responseAIMessage.content

# Step 1: collect the ".jpg" files in the "temp" directory in sorted order.
printMessage("START GET IMAGE FILE PATH LIST")
imageFilePathList = sorted([os.path.join("temp", imageFileName) for imageFileName in os.listdir("temp") if imageFileName.endswith(".jpg")])
printMessage("END GET IMAGE FILE PATH LIST")
print()

# Step 2: create the chat model client for the llava model.
printMessage("START CREATE CHATOLLAMA")
chatOllama = ChatOllama(model = "llava:latest", temperature = 0)
printMessage("END CREATE CHATOLLAMA")
print()

# Step 3: request a description for every image, keyed by file path.
printMessage("START GET IMAGE DESCRIPTION")
print("-" * 50)
imageDescriptionDictionary = dict()
for imageFilePath in imageFilePathList:
    imageDescription = getImageDescription(chatOllama, imageFilePath)
    imageDescriptionDictionary[imageFilePath] = imageDescription
    print(f" {imageFilePath}")
print("-" * 50)
printMessage("END GET IMAGE DESCRIPTION")
print()

# Step 4: show every image in a grid, titled with its file name and the
# first 50 characters of its description.
# The grid stays 4 x 5 for up to 20 images and gains rows beyond that, because
# a fixed 4 x 5 grid makes plt.subplot raise ValueError on the 21st image.
gridColumnCount = 5
gridRowCount    = max(4, (len(imageFilePathList) + gridColumnCount - 1) // gridColumnCount)

plt.figure(figsize = (20, 2.5 * gridRowCount))

for index, imageFilePath in enumerate(imageFilePathList):
    # Release the file handle once the pixel data has been converted.
    with Image.open(imageFilePath) as openedImage:
        image = openedImage.convert("RGB")
    plt.subplot(gridRowCount, gridColumnCount, index + 1)
    plt.imshow(image)
    plt.title(f"{os.path.basename(imageFilePath)}\n{imageDescriptionDictionary[imageFilePath][:50]}", fontsize = 8)
    plt.xticks([])
    plt.yticks([])

plt.tight_layout()
plt.show()

▶ requirements.txt


aiohappyeyeballs==2.4.4
aiohttp==3.11.11
aiosignal==1.3.2
annotated-types==0.7.0
anyio==4.8.0
attrs==24.3.0
certifi==2024.12.14
charset-normalizer==3.4.1
contourpy==1.3.1
cycler==0.12.1
fonttools==4.55.3
frozenlist==1.5.0
greenlet==3.1.1
h11==0.14.0
httpcore==1.0.7
httpx==0.27.2
idna==3.10
jsonpatch==1.33
jsonpointer==3.0.0
kiwisolver==1.4.8
langchain==0.3.14
langchain-core==0.3.29
langchain-ollama==0.2.2
langchain-text-splitters==0.3.5
langsmith==0.2.10
matplotlib==3.10.0
multidict==6.1.0
numpy==2.2.1
ollama==0.4.6
orjson==3.10.14
packaging==24.2
pillow==11.1.0
propcache==0.2.1
pydantic==2.10.5
pydantic_core==2.27.2
pyparsing==3.2.1
python-dateutil==2.9.0.post0
PyYAML==6.0.2
requests==2.32.3
requests-toolbelt==1.0.0
six==1.17.0
sniffio==1.3.1
SQLAlchemy==2.0.37
tenacity==9.0.0
typing_extensions==4.12.2
urllib3==2.3.0
yarl==1.18.3

aiohappyeyeballs==2.4.4

aiohttp==3.11.11

aiosignal==1.3.2

annotated-types==0.7.0

anyio==4.8.0

attrs==24.3.0

certifi==2024.12.14

charset-normalizer==3.4.1

contourpy==1.3.1

cycler==0.12.1

fonttools==4.55.3

frozenlist==1.5.0

greenlet==3.1.1

h11==0.14.0

httpcore==1.0.7

httpx==0.27.2

idna==3.10

jsonpatch==1.33

jsonpointer==3.0.0

kiwisolver==1.4.8

langchain==0.3.14

langchain-core==0.3.29

langchain-ollama==0.2.2

langchain-text-splitters==0.3.5

langsmith==0.2.10

matplotlib==3.10.0

multidict==6.1.0

numpy==2.2.1

ollama==0.4.6

orjson==3.10.14

packaging==24.2

pillow==11.1.0

propcache==0.2.1

pydantic==2.10.5

pydantic_core==2.27.2

pyparsing==3.2.1

python-dateutil==2.9.0.post0

PyYAML==6.0.2

requests==2.32.3

requests-toolbelt==1.0.0

six==1.17.0

sniffio==1.3.1

SQLAlchemy==2.0.37

tenacity==9.0.0

typing_extensions==4.12.2

urllib3==2.3.0

yarl==1.18.3

※ pip install langchain langchain_ollama matplotlib 명령을 실행했다.

Post Views: 3

2D AI GRAPHICS IMAGE LANGCHAIN LLAVA LLM OLLAMA PYTHON

icodebroker

[PYTHON/LANGCHAIN] ChatOllama 클래스 : 이미지에서 추출한 텍스트를 이미지와 함께 표시하기 (llava:latest 모델)

분류

보관함