■ ChatOllama 클래스를 사용해 이미지에서 추출한 텍스트를 이미지와 함께 표시하는 방법을 보여준다. (llava:latest 모델)
▶ main.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
import base64
import os
from datetime import datetime

import matplotlib.pyplot as plt
from langchain.schema.messages import HumanMessage
from langchain_ollama import ChatOllama
from PIL import Image


def printMessage(message, *argumentTuple):
    """Print a message prefixed with the current local time as [HH:MM:SS].

    Args:
        message: Text to print. When extra positional arguments are given it
            is used as a %-style format string.
        *argumentTuple: Optional values substituted into ``message`` with the
            ``%`` operator.
    """
    timeStamp = datetime.now().strftime("[%H:%M:%S]")
    finalMessage = message % argumentTuple if argumentTuple else message
    print(f"{timeStamp} {finalMessage}")


def getBASE64StringFromFile(filePath):
    """Read a file in binary mode and return its content as a BASE64 string.

    Args:
        filePath: Path of the file to read.

    Returns:
        The BASE64 encoding of the file bytes, decoded to ``str`` as UTF-8.
    """
    with open(filePath, "rb") as bufferedReader:
        imageBytes = bufferedReader.read()
    return base64.b64encode(imageBytes).decode("utf-8")


def getImageDescription(chatOllama, imageFilePath):
    """Ask the chat model to describe one image file and return its answer.

    The image is embedded in the request as a BASE64 ``data:image/jpeg`` URL
    next to a fixed text prompt.

    Args:
        chatOllama: Chat model object; its ``invoke`` method is called with a
            single-element message list.
        imageFilePath: Path of the image file to describe.

    Returns:
        The ``content`` attribute of the message returned by ``invoke``.
    """
    imageBASE64String = getBASE64StringFromFile(imageFilePath)
    humanMessage = HumanMessage(
        content=[
            {"type": "text", "text": "Please describe the image in detail."},
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{imageBASE64String}"}},
        ]
    )
    responseAIMessage = chatOllama.invoke([humanMessage])
    return responseAIMessage.content


# Collect the .jpg files of the "temp" directory in sorted order.
printMessage("START GET IMAGE FILE PATH LIST")
imageFilePathList = sorted(
    [
        os.path.join("temp", imageFileName)
        for imageFileName in os.listdir("temp")
        if imageFileName.endswith(".jpg")
    ]
)
printMessage("END GET IMAGE FILE PATH LIST")
print()

printMessage("START CREATE CHATOLLAMA")
chatOllama = ChatOllama(model="llava:latest", temperature=0)
printMessage("END CREATE CHATOLLAMA")
print()

# Request one description per image and remember it by file path.
printMessage("START GET IMAGE DESCRIPTION")
print("-" * 50)
imageDescriptionDictionary = dict()
for imageFilePath in imageFilePathList:
    imageDescription = getImageDescription(chatOllama, imageFilePath)
    imageDescriptionDictionary[imageFilePath] = imageDescription
    print(f" {imageFilePath}")
print("-" * 50)
printMessage("END GET IMAGE DESCRIPTION")
print()

# A fixed 4 x 5 grid makes plt.subplot raise ValueError on the 21st image, so
# the row count grows with the number of images. At least 4 rows are kept and
# the figure height is 2.5 inches per row, which reproduces the original
# 20 x 10 inch layout whenever there are 20 images or fewer.
subplotColumnCount = 5
subplotRowCount = max(4, (len(imageFilePathList) + subplotColumnCount - 1) // subplotColumnCount)

plt.figure(figsize=(20, 2.5 * subplotRowCount))
for index, imageFilePath in enumerate(imageFilePathList):
    # Open inside a context manager so the file handle is released as soon as
    # the pixel data has been converted into an independent RGB image.
    with Image.open(imageFilePath) as sourceImage:
        image = sourceImage.convert("RGB")
    plt.subplot(subplotRowCount, subplotColumnCount, index + 1)
    plt.imshow(image)
    # Title: file name plus the first 50 characters of the description.
    plt.title(f"{os.path.basename(imageFilePath)}\n{imageDescriptionDictionary[imageFilePath][:50]}", fontsize=8)
    plt.xticks([])
    plt.yticks([])
plt.tight_layout()
plt.show()
▶ requirements.txt
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
aiohappyeyeballs==2.4.4 aiohttp==3.11.11 aiosignal==1.3.2 annotated-types==0.7.0 anyio==4.8.0 attrs==24.3.0 certifi==2024.12.14 charset-normalizer==3.4.1 contourpy==1.3.1 cycler==0.12.1 fonttools==4.55.3 frozenlist==1.5.0 greenlet==3.1.1 h11==0.14.0 httpcore==1.0.7 httpx==0.27.2 idna==3.10 jsonpatch==1.33 jsonpointer==3.0.0 kiwisolver==1.4.8 langchain==0.3.14 langchain-core==0.3.29 langchain-ollama==0.2.2 langchain-text-splitters==0.3.5 langsmith==0.2.10 matplotlib==3.10.0 multidict==6.1.0 numpy==2.2.1 ollama==0.4.6 orjson==3.10.14 packaging==24.2 pillow==11.1.0 propcache==0.2.1 pydantic==2.10.5 pydantic_core==2.27.2 pyparsing==3.2.1 python-dateutil==2.9.0.post0 PyYAML==6.0.2 requests==2.32.3 requests-toolbelt==1.0.0 six==1.17.0 sniffio==1.3.1 SQLAlchemy==2.0.37 tenacity==9.0.0 typing_extensions==4.12.2 urllib3==2.3.0 yarl==1.18.3 |
※ pip install langchain langchain_ollama matplotlib 명령을 실행했다.