■ 벡터로 텍스트를 표현하는 방법을 보여준다.
▶ main.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
# Demonstrates how to represent text as vectors using the default
# embedding function shipped with pymilvus[model].
from pymilvus import model

# If connecting to https://huggingface.co/ fails, uncomment the following
# two lines to use a mirror endpoint instead.
# import os
# os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"

# This downloads a small embedding model,
# "paraphrase-albert-small-v2" (~50MB).
onnxEmbeddingFunction = model.DefaultEmbeddingFunction()

stringList = [
    "Artificial intelligence was founded as an academic discipline in 1956.",
    "Alan Turing was the first person to conduct substantial research in AI.",
    "Born in Maida Vale, London, Turing was raised in southern England.",
]

# One embedding vector (NumPy array) per input document, in input order.
stringVectorList = onnxEmbeddingFunction.encode_documents(stringList)

# Pair every document with its vector; the position in the list is the id.
itemList = [
    {"id": i, "vector": vector, "text": text, "subject": "history"}
    for i, (text, vector) in enumerate(zip(stringList, stringVectorList))
]

print("Data has", len(itemList), "entities, each with fields : ", itemList[0].keys())
print("Vector dim :", len(itemList[0]["vector"]))

# Sample output:
# Data has 3 entities, each with fields :  dict_keys(['id', 'vector', 'text', 'subject'])
# Vector dim : 768
▶ requirements.txt
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
certifi==2024.8.30
charset-normalizer==3.3.2
coloredlogs==15.0.1
environs==9.5.0
filelock==3.16.1
flatbuffers==24.3.25
fsspec==2024.9.0
grpcio==1.66.2
huggingface-hub==0.25.1
humanfriendly==10.0
idna==3.10
marshmallow==3.22.0
milvus-lite==2.4.10
milvus-model==0.2.7
mpmath==1.3.0
numpy==2.1.2
onnxruntime==1.19.2
packaging==24.1
pandas==2.2.3
protobuf==5.28.2
pymilvus==2.4.7
python-dateutil==2.9.0.post0
python-dotenv==1.0.1
pytz==2024.2
PyYAML==6.0.2
regex==2024.9.11
requests==2.32.3
safetensors==0.4.5
scipy==1.14.1
six==1.16.0
sympy==1.13.3
tokenizers==0.20.0
tqdm==4.66.5
transformers==4.45.1
typing_extensions==4.12.2
tzdata==2024.2
ujson==5.10.0
urllib3==2.2.3
※ pip install pymilvus[model] 명령을 실행했다.