■ This shows how to run the server using the serve option of the ollama command.
1. Open a command prompt.
2. Run the commands below.
▶ Commands and output
SET OLLAMA_HOST=127.0.0.1:8080
ollama serve
2024/08/29 22:28:51 routes.go:1125: INFO server config env="map[CUDA_VISIBLE_DEVICES: GPU_DEVICE_ORDINAL: HIP_VISIBLE_DEVICES: HSA_OVERRIDE_GFX_VERSION: OLLAMA_DEBUG:false OLLAMA_FLASH_ATTENTION:false OLLAMA_HOST:http://127.0.0.1:8080 OLLAMA_INTEL_GPU:false OLLAMA_KEEP_ALIVE:5m0s OLLAMA_LLM_LIBRARY: OLLAMA_MAX_LOADED_MODELS:0 OLLAMA_MAX_QUEUE:512 OLLAMA_MODELS:C:\\Users\\king\\.ollama\\models OLLAMA_NOHISTORY:false OLLAMA_NOPRUNE:false OLLAMA_NUM_PARALLEL:0 OLLAMA_ORIGINS:[http://localhost https://localhost http://localhost:* https://localhost:* http://127.0.0.1 https://127.0.0.1 http://127.0.0.1:* https://127.0.0.1:* http://0.0.0.0 https://0.0.0.0 http://0.0.0.0:* https://0.0.0.0:* app://* file://* tauri://*] OLLAMA_RUNNERS_DIR:C:\\Users\\king\\AppData\\Local\\Programs\\Ollama\\lib\\ollama\\runners OLLAMA_SCHED_SPREAD:false OLLAMA_TMPDIR: ROCR_VISIBLE_DEVICES:]"
time=2024-08-29T22:28:51.163+09:00 level=INFO source=images.go:753 msg="total blobs: 11"
time=2024-08-29T22:28:51.163+09:00 level=INFO source=images.go:760 msg="total unused blobs removed: 0"
time=2024-08-29T22:28:51.163+09:00 level=INFO source=routes.go:1172 msg="Listening on 127.0.0.1:8080 (version 0.3.8)"
time=2024-08-29T22:28:51.164+09:00 level=INFO source=payload.go:44 msg="Dynamic LLM libraries [cuda_v12 rocm_v6.1 cpu cpu_avx cpu_avx2 cuda_v11]"
time=2024-08-29T22:28:51.164+09:00 level=INFO source=gpu.go:200 msg="looking for compatible GPUs"
time=2024-08-29T22:28:51.332+09:00 level=INFO source=amd_windows.go:91 msg="unsupported Radeon iGPU detected skipping" id=0 name="AMD Radeon(TM) Graphics" gfx=gfx1036
time=2024-08-29T22:28:51.333+09:00 level=INFO source=types.go:107 msg="inference compute" id=GPU-ff1a7c25-a377-71b9-d670-2406e18d4593 library=cuda variant=v12 compute=8.9 driver=12.2 name="NVIDIA GeForce RTX 4070 Ti" total="12.0 GiB" available="3.5 GiB"
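Note that SET only applies to the current command prompt session; a new window falls back to the default address of 127.0.0.1:11434. Once the log shows "Listening on 127.0.0.1:8080", you can confirm the server responds from a second command prompt. Below is a minimal check, assuming the server started above is still running (curl is bundled with recent builds of Windows 10/11).

▶ Verification command (example)
curl http://127.0.0.1:8080/api/version

If the server is up, this returns a small JSON payload reporting the version, e.g. {"version":"0.3.8"} for the run shown above.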