diff --git a/chapters/en/chapter2/8.mdx b/chapters/en/chapter2/8.mdx index 84436e81b..fa7f0e4a9 100644 --- a/chapters/en/chapter2/8.mdx +++ b/chapters/en/chapter2/8.mdx @@ -166,19 +166,14 @@ cd llama.cpp make # Download the SmolLM2-1.7B-Instruct-GGUF model -curl -L -O https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct-GGUF/resolve/main/smollm2-1.7b-instruct.Q4_K_M.gguf +curl -L -o smollm2-1.7b-instruct-q4_k_m.gguf "https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct-GGUF/resolve/main/smollm2-1.7b-instruct-q4_k_m.gguf?download=1" ``` Then, launch the server (with OpenAI API compatibility): ```sh # Start the server -./server \ - -m smollm2-1.7b-instruct.Q4_K_M.gguf \ - --host 0.0.0.0 \ - --port 8080 \ - -c 4096 \ - --n-gpu-layers 0 # Set to a higher number to use GPU +./build/bin/llama-server -m smollm2-1.7b-instruct-q4_k_m.gguf --host 0.0.0.0 --port 8080 -c 4096 --n-gpu-layers 0 # Set to a higher number to use GPU ``` Interact with the server using Hugging Face's InferenceClient: