Spaces:
Runtime error
Runtime error
| sudo apt-get update -y | |
| sudo apt-get install -y gcc-12 g++-12 | |
| sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12 | |
| pip install -v -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu | |
| git clone https://github.com/vllm-project/vllm.git | |
| cd vllm && VLLM_TARGET_DEVICE=cpu python setup.py install && cd .. | |
| git clone https://huggingface.co/Moses25/Mistral-7B-Instruct-32K-AWQ | |
| export VLLM_CPU_KVCACHE_SPACE=15 | |
| python -m vllm.entrypoints.openai.api_server --model=Mistral-7B-Instruct-32K-AWQ \ | |
| --trust-remote-code --host 0.0.0.0 --port 7777 \ | |
| --gpu-memory-utilization 0.8 \ | |
| --enforce-eager \ | |
| --max-model-len 8192 --chat-template llama2-chat-template.jinja \ | |
| --tensor-parallel-size 1 --served-model-name dewu-chat |