---
base_model: stabilityai/stablelm-zephyr-3b
datasets:
- HuggingFaceH4/ultrachat_200k
- HuggingFaceH4/ultrafeedback_binarized
- meta-math/MetaMathQA
- WizardLM/WizardLM_evol_instruct_V2_196k
- Intel/orca_dpo_pairs
language:
- en
license: other
thumbnail: "https://github.com/fabiomatricardi/OpenVINO-StableLM-3B-streamlit/raw/main/social.jpg"
tags:
- causal-lm
- openvino
- nncf
- 4-bit
extra_gated_fields:
  Name: text
  Email: text
  Country: text
  Organization or Affiliation: text
  I ALLOW Stability AI to email me about new model releases: checkbox
model-index:
- name: stablelm-zephyr-3b
  results:
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: AI2 Reasoning Challenge (25-Shot)
      type: ai2_arc
      config: ARC-Challenge
      split: test
      args:
        num_few_shot: 25
    metrics:
    - type: acc_norm
      value: 46.08
      name: normalized accuracy
    source:
      url: >-
        https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=stabilityai/stablelm-zephyr-3b
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: HellaSwag (10-Shot)
      type: hellaswag
      split: validation
      args:
        num_few_shot: 10
    metrics:
    - type: acc_norm
      value: 74.16
      name: normalized accuracy
    source:
      url: >-
        https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=stabilityai/stablelm-zephyr-3b
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: MMLU (5-Shot)
      type: cais/mmlu
      config: all
      split: test
      args:
        num_few_shot: 5
    metrics:
    - type: acc
      value: 46.17
      name: accuracy
    source:
      url: >-
        https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=stabilityai/stablelm-zephyr-3b
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: TruthfulQA (0-shot)
      type: truthful_qa
      config: multiple_choice
      split: validation
      args:
        num_few_shot: 0
    metrics:
    - type: mc2
      value: 46.49
    source:
      url: >-
        https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=stabilityai/stablelm-zephyr-3b
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Winogrande (5-shot)
      type: winogrande
      config: winogrande_xl
      split: validation
      args:
        num_few_shot: 5
    metrics:
    - type: acc
      value: 65.51
      name: accuracy
    source:
      url: >-
        https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=stabilityai/stablelm-zephyr-3b
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: GSM8k (5-shot)
      type: gsm8k
      config: main
      split: test
      args:
        num_few_shot: 5
    metrics:
    - type: acc
      value: 42.15
      name: accuracy
    source:
      url: >-
        https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=stabilityai/stablelm-zephyr-3b
      name: Open LLM Leaderboard
---

This model is a quantized version of [`stabilityai/stablelm-zephyr-3b`](https://huggingface.co/stabilityai/stablelm-zephyr-3b) converted to the OpenVINO format. It was obtained via the [nncf-quantization](https://huggingface.co/spaces/echarlaix/nncf-quantization) space with [optimum-intel](https://github.com/huggingface/optimum-intel).

Please note: For commercial use, please refer to https://stability.ai/license.

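For reference, a 4-bit weight-compressed OpenVINO export like this one can usually be reproduced locally with `optimum-intel`. The sketch below is only an illustration of that workflow; the exact NNCF settings used by the nncf-quantization space for this repository may differ.

```python
# Sketch: export the base model to OpenVINO with 4-bit weight-only compression.
# The quantization parameters here are illustrative, not the exact ones used for this repo.
from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig
from transformers import AutoTokenizer

source_id = "stabilityai/stablelm-zephyr-3b"

# Group-wise 4-bit weight-only quantization via NNCF
q_config = OVWeightQuantizationConfig(bits=4, group_size=128, ratio=1.0)

# export=True converts the PyTorch checkpoint to OpenVINO IR on the fly
model = OVModelForCausalLM.from_pretrained(source_id, export=True, quantization_config=q_config)
tokenizer = AutoTokenizer.from_pretrained(source_id)

# Save the converted IR (openvino_model.xml/.bin) together with tokenizer and config
model.save_pretrained("stablelm-zephyr-3b-openvino-4bit")
tokenizer.save_pretrained("stablelm-zephyr-3b-openvino-4bit")
```
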
### Model Description

StableLM Zephyr 3B is a 3 billion parameter instruction-tuned model inspired by [HuggingFaceH4's Zephyr 7B](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta) training pipeline. It was trained on a mix of publicly available and synthetic datasets using [Direct Preference Optimization (DPO)](https://arxiv.org/abs/2305.18290), and evaluated with [MT Bench](https://arxiv.org/abs/2306.05685) and the [Alpaca Benchmark](https://tatsu-lab.github.io/alpaca_eval/).

### Model Parameters

```
context window   = 4096
model type       = 3B
model params     = 2.80 B
BOS token        = 0 '<|endoftext|>'
EOS token        = 0 '<|endoftext|>'
UNK token        = 0 '<|endoftext|>'
PAD token        = 0 '<|endoftext|>'
```

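These values can be sanity-checked from the files shipped with this repository; a minimal sketch, assuming the exported `config.json` exposes `max_position_embeddings` (as StableLM configs do):

```python
# Sketch: verify the special tokens and context window reported above.
from transformers import AutoConfig, AutoTokenizer

model_id = "FM-1976/stablelm-zephyr-3b-openvino-4bit"
tokenizer = AutoTokenizer.from_pretrained(model_id)
config = AutoConfig.from_pretrained(model_id)

print("context window:", config.max_position_embeddings)  # expected: 4096
print("EOS token     :", tokenizer.eos_token)              # expected: '<|endoftext|>'
```
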
The tokenizer of this model supports `chat_templates`.

### Usage

StableLM Zephyr 3B uses the following instruction format:

```
<|user|>
List 3 synonyms for the word "tiny"<|endoftext|>
<|assistant|>
1. Dwarf
2. Little
3. Petite<|endoftext|>
```

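Because the tokenizer ships a chat template (see Model Parameters above), this prompt does not have to be assembled by hand. A minimal sketch of rendering it from a message list:

```python
# Sketch: render the instruction format above from the tokenizer's chat template.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("FM-1976/stablelm-zephyr-3b-openvino-4bit")

messages = [{"role": "user", "content": 'List 3 synonyms for the word "tiny"'}]

# tokenize=False returns the formatted string; add_generation_prompt appends the assistant turn
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)
# Expected output (roughly):
# <|user|>
# List 3 synonyms for the word "tiny"<|endoftext|>
# <|assistant|>
```
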
### Model Details

- Developed by: Stability AI
- Model type: StableLM Zephyr 3B is an auto-regressive language model based on the transformer decoder architecture.
- Language(s): English
- Library: [Alignment Handbook](https://github.com/huggingface/alignment-handbook.git)
- Finetuned from model: [stabilityai/stablelm-3b-4e1t](https://huggingface.co/stabilityai/stablelm-3b-4e1t)
- License: [StabilityAI Community License](https://huggingface.co/stabilityai/stablelm-zephyr-3b/raw/main/LICENSE.md).
- Commercial License: to use this model commercially, please refer to https://stability.ai/license
- Contact: For questions and comments about the model, please email [email protected]

First make sure you have `optimum-intel` (with OpenVINO support) installed:

```bash
pip install openvino-genai==2024.4.0
pip install optimum-intel[openvino]
```

To load the model and generate text, you can do as follows:

```python
from threading import Thread

from optimum.intel import OVModelForCausalLM
from transformers import AutoConfig, AutoTokenizer, TextIteratorStreamer

model_id = "FM-1976/stablelm-zephyr-3b-openvino-4bit"

# Load the tokenizer and the OpenVINO model once (CPU, latency-oriented hints)
tokenizer = AutoTokenizer.from_pretrained(model_id)
ov_model = OVModelForCausalLM.from_pretrained(
    model_id,
    device='CPU',
    ov_config={"PERFORMANCE_HINT": "LATENCY", "NUM_STREAMS": "1", "CACHE_DIR": ""},
    config=AutoConfig.from_pretrained(model_id),
)

# Generation with a prompt message
question = 'Explain the plot of Cinderella in a sentence.'
messages = [
    {"role": "user", "content": question}
]

print('Question:', question)

# Credit to https://github.com/openvino-dev-samples/chatglm3.openvino/blob/main/chat.py
streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)

# Build the input ids with the tokenizer's chat template
model_inputs = tokenizer.apply_chat_template(messages,
                                             add_generation_prompt=True,
                                             tokenize=True,
                                             return_tensors="pt")

generate_kwargs = dict(input_ids=model_inputs,
                       max_new_tokens=450,
                       temperature=0.1,
                       do_sample=True,
                       top_p=0.5,
                       repetition_penalty=1.178,
                       pad_token_id=tokenizer.eos_token_id,
                       streamer=streamer)

# Run generation in a background thread and print tokens as they are streamed
t1 = Thread(target=ov_model.generate, kwargs=generate_kwargs)
t1.start()
for new_text in streamer:
    print(new_text, end="", flush=True)
```
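
The example above streams tokens from a background thread via `TextIteratorStreamer`. If streaming is not needed, the same objects can be reused for a plain, blocking call; a minimal sketch that continues from the block above:

```python
# Sketch: non-streaming generation, reusing ov_model, tokenizer and messages from above.
input_ids = tokenizer.apply_chat_template(messages,
                                          add_generation_prompt=True,
                                          tokenize=True,
                                          return_tensors="pt")

output_ids = ov_model.generate(input_ids,
                               max_new_tokens=450,
                               do_sample=False,  # greedy decoding for a deterministic reply
                               pad_token_id=tokenizer.eos_token_id)

# Decode only the newly generated tokens, skipping the prompt
answer = tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True)
print(answer)
```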