slim.frikha committed · cf63936

falcon3 release

Files changed:
- .gitattributes +35 -0
- README.md +143 -0
- config.json +37 -0
- generation_config.json +6 -0
- model.safetensors +3 -0
- special_tokens_map.json +41 -0
- tokenizer.json +0 -0
- tokenizer_config.json +0 -0
    	
.gitattributes ADDED
@@ -0,0 +1,35 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
    	
README.md ADDED
@@ -0,0 +1,143 @@
---
library_name: transformers
tags:
- bitnet
- falcon3
base_model: tiiuae/Falcon3-7B-Instruct
license: other
license_name: falcon-llm-license
license_link: https://falconllm.tii.ae/falcon-terms-and-conditions.html
---

# Table of Contents

0. [TL;DR](#tldr)
1. [Model Details](#model-details)
2. [Training Details](#training-details)
3. [Usage](#usage)
4. [Evaluation](#evaluation)
5. [Citation](#citation)

# TL;DR

Falcon3-7B-Instruct-1.58bit is a 1.58-bit (BitNet) compressed version of [tiiuae/Falcon3-7B-Instruct](https://huggingface.co/tiiuae/Falcon3-7B-Instruct) that can be run with the Hugging Face transformers library or with BitNet.

# Model Details

## Model Description

- **Developed by:** [https://www.tii.ae](https://www.tii.ae)
- **Model type:** Causal decoder-only - instruct / chat version
- **Architecture:** Pure-transformer - 1.58bit version
- **Language(s) (NLP):** Mainly English
- **License:** TII Falcon License 2.0

# Training Details

The model has been trained following the training strategies from the recent [1-bit LLM HF blogpost](https://huggingface.co/blog/1_58_llm_extreme_quantization) and the [1-bit LLM paper](https://github.com/microsoft/unilm/blob/master/bitnet/The-Era-of-1-bit-LLMs__Training_Tips_Code_FAQ.pdf).
For more details about the training protocol of this model, please refer to the Falcon3 technical report, section *Compression*.

# Usage

Currently, to use this model you can rely on either the Hugging Face transformers library or the [BitNet](https://github.com/microsoft/BitNet) library. You can also play with the model in the [falcon-1.58bit playground](https://huggingface.co/spaces/tiiuae/falcon3-1.58bit-playground) (available for the 7B instruct version only).

## 🤗 transformers

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "tiiuae/Falcon3-7B-Instruct-1.58bit"

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
).to("cuda")

# Perform text generation
```
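The snippet above loads the model but stops short of actually generating text. A minimal continuation is sketched below; the prompt and decoding settings are purely illustrative, and it assumes the bundled tokenizer carries the same chat template as the base Falcon3-7B-Instruct model:

```python
# Continues the loading snippet above (model, model_id and AutoTokenizer are already available).
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Illustrative prompt; assumes the tokenizer ships a chat template like the base instruct model.
messages = [{"role": "user", "content": "Explain 1.58-bit quantization in two sentences."}]
input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt",
).to("cuda")

# Greedy decoding; decode only the newly generated tokens.
output = model.generate(input_ids, max_new_tokens=128, do_sample=False)
print(tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True))
```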

## BitNet

```bash
git clone https://github.com/microsoft/BitNet && cd BitNet
pip install -r requirements.txt
python setup_env.py --hf-repo tiiuae/Falcon3-7B-Instruct-1.58bit -q i2_s
python run_inference.py -m models/Falcon3-7B-Instruct-1.58bit/ggml-model-i2_s.gguf -p "You are a helpful assistant" -cnv
```

# Evaluation

We report our internal pipeline benchmarks in the following table:

<table border="1" style="width: 100%; text-align: center; border-collapse: collapse;">
    <colgroup>
        <col style="width: 10%;">
        <col style="width: 10%;">
        <col style="background-color: rgba(80, 15, 213, 0.5); width: 7%;">
    </colgroup>
    <thead>
        <tr>
            <th>Benchmark</th>
            <th>Llama3-8B-1.58-100B-tokens</th>
            <th>Falcon3-7B-Instruct-1.58bit</th>
        </tr>
    </thead>
    <tbody>
        <tr>
            <td>IFEval</td>
            <td>17.91</td>
            <td><b>59.24</b></td>
        </tr>
        <tr>
            <td>MUSR</td>
            <td><b>4.87</b></td>
            <td>1.76</td>
        </tr>
        <tr>
            <td>GPQA</td>
            <td>1.83</td>
            <td><b>5.25</b></td>
        </tr>
        <tr>
            <td>BBH</td>
            <td>5.36</td>
            <td><b>8.54</b></td>
        </tr>
        <tr>
            <td>MMLU-PRO</td>
            <td>2.78</td>
            <td><b>8.44</b></td>
        </tr>
        <tr>
            <td>MATH</td>
            <td>0.26</td>
            <td><b>2.93</b></td>
        </tr>
        <tr>
            <td>Average</td>
            <td>5.5</td>
            <td><b>14.36</b></td>
        </tr>
    </tbody>
</table>

## Useful links

- View our [release blogpost](https://huggingface.co/blog/falcon3).
- Feel free to join [our Discord server](https://discord.gg/fwXpMyGc) if you have any questions or want to interact with our researchers and developers.

## Citation

If the Falcon3 family of models was helpful for your work, feel free to give us a cite:

```bibtex
@misc{Falcon3,
    title = {The Falcon 3 Family of Open Models},
    author = {Falcon-LLM Team},
    month = {December},
    year = {2024}
}
```
    	
config.json ADDED
@@ -0,0 +1,37 @@
{
  "_name_or_path": "/efs/qiyang/checkpoint/Falcon3-7B-TL-BitNet-FT/hf_bitnet",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 11,
  "eos_token_id": 11,
  "head_dim": 256,
  "hidden_act": "silu",
  "hidden_size": 3072,
  "initializer_range": 0.02,
  "intermediate_size": 23040,
  "is_bitnet_config": true,
  "max_position_embeddings": 32768,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 12,
  "num_hidden_layers": 28,
  "num_key_value_heads": 4,
  "pretraining_tp": 1,
  "quantization_config": {
    "modules_to_not_convert": [
      "lm_head"
    ],
    "quant_method": "bitnet"
  },
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 1000042,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.46.0.dev0",
  "use_cache": true,
  "vocab_size": 131080
}
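For a quick sanity check, the quantization settings declared above can be read back through transformers. This is a minimal sketch; the values in the comments simply mirror the fields of this config.json:

```python
from transformers import AutoConfig

# Read the configuration shown above straight from the Hub.
config = AutoConfig.from_pretrained("tiiuae/Falcon3-7B-Instruct-1.58bit")

# BitNet quantization with lm_head left unconverted, as declared in config.json.
print(config.quantization_config)
print(config.num_hidden_layers, config.hidden_size, config.vocab_size)  # 28 3072 131080
```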
    	
generation_config.json ADDED
@@ -0,0 +1,6 @@
{
  "_from_model_config": true,
  "bos_token_id": 11,
  "eos_token_id": 11,
  "transformers_version": "4.46.0.dev0"
}
    	
model.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0e0ebeda732cbbc8d142b9693907ad488eace004bbf275116ed79fe8c8fc5075
size 3273630320
    	
special_tokens_map.json ADDED
@@ -0,0 +1,41 @@
{
  "additional_special_tokens": [
    ">>TITLE<<",
    ">>ABSTRACT<<",
    ">>INTRODUCTION<<",
    ">>SUMMARY<<",
    ">>COMMENT<<",
    ">>ANSWER<<",
    ">>QUESTION<<",
    ">>DOMAIN<<",
    ">>EMAIL_ADDRESS<<",
    ">>IP_ADDRESS<<",
    "<|startoftext|>",
    ">>IP_ADDRESS_0<<",
    ">>IP_ADDRESS_1<<",
    ">>IP_ADDRESS_2<<",
    ">>IP_ADDRESS_3<<",
    ">>IP_ADDRESS_4<<",
    ">>IP_ADDRESS_5<<",
    ">>IP_ADDRESS_6<<",
    ">>IP_ADDRESS_7<<",
    ">>IP_ADDRESS_8<<",
    ">>IP_ADDRESS_9<<",
    ">>PASSWORD<<",
    ">>KEY<<"
  ],
  "eos_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<pad>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
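The special tokens declared above can also be inspected through the tokenizer. This is a minimal sketch, assuming the bundled tokenizer_config.json is consistent with this map:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("tiiuae/Falcon3-7B-Instruct-1.58bit")

# End-of-sequence and padding tokens as declared in special_tokens_map.json: <|endoftext|> and <pad>.
print(tokenizer.eos_token, tokenizer.pad_token)

# Expected to match the 23 additional special tokens listed above (>>TITLE<<, ..., >>KEY<<).
print(len(tokenizer.additional_special_tokens))
```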
    	
tokenizer.json ADDED
(The diff for this file is too large to render; see the raw file.)
    	
tokenizer_config.json ADDED
(The diff for this file is too large to render; see the raw file.)
