Commit 48d1fa4
Parent(s): 7d47ee8
tgi infos
README.md CHANGED
@@ -151,6 +151,40 @@ for i, t in enumerate(generated_text):
     print(f"{i}:\n{t}\n")
 ```
 
+## Text generation inference
+
+The hosted inference API is powered by [Text Generation Inference](https://github.com/huggingface/text-generation-inference). To query the model, you can use the following code snippet. The key is to pass images as fetchable URLs with the markdown syntax:
+```
+from text_generation import Client
+
+API_TOKEN = "<YOUR_API_TOKEN>"
+API_URL = "https://api-inference.huggingface.co/models/HuggingFaceM4/idefics-80b-instruct"
+DECODING_STRATEGY = "Greedy"
+QUERY = "User: What is in this image?<end_of_utterance>\nAssistant:"
+
+client = Client(
+    base_url=API_URL,
+    headers={"x-use-cache": "0", "Authorization": f"Bearer {API_TOKEN}"},
+)
+generation_args = {
+    "max_new_tokens": 256,
+    "repetition_penalty": 1.0,
+    "stop_sequences": ["<end_of_utterance>", "\nUser:"],
+}
+
+if DECODING_STRATEGY == "Greedy":
+    generation_args["do_sample"] = False
+elif DECODING_STRATEGY == "Top P Sampling":
+    generation_args["temperature"] = 1.
+    generation_args["do_sample"] = True
+    generation_args["top_p"] = 0.95
+
+generated_text = client.generate(prompt=QUERY, **generation_args)
+print(generated_text)
+```
+
+Note that we currently only host the inference for the instructed models.
+
 # Training Details
 
 ## IDEFICS
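The snippet added in this commit uses a text-only prompt, even though the paragraph before it stresses that images are passed as fetchable URLs in markdown syntax. Below is a minimal sketch of what such a prompt could look like, reusing `client` and `generation_args` from the snippet above; the image URL is a placeholder, and the exact `![](...)` placement is an assumption following the markdown-image convention the card describes:

```
# Placeholder URL: substitute any publicly fetchable image.
QUERY = (
    "User:![](https://example.com/path/to/image.jpg)"
    "What is in this image?<end_of_utterance>\nAssistant:"
)

# client.generate returns a Response object; the decoded text itself
# lives on its .generated_text attribute.
generated_text = client.generate(prompt=QUERY, **generation_args)
print(generated_text.generated_text)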


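Where installing the `text_generation` client is not an option, the same hosted endpoint can be called over plain HTTP. This is a sketch assuming the standard Inference API payload shape (`inputs` plus a `parameters` object); in particular, the REST parameter for stop sequences is assumed to be `stop`, the TGI equivalent of the Python client's `stop_sequences`:

```
import requests

API_TOKEN = "<YOUR_API_TOKEN>"
API_URL = "https://api-inference.huggingface.co/models/HuggingFaceM4/idefics-80b-instruct"

# Assumed payload shape: TGI's REST API takes `inputs` and a `parameters`
# object mirroring the Python client's keyword arguments.
payload = {
    "inputs": "User: What is in this image?<end_of_utterance>\nAssistant:",
    "parameters": {
        "max_new_tokens": 256,
        "repetition_penalty": 1.0,
        "stop": ["<end_of_utterance>", "\nUser:"],
        "do_sample": False,
    },
}

response = requests.post(
    API_URL,
    headers={"Authorization": f"Bearer {API_TOKEN}", "x-use-cache": "0"},
    json=payload,
)
response.raise_for_status()
print(response.json())
```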