yutong-dai
committed on
Commit
·
099c0ff
1
Parent(s):
9d0dc73
update inference code to support transformers==4.41.1
Browse files
README.md
CHANGED
|
@@ -52,7 +52,7 @@ The model is for research purposes, more technical details will come with a tech
|
|
| 52 |
|
| 53 |
# How to use
|
| 54 |
|
| 55 |
-
|
| 56 |
|
| 57 |
```python
|
| 58 |
from transformers import AutoModelForVision2Seq, AutoTokenizer, AutoImageProcessor
|
|
@@ -149,4 +149,10 @@ pip install torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https
|
|
| 149 |
pip install open_clip_torch==2.24.0
|
| 150 |
pip install einops
|
| 151 |
pip install einops-exts
|
| 152 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
# How to use
|
| 54 |
|
| 55 |
+
~~> We require the use of the development version (`"4.41.0.dev0"`) of the `transformers` library. To get it, as of 05/07/2024, one can use `pip uninstall -y transformers && pip install git+https://github.com/huggingface/transformers`.~~
|
| 56 |
|
| 57 |
```python
|
| 58 |
from transformers import AutoModelForVision2Seq, AutoTokenizer, AutoImageProcessor
|
|
|
|
| 149 |
pip install open_clip_torch==2.24.0
|
| 150 |
pip install einops
|
| 151 |
pip install einops-exts
|
| 152 |
+
pip install transformers==4.41.1
|
| 153 |
+
```
|
| 154 |
+
|
| 155 |
+
# Changelog
|
| 156 |
+
|
| 157 |
+
* 05/24/2024
|
| 158 |
+
* update codebase to be compatible with `transformers==4.41.1`.
|
vlm.py
CHANGED
|
@@ -10,6 +10,7 @@ from transformers.modeling_outputs import CausalLMOutputWithPast
|
|
| 10 |
from dataclasses import dataclass
|
| 11 |
from transformers import CLIPVisionModel
|
| 12 |
import transformers
|
|
|
|
| 13 |
|
| 14 |
from .utils import num_params, getattr_recursive, stack_with_padding, get_anyres_image_grid_shape, unpad_image
|
| 15 |
|
|
@@ -1289,8 +1290,7 @@ class Kosmos(VLMWithLanguageStream):
|
|
| 1289 |
padding_side="left",
|
| 1290 |
num_beams=num_beams,
|
| 1291 |
)
|
| 1292 |
-
|
| 1293 |
-
if transformers.__version__ == '4.41.0.dev0':
|
| 1294 |
output = self.lang_model.generate(
|
| 1295 |
**new_inputs,
|
| 1296 |
num_beams=num_beams,
|
|
@@ -1298,11 +1298,5 @@ class Kosmos(VLMWithLanguageStream):
|
|
| 1298 |
eos_token_id=self.end_of_trunk_token_id,
|
| 1299 |
**kwargs)
|
| 1300 |
else:
|
| 1301 |
-
|
| 1302 |
-
**new_inputs,
|
| 1303 |
-
past_key_values=past_key_values,
|
| 1304 |
-
num_beams=num_beams,
|
| 1305 |
-
use_cache=True,
|
| 1306 |
-
eos_token_id=self.end_of_trunk_token_id,
|
| 1307 |
-
**kwargs)
|
| 1308 |
return output
|
|
|
|
| 10 |
from dataclasses import dataclass
|
| 11 |
from transformers import CLIPVisionModel
|
| 12 |
import transformers
|
| 13 |
+
from packaging.version import Version
|
| 14 |
|
| 15 |
from .utils import num_params, getattr_recursive, stack_with_padding, get_anyres_image_grid_shape, unpad_image
|
| 16 |
|
|
|
|
| 1290 |
padding_side="left",
|
| 1291 |
num_beams=num_beams,
|
| 1292 |
)
|
| 1293 |
+
if Version(transformers.__version__) >= Version('4.41.1'):
|
|
|
|
| 1294 |
output = self.lang_model.generate(
|
| 1295 |
**new_inputs,
|
| 1296 |
num_beams=num_beams,
|
|
|
|
| 1298 |
eos_token_id=self.end_of_trunk_token_id,
|
| 1299 |
**kwargs)
|
| 1300 |
else:
|
| 1301 |
+
raise ValueError("Please upgrade transformers to version 4.41.1 or higher.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1302 |
return output
|