Upload folder using huggingface_hub
Browse files- .gitattributes +3 -0
- README.md +599 -0
- added_tokens.json +38 -0
- chat_template.jinja +6 -0
- config.json +121 -0
- examples/image1.jpg +0 -0
- examples/image2.jpg +3 -0
- examples/red-panda.mp4 +3 -0
- generation_config.json +6 -0
- merges.txt +0 -0
- model-00001-of-00002.safetensors +3 -0
- model-00002-of-00002.safetensors +3 -0
- model.safetensors.index.json +849 -0
- preprocessor_config.json +34 -0
- processor_config.json +4 -0
- special_tokens_map.json +44 -0
- tokenizer.json +3 -0
- tokenizer_config.json +339 -0
- video_preprocessor_config.json +70 -0
- vocab.json +0 -0
    	
        .gitattributes
    CHANGED
    
    | @@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text | |
| 33 | 
             
            *.zip filter=lfs diff=lfs merge=lfs -text
         | 
| 34 | 
             
            *.zst filter=lfs diff=lfs merge=lfs -text
         | 
| 35 | 
             
            *tfevents* filter=lfs diff=lfs merge=lfs -text
         | 
|  | |
|  | |
|  | 
|  | |
| 33 | 
             
            *.zip filter=lfs diff=lfs merge=lfs -text
         | 
| 34 | 
             
            *.zst filter=lfs diff=lfs merge=lfs -text
         | 
| 35 | 
             
            *tfevents* filter=lfs diff=lfs merge=lfs -text
         | 
| 36 | 
            +
            examples/image2.jpg filter=lfs diff=lfs merge=lfs -text
         | 
| 37 | 
            +
            examples/red-panda.mp4 filter=lfs diff=lfs merge=lfs -text
         | 
| 38 | 
            +
            tokenizer.json filter=lfs diff=lfs merge=lfs -text
         | 
    	
        README.md
    ADDED
    
    | @@ -0,0 +1,599 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            ---
         | 
| 2 | 
            +
            license: apache-2.0
         | 
| 3 | 
            +
            pipeline_tag: image-text-to-text
         | 
| 4 | 
            +
            library_name: transformers
         | 
| 5 | 
            +
            base_model:
         | 
| 6 | 
            +
              - OpenGVLab/InternVL3_5-4B-MPO
         | 
| 7 | 
            +
            base_model_relation: finetune
         | 
| 8 | 
            +
            datasets:
         | 
| 9 | 
            +
              - OpenGVLab/MMPR-v1.2
         | 
| 10 | 
            +
              - OpenGVLab/MMPR-Tiny
         | 
| 11 | 
            +
            language:
         | 
| 12 | 
            +
              - multilingual
         | 
| 13 | 
            +
            tags:
         | 
| 14 | 
            +
              - internvl
         | 
| 15 | 
            +
              - custom_code
         | 
| 16 | 
            +
            ---
         | 
| 17 | 
            +
             | 
| 18 | 
            +
            # InternVL3_5-4B
         | 
| 19 | 
            +
             | 
| 20 | 
            +
            [\[📂 GitHub\]](https://github.com/OpenGVLab/InternVL)  [\[📜 InternVL 1.0\]](https://huggingface.co/papers/2312.14238)  [\[📜 InternVL 1.5\]](https://huggingface.co/papers/2404.16821)  [\[📜 InternVL 2.5\]](https://huggingface.co/papers/2412.05271)  [\[📜 InternVL2.5-MPO\]](https://huggingface.co/papers/2411.10442)  [\[📜 InternVL3\]](https://huggingface.co/papers/2504.10479) [\[📜 InternVL3.5\]](https://huggingface.co/papers/2508.18265)
         | 
| 21 | 
            +
             | 
| 22 | 
            +
            [\[🆕 Blog\]](https://internvl.github.io/blog/)  [\[🗨️ Chat Demo\]](https://chat.intern-ai.org.cn/)  [\[🚀 Quick Start\]](#quick-start)  [\[📖 Documents\]](https://internvl.readthedocs.io/en/latest/)
         | 
| 23 | 
            +
             | 
| 24 | 
            +
            <div align="center">
         | 
| 25 | 
            +
              <img width="500" alt="image" src="https://cdn-uploads.huggingface.co/production/uploads/64006c09330a45b03605bba3/zJsd2hqd3EevgXo6fNgC-.png">
         | 
| 26 | 
            +
            </div>
         | 
| 27 | 
            +
             | 
| 28 | 
            +
            ## Introduction
         | 
| 29 | 
            +
             | 
| 30 | 
            +
            We introduce *InternVL3.5*, a new family of open-source multimodal models that significantly advances versatility, reasoning capability, and inference efficiency along the InternVL series. A key innovation is the *Cascade Reinforcement Learning (Cascade RL)* framework, which enhances reasoning through a two-stage process: offline RL for stable convergence and online RL for refined alignment. This coarse-to-fine training strategy leads to substantial improvements on downstream reasoning tasks, e.g., MMMU and MathVista. To optimize efficiency, we propose a *Visual Resolution Router (ViR)* that dynamically adjusts the resolution of visual tokens without compromising performance. Coupled with ViR, our *Decoupled Vision-Language Deployment (DvD)* strategy separates the vision encoder and language model across different GPUs, effectively balancing computational load. These contributions collectively enable InternVL3.5 to achieve up to a +16.0\% gain in overall reasoning performance and a 4.05 \\(\times\\) inference speedup compared to its predecessor, i.e., InternVL3. In addition, InternVL3.5 supports novel capabilities such as GUI interaction and embodied agency. Notably, our largest model, i.e., InternVL3.5-241B-A28B, attains state-of-the-art results among open-source MLLMs across general multimodal, reasoning, text, and agentic tasks—narrowing the performance gap with leading commercial models like GPT-5. All models and code are publicly released.
         | 
| 31 | 
            +
             | 
| 32 | 
            +
            
         | 
| 33 | 
            +
             | 
| 34 | 
            +
            > Hatched bars represent closed-source commercial models. We report average scores on a set of multimodal general, reasoning, text, and agentic benchmarks: MMBench v1.1 (en), MMStar, BLINK, HallusionBench, AI2D, OCRBench, MMVet, MME-RealWorld (en), MVBench, VideoMME, MMMU, MathVista, MathVision, MathVerse, DynaMath, WeMath, LogicVista, MATH500, AIME24, AIME25, GPQA, MMLU-Pro, GAOKAO, IFEval, SGP-Bench, VSI-Bench, ERQA, SpaCE-10, and OmniSpatial.
         | 
| 35 | 
            +
             | 
| 36 | 
            +
            See [quick start](#quick-start) for how to use our model.
         | 
| 37 | 
            +
             | 
| 38 | 
            +
            ## InternVL3.5 Family
         | 
| 39 | 
            +
             | 
| 40 | 
            +
            In the following table, we provide an overview of the InternVL3.5 series.
         | 
| 41 | 
            +
            To maintain consistency with earlier generations, we provide two model formats: [the GitHub format](https://huggingface.co/OpenGVLab/InternVL3_5-241B-A28B), consistent with prior releases, and [the HF format](https://huggingface.co/OpenGVLab/InternVL3_5-241B-A28B-HF), aligned with the official Transformers standard.
         | 
| 42 | 
            +
             | 
| 43 | 
            +
            > If you want to convert the checkpoint between these two formats, please refer to the scripts about [custom2hf](https://github.com/OpenGVLab/InternVL/blob/main/internvl_chat/tools/internvl_custom2hf.py) and [hf2custom](https://github.com/OpenGVLab/InternVL/blob/main/internvl_chat/tools/internvl_hf2custom.py).
         | 
| 44 | 
            +
             | 
| 45 | 
            +
             | 
| 46 | 
            +
            ### GitHub Format
         | 
| 47 | 
            +
             | 
| 48 | 
            +
             | 
| 49 | 
            +
            | Model                 | #Vision Param | #Language Param | #Total Param | HF Link                                                                        | ModelScope Link                                                                          |
         | 
| 50 | 
            +
            | --------------------- | ------------- | --------------- | ------------ | ------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------- |
         | 
| 51 | 
            +
            | InternVL3.5-1B        | 0.3B          | 0.8B            | 1.1B         | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-1B)                      | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-1B)                      |
         | 
| 52 | 
            +
            | InternVL3.5-2B        | 0.3B          | 2.0B            | 2.3B         | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-2B)                      | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-2B)                      |
         | 
| 53 | 
            +
            | InternVL3.5-4B        | 0.3B          | 4.4B            | 4.7B         | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-4B)                      | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-4B)                      |
         | 
| 54 | 
            +
            | InternVL3.5-8B        | 0.3B          | 8.2B            | 8.5B         | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-8B)                      | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-8B)                      |
         | 
| 55 | 
            +
            | InternVL3.5-14B       | 0.3B          | 14.8B           | 15.1B        | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-14B)                     | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-14B)                     |
         | 
| 56 | 
            +
            | InternVL3.5-38B       | 5.5B          | 32.8B           | 38.4B        | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-38B)                     | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-38B)                     |
         | 
| 57 | 
            +
            | InternVL3.5-20B-A4B   | 0.3B          | 20.9B           | 21.2B-A4B    | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-GPT-OSS-20B-A4B-Preview) | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-GPT-OSS-20B-A4B-Preview) |
         | 
| 58 | 
            +
            | InternVL3.5-30B-A3B   | 0.3B          | 30.5B           | 30.8B-A3B    | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-30B-A3B)                 | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-30B-A3B)                 |
         | 
| 59 | 
            +
            | InternVL3.5-241B-A28B | 5.5B          | 235.1B          | 240.7B-A29B  | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-241B-A28B)               | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-241B-A28B)               |
         | 
| 60 | 
            +
             | 
| 61 | 
            +
             | 
| 62 | 
            +
            ### HuggingFace Format
         | 
| 63 | 
            +
             | 
| 64 | 
            +
             | 
| 65 | 
            +
            | Model                    | #Vision Param | #Language Param | #Total Param | HF Link                                                                           | ModelScope Link                                                                             |
         | 
| 66 | 
            +
            | ------------------------ | ------------- | --------------- | ------------ | --------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------- |
         | 
| 67 | 
            +
            | InternVL3.5-1B-HF        | 0.3B          | 0.8B            | 1.1B         | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-1B-HF)                      | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-1B-HF)                      |
         | 
| 68 | 
            +
            | InternVL3.5-2B-HF        | 0.3B          | 2.0B            | 2.3B         | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-2B-HF)                      | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-2B-HF)                      |
         | 
| 69 | 
            +
            | InternVL3.5-4B-HF        | 0.3B          | 4.4B            | 4.7B         | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-4B-HF)                      | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-4B-HF)                      |
         | 
| 70 | 
            +
            | InternVL3.5-8B-HF        | 0.3B          | 8.2B            | 8.5B         | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-8B-HF)                      | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-8B-HF)                      |
         | 
| 71 | 
            +
            | InternVL3.5-14B-HF       | 0.3B          | 14.8B           | 15.1B        | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-14B-HF)                     | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-14B-HF)                     |
         | 
| 72 | 
            +
            | InternVL3.5-38B-HF       | 5.5B          | 32.8B           | 38.4B        | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-38B-HF)                     | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-38B-HF)                     |
         | 
| 73 | 
            +
            | InternVL3.5-20B-A4B-HF   | 0.3B          | 20.9B           | 21.2B-A4B    | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-GPT-OSS-20B-A4B-Preview-HF) | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-GPT-OSS-20B-A4B-Preview-HF) |
         | 
| 74 | 
            +
            | InternVL3.5-30B-A3B-HF   | 0.3B          | 30.5B           | 30.8B-A3B    | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-30B-A3B-HF)                 | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-30B-A3B-HF)                 |
         | 
| 75 | 
            +
            | InternVL3.5-241B-A28B-HF | 5.5B          | 235.1B          | 240.7B-A29B  | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-241B-A28B-HF)               | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-241B-A28B-HF)               |
         | 
| 76 | 
            +
             | 
| 77 | 
            +
             | 
| 78 | 
            +
            
         | 
| 79 | 
            +
             | 
| 80 | 
            +
            > We conduct the evaluation with [VLMEvalKit](https://github.com/open-compass/VLMEvalKit). ***To enable the Thinking mode of our model, please set the system prompt to [R1_SYSTEM_PROMPT](https://github.com/open-compass/VLMEvalKit/blob/main/vlmeval/vlm/internvl/internvl_chat.py#L38).*** When enabling Thinking mode, we recommend setting `do_sample=True` and `temperature=0.6` to mitigate undesired repetition.
         | 
| 81 | 
            +
             | 
| 82 | 
            +
            Our training pipeline comprises four stages: Multimodal Continual Pre-Training (**CPT**), Supervised Fine-Tuning (**SFT**), and Cascade Reinforcement Learning (**CascadeRL**). In CascadeRL, we first fine-tune the model using Mixed Preference Optimization (**MPO**) under an offline RL setting, followed by **GSPO** under an online RL setting.
         | 
| 83 | 
            +
            For the Flash version of InternVL3.5, we additionally introduce a lightweight training stage, termed Visual Consistency Learning (**ViCO**), which reduces the token cost required to represent an image patch.
         | 
| 84 | 
            +
             | 
| 85 | 
            +
            
         | 
| 86 | 
            +
             | 
| 87 | 
            +
            Here, we also open-source the model weights after different training stages for potential research usage.
         | 
| 88 | 
            +
            ***If you're unsure which version to use, please select the one without any suffix, as it has completed the full training pipeline.***
         | 
| 89 | 
            +
             | 
| 90 | 
            +
             | 
| 91 | 
            +
            | Model                            | Training Pipeline     | HF Link                                                                     | ModelScope Link                                                                       |
         | 
| 92 | 
            +
            | -------------------------------- | --------------------- | --------------------------------------------------------------------------- | ------------------------------------------------------------------------------------- |
         | 
| 93 | 
            +
            | InternVL3.5-1B-Pretrained        | CPT                   | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-1B-Pretrained)        | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-1B-Pretrained)        |
         | 
| 94 | 
            +
            | InternVL3.5-1B-Instruct          | CPT + SFT             | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-1B-Instruct)          | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-1B-Instruct)          |
         | 
| 95 | 
            +
            | InternVL3.5-1B-MPO               | CPT + SFT + MPO       | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-1B-MPO)               | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-1B-MPO)               |
         | 
| 96 | 
            +
            | InternVL3.5-1B                   | CPT + SFT + CascadeRL | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-1B)                   | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-1B)                   |
         | 
| 97 | 
            +
            | InternVL3.5-2B-Pretrained        | CPT                   | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-2B-Pretrained)        | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-2B-Pretrained)        |
         | 
| 98 | 
            +
            | InternVL3.5-2B-Instruct          | CPT + SFT             | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-2B-Instruct)          | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-2B-Instruct)          |
         | 
| 99 | 
            +
            | InternVL3.5-2B-MPO               | CPT + SFT + MPO       | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-2B-MPO)               | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-2B-MPO)               |
         | 
| 100 | 
            +
            | InternVL3.5-2B                   | CPT + SFT + CascadeRL | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-2B)                   | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-2B)                   |
         | 
| 101 | 
            +
            | InternVL3.5-4B-Pretrained        | CPT                   | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-4B-Pretrained)        | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-4B-Pretrained)        |
         | 
| 102 | 
            +
            | InternVL3.5-4B-Instruct          | CPT + SFT             | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-4B-Instruct)          | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-4B-Instruct)          |
         | 
| 103 | 
            +
            | InternVL3.5-4B-MPO               | CPT + SFT + MPO       | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-4B-MPO)               | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-4B-MPO)               |
         | 
| 104 | 
            +
            | InternVL3.5-4B                   | CPT + SFT + CascadeRL | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-4B)                   | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-4B)                   |
         | 
| 105 | 
            +
            | InternVL3.5-8B-Pretrained        | CPT                   | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-8B-Pretrained)        | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-8B-Pretrained)        |
         | 
| 106 | 
            +
            | InternVL3.5-8B-Instruct          | CPT + SFT             | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-8B-Instruct)          | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-8B-Instruct)          |
         | 
| 107 | 
            +
            | InternVL3.5-8B-MPO               | CPT + SFT + MPO       | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-8B-MPO)               | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-8B-MPO)               |
         | 
| 108 | 
            +
            | InternVL3.5-8B                   | CPT + SFT + CascadeRL | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-8B)                   | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-8B)                   |
         | 
| 109 | 
            +
            | InternVL3.5-14B-Pretrained       | CPT                   | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-14B-Pretrained)       | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-14B-Pretrained)       |
         | 
| 110 | 
            +
            | InternVL3.5-14B-Instruct         | CPT + SFT             | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-14B-Instruct)         | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-14B-Instruct)         |
         | 
| 111 | 
            +
            | InternVL3.5-14B-MPO              | CPT + SFT + MPO       | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-14B-MPO)              | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-14B-MPO)              |
         | 
| 112 | 
            +
            | InternVL3.5-14B                  | CPT + SFT + CascadeRL | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-14B)                  | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-14B)                  |
         | 
| 113 | 
            +
            | InternVL3.5-30B-A3B-Pretrained   | CPT                   | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-30B-A3B-Pretrained)   | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-30B-A3B-Pretrained)   |
         | 
| 114 | 
            +
            | InternVL3.5-30B-A3B-Instruct     | CPT + SFT             | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-30B-A3B-Instruct)     | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-30B-A3B-Instruct)     |
         | 
| 115 | 
            +
            | InternVL3.5-30B-A3B-MPO          | CPT + SFT + MPO       | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-30B-A3B-MPO)          | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-30B-A3B-MPO)          |
         | 
| 116 | 
            +
            | InternVL3.5-30B-A3B              | CPT + SFT + CascadeRL | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-30B-A3B)              | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-30B-A3B)              |
         | 
| 117 | 
            +
            | InternVL3.5-38B-Pretrained       | CPT                   | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-38B-Pretrained)       | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-38B-Pretrained)       |
         | 
| 118 | 
            +
            | InternVL3.5-38B-Instruct         | CPT + SFT             | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-38B-Instruct)         | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-38B-Instruct)         |
         | 
| 119 | 
            +
            | InternVL3.5-38B-MPO              | CPT + SFT + MPO       | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-38B-MPO)              | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-38B-MPO)              |
         | 
| 120 | 
            +
            | InternVL3.5-38B                  | CPT + SFT + CascadeRL | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-38B)                  | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-38B)                  |
         | 
| 121 | 
            +
            | InternVL3.5-241B-A28B-Pretrained | CPT                   | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-241B-A28B-Pretrained) | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-241B-A28B-Pretrained) |
         | 
| 122 | 
            +
            | InternVL3.5-241B-A28B-Instruct   | CPT + SFT             | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-241B-A28B-Instruct)   | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-241B-A28B-Instruct)   |
         | 
| 123 | 
            +
            | InternVL3.5-241B-A28B-MPO        | CPT + SFT + MPO       | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-241B-A28B-MPO)        | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-241B-A28B-MPO)        |
         | 
| 124 | 
            +
            | InternVL3.5-241B-A28B            | CPT + SFT + CascadeRL | [🤗 link](https://huggingface.co/OpenGVLab/InternVL3_5-241B-A28B)            | [🤖 link](https://www.modelscope.cn/models/OpenGVLab/InternVL3_5-241B-A28B)            |
         | 
| 125 | 
            +
             | 
| 126 | 
            +
             | 
| 127 | 
            +
            The Flash version of our model will be released as soon as possible.
         | 
| 128 | 
            +
             | 
| 129 | 
            +
             | 
| 130 | 
            +
             | 
| 131 | 
            +
            ## Model Architecture
         | 
| 132 | 
            +
             | 
| 133 | 
            +
            `InternVL3.5`:
         | 
| 134 | 
            +
            This series of models follows the "ViT–MLP–LLM" paradigm adopted in previous versions of InternVL.
         | 
| 135 | 
            +
            We initialize the language model using the Qwen3 series and GPT-OSS, and the vision encoder using InternViT-300M and InternViT-6B.
         | 
| 136 | 
            +
            The Dynamic High Resolution strategy introduced in InternVL1.5 is also retained in our design.
         | 
| 137 | 
            +
             | 
| 138 | 
            +
             | 
| 139 | 
            +
            `InternVL3.5-Flash`:
         | 
| 140 | 
            +
            Compared to InternVL3.5, InternVL3.5-Flash further integrates the *Visual Resolution Router (ViR)*, thus yielding a series of efficient variants suitable for resource-constrained scenarios.
         | 
| 141 | 
            +
            Specifically, in InternVL3.5, each image patch is initially represented as 1024 visual tokens for the vision encoder, which are then compressed into 256 tokens via a pixel shuffle module before being passed to the Large Language Model (LLM).
         | 
| 142 | 
            +
            In InternVL3.5-Flash, as shown in the Figure below, an additional pixel shuffle module with a higher compression rate is included, enabling the compression of visual tokens down to 64 tokens.
         | 
| 143 | 
            +
            For each patch, the patch router determines the appropriate compression rate by assessing its semantic richness, and routes it to the corresponding pixel shuffle module accordingly.
         | 
| 144 | 
            +
            Benefiting from this patch-aware compression mechanism, InternVL3.5-Flash is able to reduce the number of visual tokens by 50\% while maintaining nearly 100\% of the performance of InternVL3.5.
         | 
| 145 | 
            +
             | 
| 146 | 
            +
             | 
| 147 | 
            +
            
         | 
| 148 | 
            +
             | 
| 149 | 
            +
            ## Training and Deployment Strategy
         | 
| 150 | 
            +
             | 
| 151 | 
            +
            ### Pre-Training
         | 
| 152 | 
            +
             | 
| 153 | 
            +
            During the pre-training stage, we update all model parameters jointly using the combination of large-scale text and multimodal corpora. Specifically, given an arbitrary training sample consisting of a multimodal token sequence \\(\mathbf{x}=\left(x_1, x_2, \ldots, x_L\right)\\), the next token prediction (NTP) loss is calculated on each text token as follows:
         | 
| 154 | 
            +
             | 
| 155 | 
            +
            $$
         | 
| 156 | 
            +
                \mathcal{L}_{i}=-\log p_\theta\left(x_i \mid x_1, \ldots, x_{i-1}\right),
         | 
| 157 | 
            +
            $$
         | 
| 158 | 
            +
             | 
| 159 | 
            +
            where \\(x_i\\) is the predicted token and  prefix tokens in \\(\{x_1, x_2, \ldots, x_{i-1}\}\\) can be either  text tokens or  image tokens. Notably, for conversation samples, only response tokens  are included for the calculation of the loss.
         | 
| 160 | 
            +
            Additionally, to mitigate bias toward either longer or shorter responses during training, we adopt square-root averaging to re-weight the NTP loss as follows:
         | 
| 161 | 
            +
             | 
| 162 | 
            +
            $$
         | 
| 163 | 
            +
            \mathcal{L}_{i}^{'} = \frac{w_i}{\sum_j w_j} \cdot \mathcal{L}_i, \quad w_i = \frac{1}{N^{0.5}},
         | 
| 164 | 
            +
            $$
         | 
| 165 | 
            +
             | 
| 166 | 
            +
            where \\(N\\) denotes the number of tokens in the training sample on which the loss needs to be calculated. Random JPEG compression is also applied to enhance the model's real-world performance.
         | 
| 167 | 
            +
             | 
| 168 | 
            +
            ### Supervised Fine-Tuning
         | 
| 169 | 
            +
             | 
| 170 | 
            +
            During the SFT phase, we adopt the same objective as in the pre-training stage and use the square-root averaging strategy to calculate the final loss. In this stage, the context window is set to 32K tokens to accommodate long-context information.
         | 
| 171 | 
            +
            Compared to InternVL3, the SFT stage of InternVL3.5 contains  more high-quality and  diverse training data derived from three sources: 
         | 
| 172 | 
            +
             | 
| 173 | 
            +
            (1) Instruction-following data from InternVL3, which are reused to preserve broad coverage of vision–language tasks. 
         | 
| 174 | 
            +
             | 
| 175 | 
            +
            (2) Multimodal reasoning data in the "Thinking" mode, which are included to instill long-thinking capabilities in the model. To construct such data, we first use InternVL3-78B to describe the image and then input the description into DeepSeek-R1 to sample rollouts with detailed reasoning processes. Rollouts with an incorrect final answer are filtered out. The questions in these datasets cover various expert domains, such as mathematics and scientific disciplines, thereby strengthening performance on different reasoning tasks. 
         | 
| 176 | 
            +
             | 
| 177 | 
            +
            (3) Capability-expansion datasets, which endow InternVL3.5 with new skills, including GUI-based interaction, embodied interaction, and scalable vector graphics (SVG) understanding and generation.
         | 
| 178 | 
            +
             | 
| 179 | 
            +
            ### Cascade Reinforcement Learning
         | 
| 180 | 
            +
             | 
| 181 | 
            +
            Cascade RL aims to combine the benefits of offline RL and online RL to progressively facilitate the post-training of MLLMs in an efficient manner.
         | 
| 182 | 
            +
            Specifically, we first fine-tune the model using an offline RL algorithm as an efficient warm-up stage to reach satisfactory results, which guarantees high-quality rollouts for the later stage.
         | 
| 183 | 
            +
            Subsequently, we employ an online RL algorithm to further refine the output distribution based on rollouts generated by the model itself.  Compared to the single offline or online RL stage, our cascaded RL achieves significant performance improvements at a fraction of the GPU time cost.
         | 
| 184 | 
            +
             | 
| 185 | 
            +
             | 
| 186 | 
            +
             | 
| 187 | 
            +
            During the offline RL stage, we employ mixed preference optimization (MPO) to fine-tune the model. Specifically, the training objective of MPO is a combination of preference loss \\(\mathcal{L}_{p}\\), quality loss \\(\mathcal{L}_{q}\\), and generation loss \\(\mathcal{L}_{g}\\), which can be formulated as follows:
         | 
| 188 | 
            +
             | 
| 189 | 
            +
            $$
         | 
| 190 | 
            +
                \mathcal{L}_{\text{MPO}}=
         | 
| 191 | 
            +
                w_{p} \mathcal{L}_{p}
         | 
| 192 | 
            +
                +
         | 
| 193 | 
            +
                w_{q} \mathcal{L}_{q}
         | 
| 194 | 
            +
                +
         | 
| 195 | 
            +
                w_{g} \mathcal{L}_{g}
         | 
| 196 | 
            +
                ,
         | 
| 197 | 
            +
            $$
         | 
| 198 | 
            +
             | 
| 199 | 
            +
            where \\(w_{*}\\) represents the weight assigned to each loss component.
         | 
| 200 | 
            +
            The DPO loss, BCO loss, and LM loss serve as the preference loss, quality loss, and generation loss, respectively.
         | 
| 201 | 
            +
             | 
| 202 | 
            +
             | 
| 203 | 
            +
            During the online RL stage, we employ GSPO, without reference model constraints, as our online RL algorithm, which we find more effective in training both dense and mixture-of-experts (MoE) models. Similar to GRPO, the advantage is defined as the normalized reward across responses sampled from the same query.
         | 
| 204 | 
            +
            The training objective of GSPO is given by:
         | 
| 205 | 
            +
             | 
| 206 | 
            +
            $$
         | 
| 207 | 
            +
                \mathcal{L}_{\mathrm{GSPO}}(\theta)=\mathbb{E}_{x \sim \mathcal{D},\left\{y_i\right\}_{i=1}^G \sim \pi_{\theta \text { old }}(\cdot \mid x)}\left[\frac{1}{G} \sum_{i=1}^G \min \left(s_i(\theta) \widehat{A}_i, \operatorname{clip}\left(s_i(\theta), 1-\varepsilon, 1+\varepsilon\right) \widehat{A}_i\right)\right],
         | 
| 208 | 
            +
            $$
         | 
| 209 | 
            +
             | 
| 210 | 
            +
            where the importance sampling ratio is defined as the geometric mean of the per-token ratios.
         | 
| 211 | 
            +
             | 
| 212 | 
            +
            > Please see [our paper](https://huggingface.co/papers/2508.18265) for more technical and experimental details.
         | 
| 213 | 
            +
             | 
| 214 | 
            +
             | 
| 215 | 
            +
            ### Visual Consistency Learning
         | 
| 216 | 
            +
             | 
| 217 | 
            +
             | 
| 218 | 
            +
            We further include ViCO as an additional training stage to integrate the *visual resolution router (ViR)* into InternVL3.5, thereby reducing the inference cost of InternVL3.5. The resulting efficient versions of InternVL3.5 are termed *InternVL3.5-Flash*. In particular, ViCO comprises two stages:
         | 
| 219 | 
            +
             | 
| 220 | 
            +
            `Consistency training`:
         | 
| 221 | 
            +
            In this stage, the entire model is trained to minimize the divergence between response distributions conditioned on visual tokens with different compression rates.
         | 
| 222 | 
            +
            In practice, we introduce an extra reference model, which is frozen and initialized with InternVL3.5.
         | 
| 223 | 
            +
            Given a sample, each image patch is represented as either 256 or 64 tokens, and the training objective is defined as follows:
         | 
| 224 | 
            +
             | 
| 225 | 
            +
             | 
| 226 | 
            +
            $$
         | 
| 227 | 
            +
            \mathcal{L}_\text{ViCO} =
         | 
| 228 | 
            +
            \mathbb{E}_{\xi \sim \mathcal{R}} \Bigg[
         | 
| 229 | 
            +
            \frac{1}{N} \sum_{i=1}^{N} \mathrm{KL} \Big(
         | 
| 230 | 
            +
            \pi_{\theta_{ref}}\left(y_i \mid y_{<i}, I\right) \;\Big\|\;
         | 
| 231 | 
            +
            \pi_{\theta_{policy}}\left(y_i \mid y_{<i}, I_\xi\right)
         | 
| 232 | 
            +
            \Big)
         | 
| 233 | 
            +
            \Bigg],
         | 
| 234 | 
            +
            $$
         | 
| 235 | 
            +
             | 
| 236 | 
            +
            where \\(\mathrm{KL}\\) denotes the KL divergence and \\(\xi\\) denotes the compression rate, which is uniformly sampled from \\(\{\frac{1}{4},\frac{1}{16}\}\\). The image \\(I_\xi\\) is represented as 256 tokens when \\(\xi=\frac{1}{4}\\) and 64 tokens when \\(\xi=\frac{1}{16}\\). Notably, the reference model always performs inference with \\(\xi=\frac{1}{4}\\).
         | 
| 237 | 
            +
             | 
| 238 | 
            +
             | 
| 239 | 
            +
            `Router training`:
         | 
| 240 | 
            +
            This stage aims to train the ViR to select an appropriate trade-off resolution for different inputs.
         | 
| 241 | 
            +
            ViR is formulated as a binary classifier and trained using standard cross-entropy loss.
         | 
| 242 | 
            +
            To construct the route targets, we first compute the KL divergence between the model outputs conditioned on uncompressed visual tokens (i.e., 256 tokens per patch) and those conditioned on compressed visual tokens (i.e., 64 tokens per patch).
         | 
| 243 | 
            +
            During this stage, the main MLLM (ViT, MLP and LLM) is kept frozen, and only the ViR is trained.
         | 
| 244 | 
            +
            Specifically, we first compute the loss ratio for each patch:
         | 
| 245 | 
            +
             | 
| 246 | 
            +
            $$
         | 
| 247 | 
            +
            r_i = \frac{\mathcal{L}_\text{ViCO}\big(y_i \mid I_{\frac{1}{16}}\big)}{\mathcal{L}_\text{ViCO}\big(y_i \mid I_{\frac{1}{4}}\big)},
         | 
| 248 | 
            +
            $$
         | 
| 249 | 
            +
             | 
| 250 | 
            +
            which quantifies the relative increase in loss caused by compressing the visual tokens. Based on this ratio, the binary ground-truth label for the patch router is defined as:
         | 
| 251 | 
            +
             | 
| 252 | 
            +
            $$
         | 
| 253 | 
            +
            y_i^\text{router} =
         | 
| 254 | 
            +
            \begin{cases}
         | 
| 255 | 
            +
            0, & r_i < \tau \; \text{(compression has negligible impact)} \\
         | 
| 256 | 
            +
            1, & r_i \ge \tau \; \text{(compression has significant impact)},
         | 
| 257 | 
            +
            \end{cases}
         | 
| 258 | 
            +
            $$
         | 
| 259 | 
            +
             | 
| 260 | 
            +
            where \\(y_i^{\text{router}}=0\\) and \\(y_i^{\text{router}}=1\\) indicate that the compression rate \\(\xi\\) is set to \\(\tfrac{1}{16}\\) and \\(\tfrac{1}{4}\\), respectively.
         | 
| 261 | 
            +
             | 
| 262 | 
            +
            > Please see [our paper](https://huggingface.co/papers/2508.18265) for more technical and experimental details.
         | 
| 263 | 
            +
             | 
| 264 | 
            +
             | 
| 265 | 
            +
            ### Test-Time Scaling
         | 
| 266 | 
            +
             | 
| 267 | 
            +
             | 
| 268 | 
            +
            Test-time scaling (TTS) has been empirically demonstrated as an effective approach to enhance the reasoning capabilities of LLMs and MLLMs, particularly for complex tasks necessitating multi-step inference.
         | 
| 269 | 
            +
            In this work, we implement a comprehensive test-time scaling approach that simultaneously improves reasoning depth (i.e., deep thinking) and breadth (i.e., parallel thinking).
         | 
| 270 | 
            +
             | 
| 271 | 
            +
            `Deep Thinking`: By activating the Thinking mode, we guide the model to deliberately engage in step-by-step reasoning (i.e., decomposing complex problems into logical steps and validating intermediate conclusions) prior to generating the final answer. This approach systematically improves the logical structure of solutions for complex problems, particularly those requiring multi-step inference, and enhances reasoning depth.
         | 
| 272 | 
            +
             | 
| 273 | 
            +
            `Parallel Thinking`: Following InternVL3, for reasoning tasks, we adopt the Best-of-N (BoN) strategy by employing [VisualPRM-v1.1](https://huggingface.co/OpenGVLab/VisualPRM-8B-v1_1) as the critic model to select the optimal response from multiple reasoning candidates.
         | 
| 274 | 
            +
            This approach improves reasoning breadth.
         | 
| 275 | 
            +
             | 
| 276 | 
            +
            > Notably, unless otherwise specified, the experimental results reported in our paper are obtained without applying TTS. Thus far, we have only applied TTS to reasoning benchmarks, since we found that the model already exhibits strong perception and understanding capabilities, and initiating TTS yields no significant improvement.
         | 
| 277 | 
            +
             | 
| 278 | 
            +
             | 
| 279 | 
            +
            ### Decoupled Vision-Language Deployment
         | 
| 280 | 
            +
             | 
| 281 | 
            +
            In multimodal inference, the vision encoder and language model have distinct computational characteristics. The vision encoder, which transforms images into semantic features, is highly parallelizable and does not rely on long-term history state. In contrast, the language model performs inference in an autoregressive manner, which requires previous states to compute the next one. This sequential property makes the language part more sensitive to memory bandwidth and latency.
         | 
| 282 | 
            +
            When MLLMs are deployed online at scale, the vision and language models often block each other, thus incurring additional inference cost. This effect becomes more pronounced with larger vision models or higher-resolution images.
         | 
| 283 | 
            +
             | 
| 284 | 
            +
            
         | 
| 285 | 
            +
             | 
| 286 | 
            +
            As shown in the Figure above, we propose decoupled vision-language deployment (DvD) to address this issue by separating vision and language processing, with a particular focus on optimizing the prefilling stage. The vision subsystem batches and processes images to produce compact feature embeddings, which are then transmitted to the language subsystem for fusion with the text context prior to decoding. This separation alleviates blocking and brings multimodal prefilling performance closer to that of pure language models.
         | 
| 287 | 
            +
            In our system implementation, the ViT and MLP (and ViR for InternVL3.5-Flash) are deployed on the vision server, while the language server executes only the LLM. The communication is unidirectional, transmitting BF16 visual features over TCP, with RDMA optionally employed to achieve higher transmission speed. Vision processing, feature transmission, and language processing are organized into an asynchronous three-stage pipeline, enabling overlapped execution and minimizing pipeline stalls.
         | 
| 288 | 
            +
             | 
| 289 | 
            +
             | 
| 290 | 
            +
            DvD increases GPU utilization and processing efficiency on the vision side, while enabling the language server to focus exclusively on the LLM’s prefilling and decoding without being blocked by vision computation. This design leads to improved throughput and responsiveness. Moreover, the architecture supports independent hardware cost optimization for the vision and language modules, and facilitates the seamless integration of new modules without requiring modifications to the language server deployment.
         | 
| 291 | 
            +
             | 
| 292 | 
            +
             | 
| 293 | 
            +
            ## Evaluation on Multimodal Capability
         | 
| 294 | 
            +
             | 
| 295 | 
            +
            ### Multimodal Reasoning and Mathematics
         | 
| 296 | 
            +
             | 
| 297 | 
            +
            
         | 
| 298 | 
            +
             | 
| 299 | 
            +
            ### OCR, Chart, and Document Understanding
         | 
| 300 | 
            +
             | 
| 301 | 
            +
            
         | 
| 302 | 
            +
             | 
| 303 | 
            +
            ### Multi-Image Understanding & Real-World Comprehension
         | 
| 304 | 
            +
             | 
| 305 | 
            +
            
         | 
| 306 | 
            +
             | 
| 307 | 
            +
            ### Comprehensive Multimodal Understanding & Multimodal Hallucination Evaluation
         | 
| 308 | 
            +
             | 
| 309 | 
            +
            
         | 
| 310 | 
            +
             | 
| 311 | 
            +
            ### Visual Grounding
         | 
| 312 | 
            +
             | 
| 313 | 
            +
            
         | 
| 314 | 
            +
             | 
| 315 | 
            +
            ### Multimodal Multilingual Understanding
         | 
| 316 | 
            +
             | 
| 317 | 
            +
            
         | 
| 318 | 
            +
             | 
| 319 | 
            +
            ### Video Understanding
         | 
| 320 | 
            +
             | 
| 321 | 
            +
            
         | 
| 322 | 
            +
             | 
| 323 | 
            +
            ### GUI Tasks
         | 
| 324 | 
            +
             | 
| 325 | 
            +
            
         | 
| 326 | 
            +
             | 
| 327 | 
            +
            ### Embodied Tasks
         | 
| 328 | 
            +
             | 
| 329 | 
            +
            
         | 
| 330 | 
            +
             | 
| 331 | 
            +
            ### SVG Tasks
         | 
| 332 | 
            +
             | 
| 333 | 
            +
            
         | 
| 334 | 
            +
             | 
| 335 | 
            +
            
         | 
| 336 | 
            +
             | 
| 337 | 
            +
            ## Evaluation on Language Capability
         | 
| 338 | 
            +
             | 
| 339 | 
            +
            
         | 
| 340 | 
            +
             | 
| 341 | 
            +
            ## Ablation Study
         | 
| 342 | 
            +
             | 
| 343 | 
            +
            ### Cascade Reinforcement Learning
         | 
| 344 | 
            +
             | 
| 345 | 
            +
            
         | 
| 346 | 
            +
             | 
| 347 | 
            +
            
         | 
| 348 | 
            +
             | 
| 349 | 
            +
            ### Decoupled Vision-Language Deployment
         | 
| 350 | 
            +
             | 
| 351 | 
            +
             | 
| 352 | 
            +
            
         | 
| 353 | 
            +
             | 
| 354 | 
            +
            ## Quick Start
         | 
| 355 | 
            +
             | 
| 356 | 
            +
            We provide example code to run `InternVL3.5-8B-HF` using `transformers`. Please note that our models with up to 30B parameters can be deployed on a single A100 GPU, while the 38B model requires two A100 GPUs and the 241B-A28B model requires eight A100 GPUs.
         | 
| 357 | 
            +
             | 
| 358 | 
            +
            > In most cases, both [LMDeploy](https://github.com/InternLM/lmdeploy) and [vLLM](https://github.com/vllm-project/vllm) can be used for model deployment. However, for InternVL3.5-20B-A4B, we recommend using vLLM, since LMDeploy does not yet support GPT-OSS.
         | 
| 359 | 
            +
             | 
| 360 | 
            +
            > Please use transformers>=4.52.1 to ensure the model works normally. For the 20B version of our model, transformers>=4.55.0 is required.
         | 
| 361 | 
            +
             | 
| 362 | 
            +
            ### Model Loading
         | 
| 363 | 
            +
             | 
| 364 | 
            +
            #### 16-bit (bf16 / fp16)
         | 
| 365 | 
            +
             | 
| 366 | 
            +
            ```python
         | 
| 367 | 
            +
            import torch
         | 
| 368 | 
            +
            from transformers import AutoTokenizer, AutoModel
         | 
| 369 | 
            +
            path = "OpenGVLab/InternVL3_5-8B-HF"
         | 
| 370 | 
            +
            model = AutoModel.from_pretrained(
         | 
| 371 | 
            +
                path,
         | 
| 372 | 
            +
                torch_dtype=torch.bfloat16,
         | 
| 373 | 
            +
                low_cpu_mem_usage=True,
         | 
| 374 | 
            +
                use_flash_attn=True,
         | 
| 375 | 
            +
                trust_remote_code=True).eval().cuda()
         | 
| 376 | 
            +
            ```
         | 
| 377 | 
            +
             | 
| 378 | 
            +
            #### BNB 8-bit Quantization
         | 
| 379 | 
            +
             | 
| 380 | 
            +
            ```python
         | 
| 381 | 
            +
            import torch
         | 
| 382 | 
            +
            from transformers import AutoTokenizer, AutoModel
         | 
| 383 | 
            +
            path = "OpenGVLab/InternVL3_5-8B-HF"
         | 
| 384 | 
            +
            model = AutoModel.from_pretrained(
         | 
| 385 | 
            +
                path,
         | 
| 386 | 
            +
                torch_dtype=torch.bfloat16,
         | 
| 387 | 
            +
                load_in_8bit=True,
         | 
| 388 | 
            +
                low_cpu_mem_usage=True,
         | 
| 389 | 
            +
                use_flash_attn=True,
         | 
| 390 | 
            +
                trust_remote_code=True).eval()
         | 
| 391 | 
            +
            ```
         | 
| 392 | 
            +
             | 
| 393 | 
            +
            #### Multiple GPUs
         | 
| 394 | 
            +
             | 
| 395 | 
            +
            ```python
         | 
| 396 | 
            +
            import math
         | 
| 397 | 
            +
            import torch
         | 
| 398 | 
            +
            from transformers import AutoTokenizer, AutoModel
         | 
| 399 | 
            +
             | 
| 400 | 
            +
            path = "OpenGVLab/InternVL3_5-8B-HF"
         | 
| 401 | 
            +
            model = AutoModel.from_pretrained(
         | 
| 402 | 
            +
                path,
         | 
| 403 | 
            +
                torch_dtype=torch.bfloat16,
         | 
| 404 | 
            +
                low_cpu_mem_usage=True,
         | 
| 405 | 
            +
                use_flash_attn=True,
         | 
| 406 | 
            +
                trust_remote_code=True,
         | 
| 407 | 
            +
                device_map="auto").eval()
         | 
| 408 | 
            +
            ```
         | 
| 409 | 
            +
             | 
| 410 | 
            +
            ### Thinking Mode
         | 
| 411 | 
            +
             | 
| 412 | 
            +
            To enable thinking mode, please set the system prompt to our Thinking System Prompt. When enabling Thinking mode, we recommend setting `do_sample=True` and `temperature=0.6` to mitigate undesired repetition.
         | 
| 413 | 
            +
             | 
| 414 | 
            +
            ```python
         | 
| 415 | 
            +
            R1_SYSTEM_PROMPT = """
         | 
| 416 | 
            +
            You are an AI assistant that rigorously follows this response protocol:
         | 
| 417 | 
            +
             | 
| 418 | 
            +
            1. First, conduct a detailed analysis of the question. Consider different angles, potential solutions, and reason through the problem step-by-step. Enclose this entire thinking process within <think> and </think> tags.
         | 
| 419 | 
            +
             | 
| 420 | 
            +
            2. After the thinking section, provide a clear, concise, and direct answer to the user's question. Separate the answer from the think section with a newline.
         | 
| 421 | 
            +
             | 
| 422 | 
            +
            Ensure that the thinking process is thorough but remains focused on the query. The final answer should be standalone and not reference the thinking section.
         | 
| 423 | 
            +
            """.strip()
         | 
| 424 | 
            +
             | 
| 425 | 
            +
            messages = [
         | 
| 426 | 
            +
                {
         | 
| 427 | 
            +
                    "role": "system",
         | 
| 428 | 
            +
                    "content": [
         | 
| 429 | 
            +
                        {"type": "text", "text": R1_SYSTEM_PROMPT},
         | 
| 430 | 
            +
                    ],
         | 
| 431 | 
            +
                },
         | 
| 432 | 
            +
                {
         | 
| 433 | 
            +
                    "role": "user",
         | 
| 434 | 
            +
                    "content": [
         | 
| 435 | 
            +
                        {"type": "text", "text": "xxx"},
         | 
| 436 | 
            +
                    ],
         | 
| 437 | 
            +
                },
         | 
| 438 | 
            +
            ]
         | 
| 439 | 
            +
            ```
         | 
| 440 | 
            +
             | 
| 441 | 
            +
            ### Inference with Transformers
         | 
| 442 | 
            +
             | 
| 443 | 
            +
            The HuggingFace format checkpoints of our models are fully consistent with the APIs of the official HuggingFace models. For details, please refer to the official [documentation](https://huggingface.co/docs/transformers/v4.55.4/en/model_doc/internvl).
         | 
| 444 | 
            +
             | 
| 445 | 
            +
            ## Finetune
         | 
| 446 | 
            +
             | 
| 447 | 
            +
            Many repositories now support fine-tuning of the InternVL series models, including [InternVL](https://github.com/OpenGVLab/InternVL), [SWIFT](https://github.com/modelscope/ms-swift), [XTuner](https://github.com/InternLM/xtuner), and others. Please refer to their documentation for more details on fine-tuning.
         | 
| 448 | 
            +
             | 
| 449 | 
            +
            ## Deployment
         | 
| 450 | 
            +
             | 
| 451 | 
            +
            ### LMDeploy
         | 
| 452 | 
            +
             | 
| 453 | 
            +
            LMDeploy is a toolkit for compressing, deploying, and serving LLMs & VLMs.
         | 
| 454 | 
            +
             | 
| 455 | 
            +
            ```sh
         | 
| 456 | 
            +
            pip install "lmdeploy>=0.9.1"
         | 
| 457 | 
            +
            ```
         | 
| 458 | 
            +
             | 
| 459 | 
            +
            LMDeploy abstracts the complex inference process of multi-modal Vision-Language Models (VLM) into an easy-to-use pipeline, similar to the Large Language Model (LLM) inference pipeline.
         | 
| 460 | 
            +
             | 
| 461 | 
            +
            #### A 'Hello, world' Example
         | 
| 462 | 
            +
             | 
| 463 | 
            +
            ```python
         | 
| 464 | 
            +
            from lmdeploy import pipeline, PytorchEngineConfig
         | 
| 465 | 
            +
            from lmdeploy.vl import load_image
         | 
| 466 | 
            +
             | 
| 467 | 
            +
            image = load_image('https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg')
         | 
| 468 | 
            +
             | 
| 469 | 
            +
            # Please set tp=2 for the 38B version and tp=8 for the 241B-A28B version.
         | 
| 470 | 
            +
            model = 'OpenGVLab/InternVL3_5-8B'
         | 
| 471 | 
            +
            pipe = pipeline(model, backend_config=PytorchEngineConfig(session_len=32768, tp=1))
         | 
| 472 | 
            +
             | 
| 473 | 
            +
            response = pipe(('describe this image', image))
         | 
| 474 | 
            +
            print(response.text)
         | 
| 475 | 
            +
            ```
         | 
| 476 | 
            +
             | 
| 477 | 
            +
            #### Multi-images Inference
         | 
| 478 | 
            +
             | 
| 479 | 
            +
            When dealing with multiple images, you can put them all in one list. Keep in mind that multiple images will lead to a higher number of input tokens, and as a result, the size of the context window typically needs to be increased.
         | 
| 480 | 
            +
             | 
| 481 | 
            +
            ```python
         | 
| 482 | 
            +
            from lmdeploy import pipeline, PytorchEngineConfig
         | 
| 483 | 
            +
            from lmdeploy.vl import load_image
         | 
| 484 | 
            +
            from lmdeploy.vl.constants import IMAGE_TOKEN
         | 
| 485 | 
            +
             | 
| 486 | 
            +
            # Please set tp=2 for the 38B version and tp=8 for the 241B-A28B version.
         | 
| 487 | 
            +
            model = 'OpenGVLab/InternVL3_5-8B'
         | 
| 488 | 
            +
            pipe = pipeline(model, backend_config=PytorchEngineConfig(session_len=32768, tp=1))
         | 
| 489 | 
            +
             | 
| 490 | 
            +
            image_urls=[
         | 
| 491 | 
            +
                'https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/demo/resources/human-pose.jpg',
         | 
| 492 | 
            +
                'https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/demo/resources/det.jpg'
         | 
| 493 | 
            +
            ]
         | 
| 494 | 
            +
             | 
| 495 | 
            +
            images = [load_image(img_url) for img_url in image_urls]
         | 
| 496 | 
            +
            # Numbering images improves multi-image conversations
         | 
| 497 | 
            +
            response = pipe((f'Image-1: {IMAGE_TOKEN}\nImage-2: {IMAGE_TOKEN}\ndescribe these two images', images))
         | 
| 498 | 
            +
            print(response.text)
         | 
| 499 | 
            +
            ```
         | 
| 500 | 
            +
             | 
| 501 | 
            +
            #### Batch Prompts Inference
         | 
| 502 | 
            +
             | 
| 503 | 
            +
            Conducting inference with batch prompts is quite straightforward; just place them within a list structure:
         | 
| 504 | 
            +
             | 
| 505 | 
            +
            ```python
         | 
| 506 | 
            +
            from lmdeploy import pipeline, PytorchEngineConfig
         | 
| 507 | 
            +
            from lmdeploy.vl import load_image
         | 
| 508 | 
            +
             | 
| 509 | 
            +
            # Please set tp=2 for the 38B version and tp=8 for the 241B-A28B version.
         | 
| 510 | 
            +
            model = 'OpenGVLab/InternVL3_5-8B'
         | 
| 511 | 
            +
            pipe = pipeline(model, backend_config=PytorchEngineConfig(session_len=32768, tp=1))
         | 
| 512 | 
            +
             | 
| 513 | 
            +
            image_urls=[
         | 
| 514 | 
            +
                "https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/demo/resources/human-pose.jpg",
         | 
| 515 | 
            +
                "https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/demo/resources/det.jpg"
         | 
| 516 | 
            +
            ]
         | 
| 517 | 
            +
            prompts = [('describe this image', load_image(img_url)) for img_url in image_urls]
         | 
| 518 | 
            +
            response = pipe(prompts)
         | 
| 519 | 
            +
            print(response)
         | 
| 520 | 
            +
            ```
         | 
| 521 | 
            +
             | 
| 522 | 
            +
            #### Multi-turn Conversation
         | 
| 523 | 
            +
             | 
| 524 | 
            +
            There are two ways to hold a multi-turn conversation with the pipeline. One is to construct the messages in the OpenAI format and pass them to the pipeline as shown in the examples above; the other is to use the `pipeline.chat` interface.
         | 
| 525 | 
            +
             | 
| 526 | 
            +
            ```python
         | 
| 527 | 
            +
            from lmdeploy import pipeline, PytorchEngineConfig, GenerationConfig
         | 
| 528 | 
            +
            from lmdeploy.vl import load_image
         | 
| 529 | 
            +
             | 
| 530 | 
            +
            # Please set tp=2 for the 38B version and tp=8 for the 241B-A28B version.
         | 
| 531 | 
            +
            model = 'OpenGVLab/InternVL3_5-8B'
         | 
| 532 | 
            +
            pipe = pipeline(model, backend_config=PytorchEngineConfig(session_len=32768, tp=1))
         | 
| 533 | 
            +
             | 
| 534 | 
            +
            image = load_image('https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/demo/resources/human-pose.jpg')
         | 
| 535 | 
            +
            gen_config = GenerationConfig(top_k=50, top_p=0.95, temperature=0.6, max_new_tokens=8192)
         | 
| 536 | 
            +
            sess = pipe.chat(('describe this image', image), gen_config=gen_config)
         | 
| 537 | 
            +
            print(sess.response.text)
         | 
| 538 | 
            +
            sess = pipe.chat('What is the woman doing?', session=sess, gen_config=gen_config)
         | 
| 539 | 
            +
            print(sess.response.text)
         | 
| 540 | 
            +
            ```
         | 
| 541 | 
            +
             | 
| 542 | 
            +
            #### Service
         | 
| 543 | 
            +
             | 
| 544 | 
            +
            LMDeploy's `api_server` enables models to be easily packed into services with a single command. The provided RESTful APIs are compatible with OpenAI's interfaces. Below is an example of service startup:
         | 
| 545 | 
            +
             | 
| 546 | 
            +
            ```shell
         | 
| 547 | 
            +
            lmdeploy serve api_server OpenGVLab/InternVL3_5-8B --server-port 23333 --tp 1 --backend pytorch
         | 
| 548 | 
            +
            ```
         | 
| 549 | 
            +
             | 
| 550 | 
            +
            To use the OpenAI-style interface, you need to install the `openai` Python package:
         | 
| 551 | 
            +
             | 
| 552 | 
            +
            ```shell
         | 
| 553 | 
            +
            pip install openai
         | 
| 554 | 
            +
            ```
         | 
| 555 | 
            +
             | 
| 556 | 
            +
            Then, use the code below to make the API call:
         | 
| 557 | 
            +
             | 
| 558 | 
            +
            ```python
         | 
| 559 | 
            +
            from openai import OpenAI
         | 
| 560 | 
            +
             | 
| 561 | 
            +
            client = OpenAI(api_key='YOUR_API_KEY', base_url='http://0.0.0.0:23333/v1')
         | 
| 562 | 
            +
            model_name = client.models.list().data[0].id
         | 
| 563 | 
            +
            response = client.chat.completions.create(
         | 
| 564 | 
            +
                model=model_name,
         | 
| 565 | 
            +
                messages=[{
         | 
| 566 | 
            +
                    'role':
         | 
| 567 | 
            +
                    'user',
         | 
| 568 | 
            +
                    'content': [{
         | 
| 569 | 
            +
                        'type': 'text',
         | 
| 570 | 
            +
                        'text': 'describe this image',
         | 
| 571 | 
            +
                    }, {
         | 
| 572 | 
            +
                        'type': 'image_url',
         | 
| 573 | 
            +
                        'image_url': {
         | 
| 574 | 
            +
                            'url':
         | 
| 575 | 
            +
                            'https://modelscope.oss-cn-beijing.aliyuncs.com/resource/tiger.jpeg',
         | 
| 576 | 
            +
                        },
         | 
| 577 | 
            +
                    }],
         | 
| 578 | 
            +
                }],
         | 
| 579 | 
            +
                temperature=0.8,
         | 
| 580 | 
            +
                top_p=0.8)
         | 
| 581 | 
            +
            print(response)
         | 
| 582 | 
            +
            ```
         | 
| 583 | 
            +
             | 
| 584 | 
            +
            ## License
         | 
| 585 | 
            +
             | 
| 586 | 
            +
            This project is released under the apache-2.0 License. This project uses the pre-trained Qwen3 as a component, which is licensed under the apache-2.0 License.
         | 
| 587 | 
            +
             | 
| 588 | 
            +
            ## Citation
         | 
| 589 | 
            +
             | 
| 590 | 
            +
            If you find this project useful in your research, please consider citing:
         | 
| 591 | 
            +
             | 
| 592 | 
            +
            ```BibTeX
         | 
| 593 | 
            +
            @article{wang2025internvl3_5,
         | 
| 594 | 
            +
              title={InternVL3.5: Advancing Open-Source Multimodal Models in Versatility, Reasoning, and Efficiency},
         | 
| 595 | 
            +
              author={Wang, Weiyun and Gao, Zhangwei and Gu, Lixin and Pu, Hengjun and Cui, Long and Wei, Xingguang and Liu, Zhaoyang and Jing, Linglin and Ye, Shenglong and Shao, Jie and others},
         | 
| 596 | 
            +
              journal={arXiv preprint arXiv:2508.18265},
         | 
| 597 | 
            +
              year={2025}
         | 
| 598 | 
            +
            }
         | 
| 599 | 
            +
            ```
         | 
    	
        added_tokens.json
    ADDED
    
    | @@ -0,0 +1,38 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "</box>": 151677,
         | 
| 3 | 
            +
              "</img>": 151670,
         | 
| 4 | 
            +
              "</quad>": 151673,
         | 
| 5 | 
            +
              "</ref>": 151675,
         | 
| 6 | 
            +
              "</think>": 151668,
         | 
| 7 | 
            +
              "</tool_call>": 151658,
         | 
| 8 | 
            +
              "</tool_response>": 151666,
         | 
| 9 | 
            +
              "<IMG_CONTEXT>": 151671,
         | 
| 10 | 
            +
              "<box>": 151676,
         | 
| 11 | 
            +
              "<img>": 151669,
         | 
| 12 | 
            +
              "<quad>": 151672,
         | 
| 13 | 
            +
              "<ref>": 151674,
         | 
| 14 | 
            +
              "<think>": 151667,
         | 
| 15 | 
            +
              "<tool_call>": 151657,
         | 
| 16 | 
            +
              "<tool_response>": 151665,
         | 
| 17 | 
            +
              "<video>": 151678,
         | 
| 18 | 
            +
              "<|box_end|>": 151649,
         | 
| 19 | 
            +
              "<|box_start|>": 151648,
         | 
| 20 | 
            +
              "<|endoftext|>": 151643,
         | 
| 21 | 
            +
              "<|file_sep|>": 151664,
         | 
| 22 | 
            +
              "<|fim_middle|>": 151660,
         | 
| 23 | 
            +
              "<|fim_pad|>": 151662,
         | 
| 24 | 
            +
              "<|fim_prefix|>": 151659,
         | 
| 25 | 
            +
              "<|fim_suffix|>": 151661,
         | 
| 26 | 
            +
              "<|im_end|>": 151645,
         | 
| 27 | 
            +
              "<|im_start|>": 151644,
         | 
| 28 | 
            +
              "<|image_pad|>": 151655,
         | 
| 29 | 
            +
              "<|object_ref_end|>": 151647,
         | 
| 30 | 
            +
              "<|object_ref_start|>": 151646,
         | 
| 31 | 
            +
              "<|quad_end|>": 151651,
         | 
| 32 | 
            +
              "<|quad_start|>": 151650,
         | 
| 33 | 
            +
              "<|repo_name|>": 151663,
         | 
| 34 | 
            +
              "<|video_pad|>": 151656,
         | 
| 35 | 
            +
              "<|vision_end|>": 151653,
         | 
| 36 | 
            +
              "<|vision_pad|>": 151654,
         | 
| 37 | 
            +
              "<|vision_start|>": 151652
         | 
| 38 | 
            +
            }
         | 
    	
        chat_template.jinja
    ADDED
    
    | @@ -0,0 +1,6 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {% for message in messages %}{{'<|im_start|>' + message['role'] + '
         | 
| 2 | 
            +
            '}}{% if message['content'] is string %}{{ message['content'] }}{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' %}{{ '<IMG_CONTEXT>
         | 
| 3 | 
            +
            ' }}{% elif content['type'] == 'video' %}{{ '<video>
         | 
| 4 | 
            +
            ' }}{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}{% endif %}{{'<|im_end|>
         | 
| 5 | 
            +
            '}}{% endfor %}{% if add_generation_prompt %}{{'<|im_start|>assistant
         | 
| 6 | 
            +
            ' }}{% endif %}
         | 
    	
        config.json
    ADDED
    
    | @@ -0,0 +1,121 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "architectures": [
         | 
| 3 | 
            +
                "InternVLForConditionalGeneration"
         | 
| 4 | 
            +
              ],
         | 
| 5 | 
            +
              "downsample_ratio": 0.5,
         | 
| 6 | 
            +
              "image_seq_length": 256,
         | 
| 7 | 
            +
              "image_token_id": 151671,
         | 
| 8 | 
            +
              "model_type": "internvl",
         | 
| 9 | 
            +
              "projector_hidden_act": "gelu",
         | 
| 10 | 
            +
              "text_config": {
         | 
| 11 | 
            +
                "_name_or_path": "/root/codespace/checkpoints/Qwen3-4B",
         | 
| 12 | 
            +
                "architectures": [
         | 
| 13 | 
            +
                  "Qwen3ForCausalLM"
         | 
| 14 | 
            +
                ],
         | 
| 15 | 
            +
                "attention_bias": false,
         | 
| 16 | 
            +
                "attention_dropout": 0.0,
         | 
| 17 | 
            +
                "bos_token_id": 151643,
         | 
| 18 | 
            +
                "debug": false,
         | 
| 19 | 
            +
                "eos_token_id": 151645,
         | 
| 20 | 
            +
                "ep_size": 1,
         | 
| 21 | 
            +
                "head_dim": 128,
         | 
| 22 | 
            +
                "hidden_act": "silu",
         | 
| 23 | 
            +
                "hidden_size": 2560,
         | 
| 24 | 
            +
                "initializer_range": 0.02,
         | 
| 25 | 
            +
                "intermediate_size": 9728,
         | 
| 26 | 
            +
                "layer_types": [
         | 
| 27 | 
            +
                  "full_attention",
         | 
| 28 | 
            +
                  "full_attention",
         | 
| 29 | 
            +
                  "full_attention",
         | 
| 30 | 
            +
                  "full_attention",
         | 
| 31 | 
            +
                  "full_attention",
         | 
| 32 | 
            +
                  "full_attention",
         | 
| 33 | 
            +
                  "full_attention",
         | 
| 34 | 
            +
                  "full_attention",
         | 
| 35 | 
            +
                  "full_attention",
         | 
| 36 | 
            +
                  "full_attention",
         | 
| 37 | 
            +
                  "full_attention",
         | 
| 38 | 
            +
                  "full_attention",
         | 
| 39 | 
            +
                  "full_attention",
         | 
| 40 | 
            +
                  "full_attention",
         | 
| 41 | 
            +
                  "full_attention",
         | 
| 42 | 
            +
                  "full_attention",
         | 
| 43 | 
            +
                  "full_attention",
         | 
| 44 | 
            +
                  "full_attention",
         | 
| 45 | 
            +
                  "full_attention",
         | 
| 46 | 
            +
                  "full_attention",
         | 
| 47 | 
            +
                  "full_attention",
         | 
| 48 | 
            +
                  "full_attention",
         | 
| 49 | 
            +
                  "full_attention",
         | 
| 50 | 
            +
                  "full_attention",
         | 
| 51 | 
            +
                  "full_attention",
         | 
| 52 | 
            +
                  "full_attention",
         | 
| 53 | 
            +
                  "full_attention",
         | 
| 54 | 
            +
                  "full_attention",
         | 
| 55 | 
            +
                  "full_attention",
         | 
| 56 | 
            +
                  "full_attention",
         | 
| 57 | 
            +
                  "full_attention",
         | 
| 58 | 
            +
                  "full_attention",
         | 
| 59 | 
            +
                  "full_attention",
         | 
| 60 | 
            +
                  "full_attention",
         | 
| 61 | 
            +
                  "full_attention",
         | 
| 62 | 
            +
                  "full_attention"
         | 
| 63 | 
            +
                ],
         | 
| 64 | 
            +
                "max_position_embeddings": 40960,
         | 
| 65 | 
            +
                "max_window_layers": 36,
         | 
| 66 | 
            +
                "micro_forward": false,
         | 
| 67 | 
            +
                "model_type": "qwen3",
         | 
| 68 | 
            +
                "num_attention_heads": 32,
         | 
| 69 | 
            +
                "num_hidden_layers": 36,
         | 
| 70 | 
            +
                "num_key_value_heads": 8,
         | 
| 71 | 
            +
                "rms_norm_eps": 1e-06,
         | 
| 72 | 
            +
                "rope_scaling": null,
         | 
| 73 | 
            +
                "rope_theta": 1000000,
         | 
| 74 | 
            +
                "skip_checkpoint": false,
         | 
| 75 | 
            +
                "sliding_window": null,
         | 
| 76 | 
            +
                "torch_dtype": "bfloat16",
         | 
| 77 | 
            +
                "use_cache": true,
         | 
| 78 | 
            +
                "use_deepep": false,
         | 
| 79 | 
            +
                "use_sliding_window": false,
         | 
| 80 | 
            +
                "vocab_size": 151936
         | 
| 81 | 
            +
              },
         | 
| 82 | 
            +
              "torch_dtype": "bfloat16",
         | 
| 83 | 
            +
              "transformers_version": "4.55.0",
         | 
| 84 | 
            +
              "vision_config": {
         | 
| 85 | 
            +
                "architectures": [
         | 
| 86 | 
            +
                  "InternVisionModel"
         | 
| 87 | 
            +
                ],
         | 
| 88 | 
            +
                "attention_bias": true,
         | 
| 89 | 
            +
                "attention_dropout": 0.0,
         | 
| 90 | 
            +
                "dropout": 0.0,
         | 
| 91 | 
            +
                "hidden_act": "gelu",
         | 
| 92 | 
            +
                "hidden_dropout_prob": 0.0,
         | 
| 93 | 
            +
                "hidden_size": 1024,
         | 
| 94 | 
            +
                "image_size": [
         | 
| 95 | 
            +
                  448,
         | 
| 96 | 
            +
                  448
         | 
| 97 | 
            +
                ],
         | 
| 98 | 
            +
                "initializer_factor": 0.1,
         | 
| 99 | 
            +
                "initializer_range": 1e-10,
         | 
| 100 | 
            +
                "intermediate_size": 4096,
         | 
| 101 | 
            +
                "layer_norm_eps": 1e-06,
         | 
| 102 | 
            +
                "layer_scale_init_value": 0.1,
         | 
| 103 | 
            +
                "model_type": "internvl_vision",
         | 
| 104 | 
            +
                "norm_type": "layer_norm",
         | 
| 105 | 
            +
                "num_attention_heads": 16,
         | 
| 106 | 
            +
                "num_channels": 3,
         | 
| 107 | 
            +
                "num_hidden_layers": 24,
         | 
| 108 | 
            +
                "patch_size": [
         | 
| 109 | 
            +
                  14,
         | 
| 110 | 
            +
                  14
         | 
| 111 | 
            +
                ],
         | 
| 112 | 
            +
                "projection_dropout": 0.0,
         | 
| 113 | 
            +
                "torch_dtype": "bfloat16",
         | 
| 114 | 
            +
                "use_absolute_position_embeddings": true,
         | 
| 115 | 
            +
                "use_mask_token": false,
         | 
| 116 | 
            +
                "use_mean_pooling": true,
         | 
| 117 | 
            +
                "use_qk_norm": false
         | 
| 118 | 
            +
              },
         | 
| 119 | 
            +
              "vision_feature_layer": -1,
         | 
| 120 | 
            +
              "vision_feature_select_strategy": "default"
         | 
| 121 | 
            +
            }
         | 
    	
        examples/image1.jpg
    ADDED
    
    |   | 
    	
        examples/image2.jpg
    ADDED
    
    |   | 
| Git LFS Details
 | 
    	
        examples/red-panda.mp4
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:d921c07bb97224d65a37801541d246067f0d506f08723ffa1ad85c217907ccb8
         | 
| 3 | 
            +
            size 1867237
         | 
    	
        generation_config.json
    ADDED
    
    | @@ -0,0 +1,6 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "_from_model_config": true,
         | 
| 3 | 
            +
              "bos_token_id": 151643,
         | 
| 4 | 
            +
              "eos_token_id": 151645,
         | 
| 5 | 
            +
              "transformers_version": "4.55.0"
         | 
| 6 | 
            +
            }
         | 
    	
        merges.txt
    ADDED
    
    | The diff for this file is too large to render. 
		See raw diff | 
|  | 
    	
        model-00001-of-00002.safetensors
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:337d6d5bea97956480a1560c3a88e4e6c4455c3954b94ed468c367cf4b500f20
         | 
| 3 | 
            +
            size 4954007064
         | 
    	
        model-00002-of-00002.safetensors
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:343234927f41c7d3a4aa8a2280864f477b1f77795463feb6b26910fa2e4921d3
         | 
| 3 | 
            +
            size 4511078400
         | 
    	
        model.safetensors.index.json
    ADDED
    
    | @@ -0,0 +1,849 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "metadata": {
         | 
| 3 | 
            +
                "total_parameters": 4732489216,
         | 
| 4 | 
            +
                "total_size": 9464978432
         | 
| 5 | 
            +
              },
         | 
| 6 | 
            +
              "weight_map": {
         | 
| 7 | 
            +
                "language_model.lm_head.weight": "model-00002-of-00002.safetensors",
         | 
| 8 | 
            +
                "language_model.model.embed_tokens.weight": "model-00001-of-00002.safetensors",
         | 
| 9 | 
            +
                "language_model.model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
         | 
| 10 | 
            +
                "language_model.model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 11 | 
            +
                "language_model.model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 12 | 
            +
                "language_model.model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 13 | 
            +
                "language_model.model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
         | 
| 14 | 
            +
                "language_model.model.layers.0.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
         | 
| 15 | 
            +
                "language_model.model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 16 | 
            +
                "language_model.model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 17 | 
            +
                "language_model.model.layers.0.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
         | 
| 18 | 
            +
                "language_model.model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 19 | 
            +
                "language_model.model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 20 | 
            +
                "language_model.model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
         | 
| 21 | 
            +
                "language_model.model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 22 | 
            +
                "language_model.model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 23 | 
            +
                "language_model.model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 24 | 
            +
                "language_model.model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
         | 
| 25 | 
            +
                "language_model.model.layers.1.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
         | 
| 26 | 
            +
                "language_model.model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 27 | 
            +
                "language_model.model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 28 | 
            +
                "language_model.model.layers.1.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
         | 
| 29 | 
            +
                "language_model.model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 30 | 
            +
                "language_model.model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 31 | 
            +
                "language_model.model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
         | 
| 32 | 
            +
                "language_model.model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 33 | 
            +
                "language_model.model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 34 | 
            +
                "language_model.model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 35 | 
            +
                "language_model.model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
         | 
| 36 | 
            +
                "language_model.model.layers.10.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
         | 
| 37 | 
            +
                "language_model.model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 38 | 
            +
                "language_model.model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 39 | 
            +
                "language_model.model.layers.10.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
         | 
| 40 | 
            +
                "language_model.model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 41 | 
            +
                "language_model.model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 42 | 
            +
                "language_model.model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
         | 
| 43 | 
            +
                "language_model.model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 44 | 
            +
                "language_model.model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 45 | 
            +
                "language_model.model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 46 | 
            +
                "language_model.model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
         | 
| 47 | 
            +
                "language_model.model.layers.11.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
         | 
| 48 | 
            +
                "language_model.model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 49 | 
            +
                "language_model.model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 50 | 
            +
                "language_model.model.layers.11.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
         | 
| 51 | 
            +
                "language_model.model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 52 | 
            +
                "language_model.model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 53 | 
            +
                "language_model.model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
         | 
| 54 | 
            +
                "language_model.model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 55 | 
            +
                "language_model.model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 56 | 
            +
                "language_model.model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 57 | 
            +
                "language_model.model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
         | 
| 58 | 
            +
                "language_model.model.layers.12.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
         | 
| 59 | 
            +
                "language_model.model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 60 | 
            +
                "language_model.model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 61 | 
            +
                "language_model.model.layers.12.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
         | 
| 62 | 
            +
                "language_model.model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 63 | 
            +
                "language_model.model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 64 | 
            +
                "language_model.model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
         | 
| 65 | 
            +
                "language_model.model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 66 | 
            +
                "language_model.model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 67 | 
            +
                "language_model.model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 68 | 
            +
                "language_model.model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
         | 
| 69 | 
            +
                "language_model.model.layers.13.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
         | 
| 70 | 
            +
                "language_model.model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 71 | 
            +
                "language_model.model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 72 | 
            +
                "language_model.model.layers.13.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
         | 
| 73 | 
            +
                "language_model.model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 74 | 
            +
                "language_model.model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 75 | 
            +
                "language_model.model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
         | 
| 76 | 
            +
                "language_model.model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 77 | 
            +
                "language_model.model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 78 | 
            +
                "language_model.model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 79 | 
            +
                "language_model.model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
         | 
| 80 | 
            +
                "language_model.model.layers.14.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
         | 
| 81 | 
            +
                "language_model.model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 82 | 
            +
                "language_model.model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 83 | 
            +
                "language_model.model.layers.14.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
         | 
| 84 | 
            +
                "language_model.model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 85 | 
            +
                "language_model.model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 86 | 
            +
                "language_model.model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
         | 
| 87 | 
            +
                "language_model.model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 88 | 
            +
                "language_model.model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 89 | 
            +
                "language_model.model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 90 | 
            +
                "language_model.model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
         | 
| 91 | 
            +
                "language_model.model.layers.15.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
         | 
| 92 | 
            +
                "language_model.model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 93 | 
            +
                "language_model.model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 94 | 
            +
                "language_model.model.layers.15.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
         | 
| 95 | 
            +
                "language_model.model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 96 | 
            +
                "language_model.model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 97 | 
            +
                "language_model.model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
         | 
| 98 | 
            +
                "language_model.model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 99 | 
            +
                "language_model.model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 100 | 
            +
                "language_model.model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 101 | 
            +
                "language_model.model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
         | 
| 102 | 
            +
                "language_model.model.layers.16.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
         | 
| 103 | 
            +
                "language_model.model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 104 | 
            +
                "language_model.model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 105 | 
            +
                "language_model.model.layers.16.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
         | 
| 106 | 
            +
                "language_model.model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 107 | 
            +
                "language_model.model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 108 | 
            +
                "language_model.model.layers.17.input_layernorm.weight": "model-00002-of-00002.safetensors",
         | 
| 109 | 
            +
                "language_model.model.layers.17.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 110 | 
            +
                "language_model.model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 111 | 
            +
                "language_model.model.layers.17.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 112 | 
            +
                "language_model.model.layers.17.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
         | 
| 113 | 
            +
                "language_model.model.layers.17.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
         | 
| 114 | 
            +
                "language_model.model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 115 | 
            +
                "language_model.model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 116 | 
            +
                "language_model.model.layers.17.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
         | 
| 117 | 
            +
                "language_model.model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 118 | 
            +
                "language_model.model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 119 | 
            +
                "language_model.model.layers.18.input_layernorm.weight": "model-00002-of-00002.safetensors",
         | 
| 120 | 
            +
                "language_model.model.layers.18.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 121 | 
            +
                "language_model.model.layers.18.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 122 | 
            +
                "language_model.model.layers.18.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 123 | 
            +
                "language_model.model.layers.18.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
         | 
| 124 | 
            +
                "language_model.model.layers.18.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
         | 
| 125 | 
            +
                "language_model.model.layers.18.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 126 | 
            +
                "language_model.model.layers.18.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 127 | 
            +
                "language_model.model.layers.18.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
         | 
| 128 | 
            +
                "language_model.model.layers.18.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 129 | 
            +
                "language_model.model.layers.18.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 130 | 
            +
                "language_model.model.layers.19.input_layernorm.weight": "model-00002-of-00002.safetensors",
         | 
| 131 | 
            +
                "language_model.model.layers.19.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 132 | 
            +
                "language_model.model.layers.19.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 133 | 
            +
                "language_model.model.layers.19.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 134 | 
            +
                "language_model.model.layers.19.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
         | 
| 135 | 
            +
                "language_model.model.layers.19.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
         | 
| 136 | 
            +
                "language_model.model.layers.19.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 137 | 
            +
                "language_model.model.layers.19.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 138 | 
            +
                "language_model.model.layers.19.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
         | 
| 139 | 
            +
                "language_model.model.layers.19.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 140 | 
            +
                "language_model.model.layers.19.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 141 | 
            +
                "language_model.model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
         | 
| 142 | 
            +
                "language_model.model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 143 | 
            +
                "language_model.model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 144 | 
            +
                "language_model.model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 145 | 
            +
                "language_model.model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
         | 
| 146 | 
            +
                "language_model.model.layers.2.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
         | 
| 147 | 
            +
                "language_model.model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 148 | 
            +
                "language_model.model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 149 | 
            +
                "language_model.model.layers.2.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
         | 
| 150 | 
            +
                "language_model.model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 151 | 
            +
                "language_model.model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 152 | 
            +
                "language_model.model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors",
         | 
| 153 | 
            +
                "language_model.model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 154 | 
            +
                "language_model.model.layers.20.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 155 | 
            +
                "language_model.model.layers.20.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 156 | 
            +
                "language_model.model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
         | 
| 157 | 
            +
                "language_model.model.layers.20.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
         | 
| 158 | 
            +
                "language_model.model.layers.20.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 159 | 
            +
                "language_model.model.layers.20.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 160 | 
            +
                "language_model.model.layers.20.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
         | 
| 161 | 
            +
                "language_model.model.layers.20.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 162 | 
            +
                "language_model.model.layers.20.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 163 | 
            +
                "language_model.model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
         | 
| 164 | 
            +
                "language_model.model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 165 | 
            +
                "language_model.model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 166 | 
            +
                "language_model.model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 167 | 
            +
                "language_model.model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
         | 
| 168 | 
            +
                "language_model.model.layers.21.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
         | 
| 169 | 
            +
                "language_model.model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 170 | 
            +
                "language_model.model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 171 | 
            +
                "language_model.model.layers.21.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
         | 
| 172 | 
            +
                "language_model.model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 173 | 
            +
                "language_model.model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 174 | 
            +
                "language_model.model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
         | 
| 175 | 
            +
                "language_model.model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 176 | 
            +
                "language_model.model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 177 | 
            +
                "language_model.model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 178 | 
            +
                "language_model.model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
         | 
| 179 | 
            +
                "language_model.model.layers.22.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
         | 
| 180 | 
            +
                "language_model.model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 181 | 
            +
                "language_model.model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 182 | 
            +
                "language_model.model.layers.22.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
         | 
| 183 | 
            +
                "language_model.model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 184 | 
            +
                "language_model.model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 185 | 
            +
                "language_model.model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors",
         | 
| 186 | 
            +
                "language_model.model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 187 | 
            +
                "language_model.model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 188 | 
            +
                "language_model.model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 189 | 
            +
                "language_model.model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
         | 
| 190 | 
            +
                "language_model.model.layers.23.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
         | 
| 191 | 
            +
                "language_model.model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 192 | 
            +
                "language_model.model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 193 | 
            +
                "language_model.model.layers.23.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
         | 
| 194 | 
            +
                "language_model.model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 195 | 
            +
                "language_model.model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 196 | 
            +
                "language_model.model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors",
         | 
| 197 | 
            +
                "language_model.model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 198 | 
            +
                "language_model.model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 199 | 
            +
                "language_model.model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 200 | 
            +
                "language_model.model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
         | 
| 201 | 
            +
                "language_model.model.layers.24.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
         | 
| 202 | 
            +
                "language_model.model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 203 | 
            +
                "language_model.model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 204 | 
            +
                "language_model.model.layers.24.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
         | 
| 205 | 
            +
                "language_model.model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 206 | 
            +
                "language_model.model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 207 | 
            +
                "language_model.model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors",
         | 
| 208 | 
            +
                "language_model.model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 209 | 
            +
                "language_model.model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 210 | 
            +
                "language_model.model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 211 | 
            +
                "language_model.model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
         | 
| 212 | 
            +
                "language_model.model.layers.25.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
         | 
| 213 | 
            +
                "language_model.model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 214 | 
            +
                "language_model.model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 215 | 
            +
                "language_model.model.layers.25.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
         | 
| 216 | 
            +
                "language_model.model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 217 | 
            +
                "language_model.model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 218 | 
            +
                "language_model.model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors",
         | 
| 219 | 
            +
                "language_model.model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 220 | 
            +
                "language_model.model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 221 | 
            +
                "language_model.model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 222 | 
            +
                "language_model.model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
         | 
| 223 | 
            +
                "language_model.model.layers.26.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
         | 
| 224 | 
            +
                "language_model.model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 225 | 
            +
                "language_model.model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 226 | 
            +
                "language_model.model.layers.26.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
         | 
| 227 | 
            +
                "language_model.model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 228 | 
            +
                "language_model.model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 229 | 
            +
                "language_model.model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
         | 
| 230 | 
            +
                "language_model.model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 231 | 
            +
                "language_model.model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 232 | 
            +
                "language_model.model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 233 | 
            +
                "language_model.model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
         | 
| 234 | 
            +
                "language_model.model.layers.27.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
         | 
| 235 | 
            +
                "language_model.model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 236 | 
            +
                "language_model.model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 237 | 
            +
                "language_model.model.layers.27.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
         | 
| 238 | 
            +
                "language_model.model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 239 | 
            +
                "language_model.model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 240 | 
            +
                "language_model.model.layers.28.input_layernorm.weight": "model-00002-of-00002.safetensors",
         | 
| 241 | 
            +
                "language_model.model.layers.28.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 242 | 
            +
                "language_model.model.layers.28.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 243 | 
            +
                "language_model.model.layers.28.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 244 | 
            +
                "language_model.model.layers.28.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
         | 
| 245 | 
            +
                "language_model.model.layers.28.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
         | 
| 246 | 
            +
                "language_model.model.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 247 | 
            +
                "language_model.model.layers.28.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 248 | 
            +
                "language_model.model.layers.28.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
         | 
| 249 | 
            +
                "language_model.model.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 250 | 
            +
                "language_model.model.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 251 | 
            +
                "language_model.model.layers.29.input_layernorm.weight": "model-00002-of-00002.safetensors",
         | 
| 252 | 
            +
                "language_model.model.layers.29.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 253 | 
            +
                "language_model.model.layers.29.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 254 | 
            +
                "language_model.model.layers.29.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 255 | 
            +
                "language_model.model.layers.29.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
         | 
| 256 | 
            +
                "language_model.model.layers.29.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
         | 
| 257 | 
            +
                "language_model.model.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 258 | 
            +
                "language_model.model.layers.29.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 259 | 
            +
                "language_model.model.layers.29.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
         | 
| 260 | 
            +
                "language_model.model.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 261 | 
            +
                "language_model.model.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 262 | 
            +
                "language_model.model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
         | 
| 263 | 
            +
                "language_model.model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 264 | 
            +
                "language_model.model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 265 | 
            +
                "language_model.model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 266 | 
            +
                "language_model.model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
         | 
| 267 | 
            +
                "language_model.model.layers.3.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
         | 
| 268 | 
            +
                "language_model.model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 269 | 
            +
                "language_model.model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 270 | 
            +
                "language_model.model.layers.3.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
         | 
| 271 | 
            +
                "language_model.model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 272 | 
            +
                "language_model.model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 273 | 
            +
                "language_model.model.layers.30.input_layernorm.weight": "model-00002-of-00002.safetensors",
         | 
| 274 | 
            +
                "language_model.model.layers.30.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 275 | 
            +
                "language_model.model.layers.30.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 276 | 
            +
                "language_model.model.layers.30.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 277 | 
            +
                "language_model.model.layers.30.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
         | 
| 278 | 
            +
                "language_model.model.layers.30.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
         | 
| 279 | 
            +
                "language_model.model.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 280 | 
            +
                "language_model.model.layers.30.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 281 | 
            +
                "language_model.model.layers.30.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
         | 
| 282 | 
            +
                "language_model.model.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 283 | 
            +
                "language_model.model.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 284 | 
            +
                "language_model.model.layers.31.input_layernorm.weight": "model-00002-of-00002.safetensors",
         | 
| 285 | 
            +
                "language_model.model.layers.31.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 286 | 
            +
                "language_model.model.layers.31.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 287 | 
            +
                "language_model.model.layers.31.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 288 | 
            +
                "language_model.model.layers.31.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
         | 
| 289 | 
            +
                "language_model.model.layers.31.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
         | 
| 290 | 
            +
                "language_model.model.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 291 | 
            +
                "language_model.model.layers.31.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 292 | 
            +
                "language_model.model.layers.31.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
         | 
| 293 | 
            +
                "language_model.model.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 294 | 
            +
                "language_model.model.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 295 | 
            +
                "language_model.model.layers.32.input_layernorm.weight": "model-00002-of-00002.safetensors",
         | 
| 296 | 
            +
                "language_model.model.layers.32.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 297 | 
            +
                "language_model.model.layers.32.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 298 | 
            +
                "language_model.model.layers.32.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 299 | 
            +
                "language_model.model.layers.32.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
         | 
| 300 | 
            +
                "language_model.model.layers.32.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
         | 
| 301 | 
            +
                "language_model.model.layers.32.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 302 | 
            +
                "language_model.model.layers.32.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 303 | 
            +
                "language_model.model.layers.32.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
         | 
| 304 | 
            +
                "language_model.model.layers.32.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 305 | 
            +
                "language_model.model.layers.32.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 306 | 
            +
                "language_model.model.layers.33.input_layernorm.weight": "model-00002-of-00002.safetensors",
         | 
| 307 | 
            +
                "language_model.model.layers.33.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 308 | 
            +
                "language_model.model.layers.33.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 309 | 
            +
                "language_model.model.layers.33.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 310 | 
            +
                "language_model.model.layers.33.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
         | 
| 311 | 
            +
                "language_model.model.layers.33.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
         | 
| 312 | 
            +
                "language_model.model.layers.33.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 313 | 
            +
                "language_model.model.layers.33.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 314 | 
            +
                "language_model.model.layers.33.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
         | 
| 315 | 
            +
                "language_model.model.layers.33.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 316 | 
            +
                "language_model.model.layers.33.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 317 | 
            +
                "language_model.model.layers.34.input_layernorm.weight": "model-00002-of-00002.safetensors",
         | 
| 318 | 
            +
                "language_model.model.layers.34.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 319 | 
            +
                "language_model.model.layers.34.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 320 | 
            +
                "language_model.model.layers.34.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 321 | 
            +
                "language_model.model.layers.34.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
         | 
| 322 | 
            +
                "language_model.model.layers.34.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
         | 
| 323 | 
            +
                "language_model.model.layers.34.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 324 | 
            +
                "language_model.model.layers.34.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 325 | 
            +
                "language_model.model.layers.34.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
         | 
| 326 | 
            +
                "language_model.model.layers.34.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 327 | 
            +
                "language_model.model.layers.34.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 328 | 
            +
                "language_model.model.layers.35.input_layernorm.weight": "model-00002-of-00002.safetensors",
         | 
| 329 | 
            +
                "language_model.model.layers.35.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 330 | 
            +
                "language_model.model.layers.35.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 331 | 
            +
                "language_model.model.layers.35.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 332 | 
            +
                "language_model.model.layers.35.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
         | 
| 333 | 
            +
                "language_model.model.layers.35.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
         | 
| 334 | 
            +
                "language_model.model.layers.35.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 335 | 
            +
                "language_model.model.layers.35.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 336 | 
            +
                "language_model.model.layers.35.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
         | 
| 337 | 
            +
                "language_model.model.layers.35.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 338 | 
            +
                "language_model.model.layers.35.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
         | 
| 339 | 
            +
                "language_model.model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
         | 
| 340 | 
            +
                "language_model.model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 341 | 
            +
                "language_model.model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 342 | 
            +
                "language_model.model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 343 | 
            +
                "language_model.model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
         | 
| 344 | 
            +
                "language_model.model.layers.4.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
         | 
| 345 | 
            +
                "language_model.model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 346 | 
            +
                "language_model.model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 347 | 
            +
                "language_model.model.layers.4.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
         | 
| 348 | 
            +
                "language_model.model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 349 | 
            +
                "language_model.model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 350 | 
            +
                "language_model.model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
         | 
| 351 | 
            +
                "language_model.model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 352 | 
            +
                "language_model.model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 353 | 
            +
                "language_model.model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 354 | 
            +
                "language_model.model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
         | 
| 355 | 
            +
                "language_model.model.layers.5.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
         | 
| 356 | 
            +
                "language_model.model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 357 | 
            +
                "language_model.model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 358 | 
            +
                "language_model.model.layers.5.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
         | 
| 359 | 
            +
                "language_model.model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 360 | 
            +
                "language_model.model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 361 | 
            +
                "language_model.model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
         | 
| 362 | 
            +
                "language_model.model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 363 | 
            +
                "language_model.model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 364 | 
            +
                "language_model.model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 365 | 
            +
                "language_model.model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
         | 
| 366 | 
            +
                "language_model.model.layers.6.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
         | 
| 367 | 
            +
                "language_model.model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 368 | 
            +
                "language_model.model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 369 | 
            +
                "language_model.model.layers.6.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
         | 
| 370 | 
            +
                "language_model.model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 371 | 
            +
                "language_model.model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 372 | 
            +
                "language_model.model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
         | 
| 373 | 
            +
                "language_model.model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 374 | 
            +
                "language_model.model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 375 | 
            +
                "language_model.model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 376 | 
            +
                "language_model.model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
         | 
| 377 | 
            +
                "language_model.model.layers.7.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
         | 
| 378 | 
            +
                "language_model.model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 379 | 
            +
                "language_model.model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 380 | 
            +
                "language_model.model.layers.7.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
         | 
| 381 | 
            +
                "language_model.model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 382 | 
            +
                "language_model.model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 383 | 
            +
                "language_model.model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
         | 
| 384 | 
            +
                "language_model.model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 385 | 
            +
                "language_model.model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 386 | 
            +
                "language_model.model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 387 | 
            +
                "language_model.model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
         | 
| 388 | 
            +
                "language_model.model.layers.8.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
         | 
| 389 | 
            +
                "language_model.model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 390 | 
            +
                "language_model.model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 391 | 
            +
                "language_model.model.layers.8.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
         | 
| 392 | 
            +
                "language_model.model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 393 | 
            +
                "language_model.model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 394 | 
            +
                "language_model.model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
         | 
| 395 | 
            +
                "language_model.model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 396 | 
            +
                "language_model.model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 397 | 
            +
                "language_model.model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 398 | 
            +
                "language_model.model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
         | 
| 399 | 
            +
                "language_model.model.layers.9.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
         | 
| 400 | 
            +
                "language_model.model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 401 | 
            +
                "language_model.model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 402 | 
            +
                "language_model.model.layers.9.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
         | 
| 403 | 
            +
                "language_model.model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 404 | 
            +
                "language_model.model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 405 | 
            +
                "language_model.model.norm.weight": "model-00002-of-00002.safetensors",
         | 
| 406 | 
            +
                "multi_modal_projector.layer_norm.bias": "model-00001-of-00002.safetensors",
         | 
| 407 | 
            +
                "multi_modal_projector.layer_norm.weight": "model-00001-of-00002.safetensors",
         | 
| 408 | 
            +
                "multi_modal_projector.linear_1.bias": "model-00001-of-00002.safetensors",
         | 
| 409 | 
            +
                "multi_modal_projector.linear_1.weight": "model-00001-of-00002.safetensors",
         | 
| 410 | 
            +
                "multi_modal_projector.linear_2.bias": "model-00001-of-00002.safetensors",
         | 
| 411 | 
            +
                "multi_modal_projector.linear_2.weight": "model-00001-of-00002.safetensors",
         | 
| 412 | 
            +
                "vision_tower.embeddings.cls_token": "model-00001-of-00002.safetensors",
         | 
| 413 | 
            +
                "vision_tower.embeddings.patch_embeddings.projection.bias": "model-00001-of-00002.safetensors",
         | 
| 414 | 
            +
                "vision_tower.embeddings.patch_embeddings.projection.weight": "model-00001-of-00002.safetensors",
         | 
| 415 | 
            +
                "vision_tower.embeddings.position_embeddings": "model-00001-of-00002.safetensors",
         | 
| 416 | 
            +
                "vision_tower.encoder.layer.0.attention.k_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 417 | 
            +
                "vision_tower.encoder.layer.0.attention.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 418 | 
            +
                "vision_tower.encoder.layer.0.attention.projection_layer.bias": "model-00001-of-00002.safetensors",
         | 
| 419 | 
            +
                "vision_tower.encoder.layer.0.attention.projection_layer.weight": "model-00001-of-00002.safetensors",
         | 
| 420 | 
            +
                "vision_tower.encoder.layer.0.attention.q_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 421 | 
            +
                "vision_tower.encoder.layer.0.attention.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 422 | 
            +
                "vision_tower.encoder.layer.0.attention.v_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 423 | 
            +
                "vision_tower.encoder.layer.0.attention.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 424 | 
            +
                "vision_tower.encoder.layer.0.lambda_1": "model-00001-of-00002.safetensors",
         | 
| 425 | 
            +
                "vision_tower.encoder.layer.0.lambda_2": "model-00001-of-00002.safetensors",
         | 
| 426 | 
            +
                "vision_tower.encoder.layer.0.layernorm_after.bias": "model-00001-of-00002.safetensors",
         | 
| 427 | 
            +
                "vision_tower.encoder.layer.0.layernorm_after.weight": "model-00001-of-00002.safetensors",
         | 
| 428 | 
            +
                "vision_tower.encoder.layer.0.layernorm_before.bias": "model-00001-of-00002.safetensors",
         | 
| 429 | 
            +
                "vision_tower.encoder.layer.0.layernorm_before.weight": "model-00001-of-00002.safetensors",
         | 
| 430 | 
            +
                "vision_tower.encoder.layer.0.mlp.fc1.bias": "model-00001-of-00002.safetensors",
         | 
| 431 | 
            +
                "vision_tower.encoder.layer.0.mlp.fc1.weight": "model-00001-of-00002.safetensors",
         | 
| 432 | 
            +
                "vision_tower.encoder.layer.0.mlp.fc2.bias": "model-00001-of-00002.safetensors",
         | 
| 433 | 
            +
                "vision_tower.encoder.layer.0.mlp.fc2.weight": "model-00001-of-00002.safetensors",
         | 
| 434 | 
            +
                "vision_tower.encoder.layer.1.attention.k_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 435 | 
            +
                "vision_tower.encoder.layer.1.attention.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 436 | 
            +
                "vision_tower.encoder.layer.1.attention.projection_layer.bias": "model-00001-of-00002.safetensors",
         | 
| 437 | 
            +
                "vision_tower.encoder.layer.1.attention.projection_layer.weight": "model-00001-of-00002.safetensors",
         | 
| 438 | 
            +
                "vision_tower.encoder.layer.1.attention.q_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 439 | 
            +
                "vision_tower.encoder.layer.1.attention.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 440 | 
            +
                "vision_tower.encoder.layer.1.attention.v_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 441 | 
            +
                "vision_tower.encoder.layer.1.attention.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 442 | 
            +
                "vision_tower.encoder.layer.1.lambda_1": "model-00001-of-00002.safetensors",
         | 
| 443 | 
            +
                "vision_tower.encoder.layer.1.lambda_2": "model-00001-of-00002.safetensors",
         | 
| 444 | 
            +
                "vision_tower.encoder.layer.1.layernorm_after.bias": "model-00001-of-00002.safetensors",
         | 
| 445 | 
            +
                "vision_tower.encoder.layer.1.layernorm_after.weight": "model-00001-of-00002.safetensors",
         | 
| 446 | 
            +
                "vision_tower.encoder.layer.1.layernorm_before.bias": "model-00001-of-00002.safetensors",
         | 
| 447 | 
            +
                "vision_tower.encoder.layer.1.layernorm_before.weight": "model-00001-of-00002.safetensors",
         | 
| 448 | 
            +
                "vision_tower.encoder.layer.1.mlp.fc1.bias": "model-00001-of-00002.safetensors",
         | 
| 449 | 
            +
                "vision_tower.encoder.layer.1.mlp.fc1.weight": "model-00001-of-00002.safetensors",
         | 
| 450 | 
            +
                "vision_tower.encoder.layer.1.mlp.fc2.bias": "model-00001-of-00002.safetensors",
         | 
| 451 | 
            +
                "vision_tower.encoder.layer.1.mlp.fc2.weight": "model-00001-of-00002.safetensors",
         | 
| 452 | 
            +
                "vision_tower.encoder.layer.10.attention.k_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 453 | 
            +
                "vision_tower.encoder.layer.10.attention.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 454 | 
            +
                "vision_tower.encoder.layer.10.attention.projection_layer.bias": "model-00001-of-00002.safetensors",
         | 
| 455 | 
            +
                "vision_tower.encoder.layer.10.attention.projection_layer.weight": "model-00001-of-00002.safetensors",
         | 
| 456 | 
            +
                "vision_tower.encoder.layer.10.attention.q_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 457 | 
            +
                "vision_tower.encoder.layer.10.attention.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 458 | 
            +
                "vision_tower.encoder.layer.10.attention.v_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 459 | 
            +
                "vision_tower.encoder.layer.10.attention.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 460 | 
            +
                "vision_tower.encoder.layer.10.lambda_1": "model-00001-of-00002.safetensors",
         | 
| 461 | 
            +
                "vision_tower.encoder.layer.10.lambda_2": "model-00001-of-00002.safetensors",
         | 
| 462 | 
            +
                "vision_tower.encoder.layer.10.layernorm_after.bias": "model-00001-of-00002.safetensors",
         | 
| 463 | 
            +
                "vision_tower.encoder.layer.10.layernorm_after.weight": "model-00001-of-00002.safetensors",
         | 
| 464 | 
            +
                "vision_tower.encoder.layer.10.layernorm_before.bias": "model-00001-of-00002.safetensors",
         | 
| 465 | 
            +
                "vision_tower.encoder.layer.10.layernorm_before.weight": "model-00001-of-00002.safetensors",
         | 
| 466 | 
            +
                "vision_tower.encoder.layer.10.mlp.fc1.bias": "model-00001-of-00002.safetensors",
         | 
| 467 | 
            +
                "vision_tower.encoder.layer.10.mlp.fc1.weight": "model-00001-of-00002.safetensors",
         | 
| 468 | 
            +
                "vision_tower.encoder.layer.10.mlp.fc2.bias": "model-00001-of-00002.safetensors",
         | 
| 469 | 
            +
                "vision_tower.encoder.layer.10.mlp.fc2.weight": "model-00001-of-00002.safetensors",
         | 
| 470 | 
            +
                "vision_tower.encoder.layer.11.attention.k_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 471 | 
            +
                "vision_tower.encoder.layer.11.attention.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 472 | 
            +
                "vision_tower.encoder.layer.11.attention.projection_layer.bias": "model-00001-of-00002.safetensors",
         | 
| 473 | 
            +
                "vision_tower.encoder.layer.11.attention.projection_layer.weight": "model-00001-of-00002.safetensors",
         | 
| 474 | 
            +
                "vision_tower.encoder.layer.11.attention.q_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 475 | 
            +
                "vision_tower.encoder.layer.11.attention.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 476 | 
            +
                "vision_tower.encoder.layer.11.attention.v_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 477 | 
            +
                "vision_tower.encoder.layer.11.attention.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 478 | 
            +
                "vision_tower.encoder.layer.11.lambda_1": "model-00001-of-00002.safetensors",
         | 
| 479 | 
            +
                "vision_tower.encoder.layer.11.lambda_2": "model-00001-of-00002.safetensors",
         | 
| 480 | 
            +
                "vision_tower.encoder.layer.11.layernorm_after.bias": "model-00001-of-00002.safetensors",
         | 
| 481 | 
            +
                "vision_tower.encoder.layer.11.layernorm_after.weight": "model-00001-of-00002.safetensors",
         | 
| 482 | 
            +
                "vision_tower.encoder.layer.11.layernorm_before.bias": "model-00001-of-00002.safetensors",
         | 
| 483 | 
            +
                "vision_tower.encoder.layer.11.layernorm_before.weight": "model-00001-of-00002.safetensors",
         | 
| 484 | 
            +
                "vision_tower.encoder.layer.11.mlp.fc1.bias": "model-00001-of-00002.safetensors",
         | 
| 485 | 
            +
                "vision_tower.encoder.layer.11.mlp.fc1.weight": "model-00001-of-00002.safetensors",
         | 
| 486 | 
            +
                "vision_tower.encoder.layer.11.mlp.fc2.bias": "model-00001-of-00002.safetensors",
         | 
| 487 | 
            +
                "vision_tower.encoder.layer.11.mlp.fc2.weight": "model-00001-of-00002.safetensors",
         | 
| 488 | 
            +
                "vision_tower.encoder.layer.12.attention.k_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 489 | 
            +
                "vision_tower.encoder.layer.12.attention.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 490 | 
            +
                "vision_tower.encoder.layer.12.attention.projection_layer.bias": "model-00001-of-00002.safetensors",
         | 
| 491 | 
            +
                "vision_tower.encoder.layer.12.attention.projection_layer.weight": "model-00001-of-00002.safetensors",
         | 
| 492 | 
            +
                "vision_tower.encoder.layer.12.attention.q_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 493 | 
            +
                "vision_tower.encoder.layer.12.attention.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 494 | 
            +
                "vision_tower.encoder.layer.12.attention.v_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 495 | 
            +
                "vision_tower.encoder.layer.12.attention.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 496 | 
            +
                "vision_tower.encoder.layer.12.lambda_1": "model-00001-of-00002.safetensors",
         | 
| 497 | 
            +
                "vision_tower.encoder.layer.12.lambda_2": "model-00001-of-00002.safetensors",
         | 
| 498 | 
            +
                "vision_tower.encoder.layer.12.layernorm_after.bias": "model-00001-of-00002.safetensors",
         | 
| 499 | 
            +
                "vision_tower.encoder.layer.12.layernorm_after.weight": "model-00001-of-00002.safetensors",
         | 
| 500 | 
            +
                "vision_tower.encoder.layer.12.layernorm_before.bias": "model-00001-of-00002.safetensors",
         | 
| 501 | 
            +
                "vision_tower.encoder.layer.12.layernorm_before.weight": "model-00001-of-00002.safetensors",
         | 
| 502 | 
            +
                "vision_tower.encoder.layer.12.mlp.fc1.bias": "model-00001-of-00002.safetensors",
         | 
| 503 | 
            +
                "vision_tower.encoder.layer.12.mlp.fc1.weight": "model-00001-of-00002.safetensors",
         | 
| 504 | 
            +
                "vision_tower.encoder.layer.12.mlp.fc2.bias": "model-00001-of-00002.safetensors",
         | 
| 505 | 
            +
                "vision_tower.encoder.layer.12.mlp.fc2.weight": "model-00001-of-00002.safetensors",
         | 
| 506 | 
            +
                "vision_tower.encoder.layer.13.attention.k_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 507 | 
            +
                "vision_tower.encoder.layer.13.attention.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 508 | 
            +
                "vision_tower.encoder.layer.13.attention.projection_layer.bias": "model-00001-of-00002.safetensors",
         | 
| 509 | 
            +
                "vision_tower.encoder.layer.13.attention.projection_layer.weight": "model-00001-of-00002.safetensors",
         | 
| 510 | 
            +
                "vision_tower.encoder.layer.13.attention.q_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 511 | 
            +
                "vision_tower.encoder.layer.13.attention.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 512 | 
            +
                "vision_tower.encoder.layer.13.attention.v_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 513 | 
            +
                "vision_tower.encoder.layer.13.attention.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 514 | 
            +
                "vision_tower.encoder.layer.13.lambda_1": "model-00001-of-00002.safetensors",
         | 
| 515 | 
            +
                "vision_tower.encoder.layer.13.lambda_2": "model-00001-of-00002.safetensors",
         | 
| 516 | 
            +
                "vision_tower.encoder.layer.13.layernorm_after.bias": "model-00001-of-00002.safetensors",
         | 
| 517 | 
            +
                "vision_tower.encoder.layer.13.layernorm_after.weight": "model-00001-of-00002.safetensors",
         | 
| 518 | 
            +
                "vision_tower.encoder.layer.13.layernorm_before.bias": "model-00001-of-00002.safetensors",
         | 
| 519 | 
            +
                "vision_tower.encoder.layer.13.layernorm_before.weight": "model-00001-of-00002.safetensors",
         | 
| 520 | 
            +
                "vision_tower.encoder.layer.13.mlp.fc1.bias": "model-00001-of-00002.safetensors",
         | 
| 521 | 
            +
                "vision_tower.encoder.layer.13.mlp.fc1.weight": "model-00001-of-00002.safetensors",
         | 
| 522 | 
            +
                "vision_tower.encoder.layer.13.mlp.fc2.bias": "model-00001-of-00002.safetensors",
         | 
| 523 | 
            +
                "vision_tower.encoder.layer.13.mlp.fc2.weight": "model-00001-of-00002.safetensors",
         | 
| 524 | 
            +
                "vision_tower.encoder.layer.14.attention.k_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 525 | 
            +
                "vision_tower.encoder.layer.14.attention.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 526 | 
            +
                "vision_tower.encoder.layer.14.attention.projection_layer.bias": "model-00001-of-00002.safetensors",
         | 
| 527 | 
            +
                "vision_tower.encoder.layer.14.attention.projection_layer.weight": "model-00001-of-00002.safetensors",
         | 
| 528 | 
            +
                "vision_tower.encoder.layer.14.attention.q_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 529 | 
            +
                "vision_tower.encoder.layer.14.attention.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 530 | 
            +
                "vision_tower.encoder.layer.14.attention.v_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 531 | 
            +
                "vision_tower.encoder.layer.14.attention.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 532 | 
            +
                "vision_tower.encoder.layer.14.lambda_1": "model-00001-of-00002.safetensors",
         | 
| 533 | 
            +
                "vision_tower.encoder.layer.14.lambda_2": "model-00001-of-00002.safetensors",
         | 
| 534 | 
            +
                "vision_tower.encoder.layer.14.layernorm_after.bias": "model-00001-of-00002.safetensors",
         | 
| 535 | 
            +
                "vision_tower.encoder.layer.14.layernorm_after.weight": "model-00001-of-00002.safetensors",
         | 
| 536 | 
            +
                "vision_tower.encoder.layer.14.layernorm_before.bias": "model-00001-of-00002.safetensors",
         | 
| 537 | 
            +
                "vision_tower.encoder.layer.14.layernorm_before.weight": "model-00001-of-00002.safetensors",
         | 
| 538 | 
            +
                "vision_tower.encoder.layer.14.mlp.fc1.bias": "model-00001-of-00002.safetensors",
         | 
| 539 | 
            +
                "vision_tower.encoder.layer.14.mlp.fc1.weight": "model-00001-of-00002.safetensors",
         | 
| 540 | 
            +
                "vision_tower.encoder.layer.14.mlp.fc2.bias": "model-00001-of-00002.safetensors",
         | 
| 541 | 
            +
                "vision_tower.encoder.layer.14.mlp.fc2.weight": "model-00001-of-00002.safetensors",
         | 
| 542 | 
            +
                "vision_tower.encoder.layer.15.attention.k_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 543 | 
            +
                "vision_tower.encoder.layer.15.attention.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 544 | 
            +
                "vision_tower.encoder.layer.15.attention.projection_layer.bias": "model-00001-of-00002.safetensors",
         | 
| 545 | 
            +
                "vision_tower.encoder.layer.15.attention.projection_layer.weight": "model-00001-of-00002.safetensors",
         | 
| 546 | 
            +
                "vision_tower.encoder.layer.15.attention.q_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 547 | 
            +
                "vision_tower.encoder.layer.15.attention.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 548 | 
            +
                "vision_tower.encoder.layer.15.attention.v_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 549 | 
            +
                "vision_tower.encoder.layer.15.attention.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 550 | 
            +
                "vision_tower.encoder.layer.15.lambda_1": "model-00001-of-00002.safetensors",
         | 
| 551 | 
            +
                "vision_tower.encoder.layer.15.lambda_2": "model-00001-of-00002.safetensors",
         | 
| 552 | 
            +
                "vision_tower.encoder.layer.15.layernorm_after.bias": "model-00001-of-00002.safetensors",
         | 
| 553 | 
            +
                "vision_tower.encoder.layer.15.layernorm_after.weight": "model-00001-of-00002.safetensors",
         | 
| 554 | 
            +
                "vision_tower.encoder.layer.15.layernorm_before.bias": "model-00001-of-00002.safetensors",
         | 
| 555 | 
            +
                "vision_tower.encoder.layer.15.layernorm_before.weight": "model-00001-of-00002.safetensors",
         | 
| 556 | 
            +
                "vision_tower.encoder.layer.15.mlp.fc1.bias": "model-00001-of-00002.safetensors",
         | 
| 557 | 
            +
                "vision_tower.encoder.layer.15.mlp.fc1.weight": "model-00001-of-00002.safetensors",
         | 
| 558 | 
            +
                "vision_tower.encoder.layer.15.mlp.fc2.bias": "model-00001-of-00002.safetensors",
         | 
| 559 | 
            +
                "vision_tower.encoder.layer.15.mlp.fc2.weight": "model-00001-of-00002.safetensors",
         | 
| 560 | 
            +
                "vision_tower.encoder.layer.16.attention.k_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 561 | 
            +
                "vision_tower.encoder.layer.16.attention.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 562 | 
            +
                "vision_tower.encoder.layer.16.attention.projection_layer.bias": "model-00001-of-00002.safetensors",
         | 
| 563 | 
            +
                "vision_tower.encoder.layer.16.attention.projection_layer.weight": "model-00001-of-00002.safetensors",
         | 
| 564 | 
            +
                "vision_tower.encoder.layer.16.attention.q_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 565 | 
            +
                "vision_tower.encoder.layer.16.attention.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 566 | 
            +
                "vision_tower.encoder.layer.16.attention.v_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 567 | 
            +
                "vision_tower.encoder.layer.16.attention.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 568 | 
            +
                "vision_tower.encoder.layer.16.lambda_1": "model-00001-of-00002.safetensors",
         | 
| 569 | 
            +
                "vision_tower.encoder.layer.16.lambda_2": "model-00001-of-00002.safetensors",
         | 
| 570 | 
            +
                "vision_tower.encoder.layer.16.layernorm_after.bias": "model-00001-of-00002.safetensors",
         | 
| 571 | 
            +
                "vision_tower.encoder.layer.16.layernorm_after.weight": "model-00001-of-00002.safetensors",
         | 
| 572 | 
            +
                "vision_tower.encoder.layer.16.layernorm_before.bias": "model-00001-of-00002.safetensors",
         | 
| 573 | 
            +
                "vision_tower.encoder.layer.16.layernorm_before.weight": "model-00001-of-00002.safetensors",
         | 
| 574 | 
            +
                "vision_tower.encoder.layer.16.mlp.fc1.bias": "model-00001-of-00002.safetensors",
         | 
| 575 | 
            +
                "vision_tower.encoder.layer.16.mlp.fc1.weight": "model-00001-of-00002.safetensors",
         | 
| 576 | 
            +
                "vision_tower.encoder.layer.16.mlp.fc2.bias": "model-00001-of-00002.safetensors",
         | 
| 577 | 
            +
                "vision_tower.encoder.layer.16.mlp.fc2.weight": "model-00001-of-00002.safetensors",
         | 
| 578 | 
            +
                "vision_tower.encoder.layer.17.attention.k_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 579 | 
            +
                "vision_tower.encoder.layer.17.attention.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 580 | 
            +
                "vision_tower.encoder.layer.17.attention.projection_layer.bias": "model-00001-of-00002.safetensors",
         | 
| 581 | 
            +
                "vision_tower.encoder.layer.17.attention.projection_layer.weight": "model-00001-of-00002.safetensors",
         | 
| 582 | 
            +
                "vision_tower.encoder.layer.17.attention.q_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 583 | 
            +
                "vision_tower.encoder.layer.17.attention.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 584 | 
            +
                "vision_tower.encoder.layer.17.attention.v_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 585 | 
            +
                "vision_tower.encoder.layer.17.attention.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 586 | 
            +
                "vision_tower.encoder.layer.17.lambda_1": "model-00001-of-00002.safetensors",
         | 
| 587 | 
            +
                "vision_tower.encoder.layer.17.lambda_2": "model-00001-of-00002.safetensors",
         | 
| 588 | 
            +
                "vision_tower.encoder.layer.17.layernorm_after.bias": "model-00001-of-00002.safetensors",
         | 
| 589 | 
            +
                "vision_tower.encoder.layer.17.layernorm_after.weight": "model-00001-of-00002.safetensors",
         | 
| 590 | 
            +
                "vision_tower.encoder.layer.17.layernorm_before.bias": "model-00001-of-00002.safetensors",
         | 
| 591 | 
            +
                "vision_tower.encoder.layer.17.layernorm_before.weight": "model-00001-of-00002.safetensors",
         | 
| 592 | 
            +
                "vision_tower.encoder.layer.17.mlp.fc1.bias": "model-00001-of-00002.safetensors",
         | 
| 593 | 
            +
                "vision_tower.encoder.layer.17.mlp.fc1.weight": "model-00001-of-00002.safetensors",
         | 
| 594 | 
            +
                "vision_tower.encoder.layer.17.mlp.fc2.bias": "model-00001-of-00002.safetensors",
         | 
| 595 | 
            +
                "vision_tower.encoder.layer.17.mlp.fc2.weight": "model-00001-of-00002.safetensors",
         | 
| 596 | 
            +
                "vision_tower.encoder.layer.18.attention.k_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 597 | 
            +
                "vision_tower.encoder.layer.18.attention.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 598 | 
            +
                "vision_tower.encoder.layer.18.attention.projection_layer.bias": "model-00001-of-00002.safetensors",
         | 
| 599 | 
            +
                "vision_tower.encoder.layer.18.attention.projection_layer.weight": "model-00001-of-00002.safetensors",
         | 
| 600 | 
            +
                "vision_tower.encoder.layer.18.attention.q_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 601 | 
            +
                "vision_tower.encoder.layer.18.attention.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 602 | 
            +
                "vision_tower.encoder.layer.18.attention.v_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 603 | 
            +
                "vision_tower.encoder.layer.18.attention.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 604 | 
            +
                "vision_tower.encoder.layer.18.lambda_1": "model-00001-of-00002.safetensors",
         | 
| 605 | 
            +
                "vision_tower.encoder.layer.18.lambda_2": "model-00001-of-00002.safetensors",
         | 
| 606 | 
            +
                "vision_tower.encoder.layer.18.layernorm_after.bias": "model-00001-of-00002.safetensors",
         | 
| 607 | 
            +
                "vision_tower.encoder.layer.18.layernorm_after.weight": "model-00001-of-00002.safetensors",
         | 
| 608 | 
            +
                "vision_tower.encoder.layer.18.layernorm_before.bias": "model-00001-of-00002.safetensors",
         | 
| 609 | 
            +
                "vision_tower.encoder.layer.18.layernorm_before.weight": "model-00001-of-00002.safetensors",
         | 
| 610 | 
            +
                "vision_tower.encoder.layer.18.mlp.fc1.bias": "model-00001-of-00002.safetensors",
         | 
| 611 | 
            +
                "vision_tower.encoder.layer.18.mlp.fc1.weight": "model-00001-of-00002.safetensors",
         | 
| 612 | 
            +
                "vision_tower.encoder.layer.18.mlp.fc2.bias": "model-00001-of-00002.safetensors",
         | 
| 613 | 
            +
                "vision_tower.encoder.layer.18.mlp.fc2.weight": "model-00001-of-00002.safetensors",
         | 
| 614 | 
            +
                "vision_tower.encoder.layer.19.attention.k_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 615 | 
            +
                "vision_tower.encoder.layer.19.attention.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 616 | 
            +
                "vision_tower.encoder.layer.19.attention.projection_layer.bias": "model-00001-of-00002.safetensors",
         | 
| 617 | 
            +
                "vision_tower.encoder.layer.19.attention.projection_layer.weight": "model-00001-of-00002.safetensors",
         | 
| 618 | 
            +
                "vision_tower.encoder.layer.19.attention.q_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 619 | 
            +
                "vision_tower.encoder.layer.19.attention.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 620 | 
            +
                "vision_tower.encoder.layer.19.attention.v_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 621 | 
            +
                "vision_tower.encoder.layer.19.attention.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 622 | 
            +
                "vision_tower.encoder.layer.19.lambda_1": "model-00001-of-00002.safetensors",
         | 
| 623 | 
            +
                "vision_tower.encoder.layer.19.lambda_2": "model-00001-of-00002.safetensors",
         | 
| 624 | 
            +
                "vision_tower.encoder.layer.19.layernorm_after.bias": "model-00001-of-00002.safetensors",
         | 
| 625 | 
            +
                "vision_tower.encoder.layer.19.layernorm_after.weight": "model-00001-of-00002.safetensors",
         | 
| 626 | 
            +
                "vision_tower.encoder.layer.19.layernorm_before.bias": "model-00001-of-00002.safetensors",
         | 
| 627 | 
            +
                "vision_tower.encoder.layer.19.layernorm_before.weight": "model-00001-of-00002.safetensors",
         | 
| 628 | 
            +
                "vision_tower.encoder.layer.19.mlp.fc1.bias": "model-00001-of-00002.safetensors",
         | 
| 629 | 
            +
                "vision_tower.encoder.layer.19.mlp.fc1.weight": "model-00001-of-00002.safetensors",
         | 
| 630 | 
            +
                "vision_tower.encoder.layer.19.mlp.fc2.bias": "model-00001-of-00002.safetensors",
         | 
| 631 | 
            +
                "vision_tower.encoder.layer.19.mlp.fc2.weight": "model-00001-of-00002.safetensors",
         | 
| 632 | 
            +
                "vision_tower.encoder.layer.2.attention.k_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 633 | 
            +
                "vision_tower.encoder.layer.2.attention.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 634 | 
            +
                "vision_tower.encoder.layer.2.attention.projection_layer.bias": "model-00001-of-00002.safetensors",
         | 
| 635 | 
            +
                "vision_tower.encoder.layer.2.attention.projection_layer.weight": "model-00001-of-00002.safetensors",
         | 
| 636 | 
            +
                "vision_tower.encoder.layer.2.attention.q_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 637 | 
            +
                "vision_tower.encoder.layer.2.attention.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 638 | 
            +
                "vision_tower.encoder.layer.2.attention.v_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 639 | 
            +
                "vision_tower.encoder.layer.2.attention.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 640 | 
            +
                "vision_tower.encoder.layer.2.lambda_1": "model-00001-of-00002.safetensors",
         | 
| 641 | 
            +
                "vision_tower.encoder.layer.2.lambda_2": "model-00001-of-00002.safetensors",
         | 
| 642 | 
            +
                "vision_tower.encoder.layer.2.layernorm_after.bias": "model-00001-of-00002.safetensors",
         | 
| 643 | 
            +
                "vision_tower.encoder.layer.2.layernorm_after.weight": "model-00001-of-00002.safetensors",
         | 
| 644 | 
            +
                "vision_tower.encoder.layer.2.layernorm_before.bias": "model-00001-of-00002.safetensors",
         | 
| 645 | 
            +
                "vision_tower.encoder.layer.2.layernorm_before.weight": "model-00001-of-00002.safetensors",
         | 
| 646 | 
            +
                "vision_tower.encoder.layer.2.mlp.fc1.bias": "model-00001-of-00002.safetensors",
         | 
| 647 | 
            +
                "vision_tower.encoder.layer.2.mlp.fc1.weight": "model-00001-of-00002.safetensors",
         | 
| 648 | 
            +
                "vision_tower.encoder.layer.2.mlp.fc2.bias": "model-00001-of-00002.safetensors",
         | 
| 649 | 
            +
                "vision_tower.encoder.layer.2.mlp.fc2.weight": "model-00001-of-00002.safetensors",
         | 
| 650 | 
            +
                "vision_tower.encoder.layer.20.attention.k_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 651 | 
            +
                "vision_tower.encoder.layer.20.attention.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 652 | 
            +
                "vision_tower.encoder.layer.20.attention.projection_layer.bias": "model-00001-of-00002.safetensors",
         | 
| 653 | 
            +
                "vision_tower.encoder.layer.20.attention.projection_layer.weight": "model-00001-of-00002.safetensors",
         | 
| 654 | 
            +
                "vision_tower.encoder.layer.20.attention.q_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 655 | 
            +
                "vision_tower.encoder.layer.20.attention.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 656 | 
            +
                "vision_tower.encoder.layer.20.attention.v_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 657 | 
            +
                "vision_tower.encoder.layer.20.attention.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 658 | 
            +
                "vision_tower.encoder.layer.20.lambda_1": "model-00001-of-00002.safetensors",
         | 
| 659 | 
            +
                "vision_tower.encoder.layer.20.lambda_2": "model-00001-of-00002.safetensors",
         | 
| 660 | 
            +
                "vision_tower.encoder.layer.20.layernorm_after.bias": "model-00001-of-00002.safetensors",
         | 
| 661 | 
            +
                "vision_tower.encoder.layer.20.layernorm_after.weight": "model-00001-of-00002.safetensors",
         | 
| 662 | 
            +
                "vision_tower.encoder.layer.20.layernorm_before.bias": "model-00001-of-00002.safetensors",
         | 
| 663 | 
            +
                "vision_tower.encoder.layer.20.layernorm_before.weight": "model-00001-of-00002.safetensors",
         | 
| 664 | 
            +
                "vision_tower.encoder.layer.20.mlp.fc1.bias": "model-00001-of-00002.safetensors",
         | 
| 665 | 
            +
                "vision_tower.encoder.layer.20.mlp.fc1.weight": "model-00001-of-00002.safetensors",
         | 
| 666 | 
            +
                "vision_tower.encoder.layer.20.mlp.fc2.bias": "model-00001-of-00002.safetensors",
         | 
| 667 | 
            +
                "vision_tower.encoder.layer.20.mlp.fc2.weight": "model-00001-of-00002.safetensors",
         | 
| 668 | 
            +
                "vision_tower.encoder.layer.21.attention.k_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 669 | 
            +
                "vision_tower.encoder.layer.21.attention.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 670 | 
            +
                "vision_tower.encoder.layer.21.attention.projection_layer.bias": "model-00001-of-00002.safetensors",
         | 
| 671 | 
            +
                "vision_tower.encoder.layer.21.attention.projection_layer.weight": "model-00001-of-00002.safetensors",
         | 
| 672 | 
            +
                "vision_tower.encoder.layer.21.attention.q_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 673 | 
            +
                "vision_tower.encoder.layer.21.attention.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 674 | 
            +
                "vision_tower.encoder.layer.21.attention.v_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 675 | 
            +
                "vision_tower.encoder.layer.21.attention.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 676 | 
            +
                "vision_tower.encoder.layer.21.lambda_1": "model-00001-of-00002.safetensors",
         | 
| 677 | 
            +
                "vision_tower.encoder.layer.21.lambda_2": "model-00001-of-00002.safetensors",
         | 
| 678 | 
            +
                "vision_tower.encoder.layer.21.layernorm_after.bias": "model-00001-of-00002.safetensors",
         | 
| 679 | 
            +
                "vision_tower.encoder.layer.21.layernorm_after.weight": "model-00001-of-00002.safetensors",
         | 
| 680 | 
            +
                "vision_tower.encoder.layer.21.layernorm_before.bias": "model-00001-of-00002.safetensors",
         | 
| 681 | 
            +
                "vision_tower.encoder.layer.21.layernorm_before.weight": "model-00001-of-00002.safetensors",
         | 
| 682 | 
            +
                "vision_tower.encoder.layer.21.mlp.fc1.bias": "model-00001-of-00002.safetensors",
         | 
| 683 | 
            +
                "vision_tower.encoder.layer.21.mlp.fc1.weight": "model-00001-of-00002.safetensors",
         | 
| 684 | 
            +
                "vision_tower.encoder.layer.21.mlp.fc2.bias": "model-00001-of-00002.safetensors",
         | 
| 685 | 
            +
                "vision_tower.encoder.layer.21.mlp.fc2.weight": "model-00001-of-00002.safetensors",
         | 
| 686 | 
            +
                "vision_tower.encoder.layer.22.attention.k_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 687 | 
            +
                "vision_tower.encoder.layer.22.attention.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 688 | 
            +
                "vision_tower.encoder.layer.22.attention.projection_layer.bias": "model-00001-of-00002.safetensors",
         | 
| 689 | 
            +
                "vision_tower.encoder.layer.22.attention.projection_layer.weight": "model-00001-of-00002.safetensors",
         | 
| 690 | 
            +
                "vision_tower.encoder.layer.22.attention.q_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 691 | 
            +
                "vision_tower.encoder.layer.22.attention.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 692 | 
            +
                "vision_tower.encoder.layer.22.attention.v_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 693 | 
            +
                "vision_tower.encoder.layer.22.attention.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 694 | 
            +
                "vision_tower.encoder.layer.22.lambda_1": "model-00001-of-00002.safetensors",
         | 
| 695 | 
            +
                "vision_tower.encoder.layer.22.lambda_2": "model-00001-of-00002.safetensors",
         | 
| 696 | 
            +
                "vision_tower.encoder.layer.22.layernorm_after.bias": "model-00001-of-00002.safetensors",
         | 
| 697 | 
            +
                "vision_tower.encoder.layer.22.layernorm_after.weight": "model-00001-of-00002.safetensors",
         | 
| 698 | 
            +
                "vision_tower.encoder.layer.22.layernorm_before.bias": "model-00001-of-00002.safetensors",
         | 
| 699 | 
            +
                "vision_tower.encoder.layer.22.layernorm_before.weight": "model-00001-of-00002.safetensors",
         | 
| 700 | 
            +
                "vision_tower.encoder.layer.22.mlp.fc1.bias": "model-00001-of-00002.safetensors",
         | 
| 701 | 
            +
                "vision_tower.encoder.layer.22.mlp.fc1.weight": "model-00001-of-00002.safetensors",
         | 
| 702 | 
            +
                "vision_tower.encoder.layer.22.mlp.fc2.bias": "model-00001-of-00002.safetensors",
         | 
| 703 | 
            +
                "vision_tower.encoder.layer.22.mlp.fc2.weight": "model-00001-of-00002.safetensors",
         | 
| 704 | 
            +
                "vision_tower.encoder.layer.23.attention.k_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 705 | 
            +
                "vision_tower.encoder.layer.23.attention.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 706 | 
            +
                "vision_tower.encoder.layer.23.attention.projection_layer.bias": "model-00001-of-00002.safetensors",
         | 
| 707 | 
            +
                "vision_tower.encoder.layer.23.attention.projection_layer.weight": "model-00001-of-00002.safetensors",
         | 
| 708 | 
            +
                "vision_tower.encoder.layer.23.attention.q_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 709 | 
            +
                "vision_tower.encoder.layer.23.attention.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 710 | 
            +
                "vision_tower.encoder.layer.23.attention.v_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 711 | 
            +
                "vision_tower.encoder.layer.23.attention.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 712 | 
            +
                "vision_tower.encoder.layer.23.lambda_1": "model-00001-of-00002.safetensors",
         | 
| 713 | 
            +
                "vision_tower.encoder.layer.23.lambda_2": "model-00001-of-00002.safetensors",
         | 
| 714 | 
            +
                "vision_tower.encoder.layer.23.layernorm_after.bias": "model-00001-of-00002.safetensors",
         | 
| 715 | 
            +
                "vision_tower.encoder.layer.23.layernorm_after.weight": "model-00001-of-00002.safetensors",
         | 
| 716 | 
            +
                "vision_tower.encoder.layer.23.layernorm_before.bias": "model-00001-of-00002.safetensors",
         | 
| 717 | 
            +
                "vision_tower.encoder.layer.23.layernorm_before.weight": "model-00001-of-00002.safetensors",
         | 
| 718 | 
            +
                "vision_tower.encoder.layer.23.mlp.fc1.bias": "model-00001-of-00002.safetensors",
         | 
| 719 | 
            +
                "vision_tower.encoder.layer.23.mlp.fc1.weight": "model-00001-of-00002.safetensors",
         | 
| 720 | 
            +
                "vision_tower.encoder.layer.23.mlp.fc2.bias": "model-00001-of-00002.safetensors",
         | 
| 721 | 
            +
                "vision_tower.encoder.layer.23.mlp.fc2.weight": "model-00001-of-00002.safetensors",
         | 
| 722 | 
            +
                "vision_tower.encoder.layer.3.attention.k_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 723 | 
            +
                "vision_tower.encoder.layer.3.attention.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 724 | 
            +
                "vision_tower.encoder.layer.3.attention.projection_layer.bias": "model-00001-of-00002.safetensors",
         | 
| 725 | 
            +
                "vision_tower.encoder.layer.3.attention.projection_layer.weight": "model-00001-of-00002.safetensors",
         | 
| 726 | 
            +
                "vision_tower.encoder.layer.3.attention.q_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 727 | 
            +
                "vision_tower.encoder.layer.3.attention.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 728 | 
            +
                "vision_tower.encoder.layer.3.attention.v_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 729 | 
            +
                "vision_tower.encoder.layer.3.attention.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 730 | 
            +
                "vision_tower.encoder.layer.3.lambda_1": "model-00001-of-00002.safetensors",
         | 
| 731 | 
            +
                "vision_tower.encoder.layer.3.lambda_2": "model-00001-of-00002.safetensors",
         | 
| 732 | 
            +
                "vision_tower.encoder.layer.3.layernorm_after.bias": "model-00001-of-00002.safetensors",
         | 
| 733 | 
            +
                "vision_tower.encoder.layer.3.layernorm_after.weight": "model-00001-of-00002.safetensors",
         | 
| 734 | 
            +
                "vision_tower.encoder.layer.3.layernorm_before.bias": "model-00001-of-00002.safetensors",
         | 
| 735 | 
            +
                "vision_tower.encoder.layer.3.layernorm_before.weight": "model-00001-of-00002.safetensors",
         | 
| 736 | 
            +
                "vision_tower.encoder.layer.3.mlp.fc1.bias": "model-00001-of-00002.safetensors",
         | 
| 737 | 
            +
                "vision_tower.encoder.layer.3.mlp.fc1.weight": "model-00001-of-00002.safetensors",
         | 
| 738 | 
            +
                "vision_tower.encoder.layer.3.mlp.fc2.bias": "model-00001-of-00002.safetensors",
         | 
| 739 | 
            +
                "vision_tower.encoder.layer.3.mlp.fc2.weight": "model-00001-of-00002.safetensors",
         | 
| 740 | 
            +
                "vision_tower.encoder.layer.4.attention.k_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 741 | 
            +
                "vision_tower.encoder.layer.4.attention.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 742 | 
            +
                "vision_tower.encoder.layer.4.attention.projection_layer.bias": "model-00001-of-00002.safetensors",
         | 
| 743 | 
            +
                "vision_tower.encoder.layer.4.attention.projection_layer.weight": "model-00001-of-00002.safetensors",
         | 
| 744 | 
            +
                "vision_tower.encoder.layer.4.attention.q_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 745 | 
            +
                "vision_tower.encoder.layer.4.attention.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 746 | 
            +
                "vision_tower.encoder.layer.4.attention.v_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 747 | 
            +
                "vision_tower.encoder.layer.4.attention.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 748 | 
            +
                "vision_tower.encoder.layer.4.lambda_1": "model-00001-of-00002.safetensors",
         | 
| 749 | 
            +
                "vision_tower.encoder.layer.4.lambda_2": "model-00001-of-00002.safetensors",
         | 
| 750 | 
            +
                "vision_tower.encoder.layer.4.layernorm_after.bias": "model-00001-of-00002.safetensors",
         | 
| 751 | 
            +
                "vision_tower.encoder.layer.4.layernorm_after.weight": "model-00001-of-00002.safetensors",
         | 
| 752 | 
            +
                "vision_tower.encoder.layer.4.layernorm_before.bias": "model-00001-of-00002.safetensors",
         | 
| 753 | 
            +
                "vision_tower.encoder.layer.4.layernorm_before.weight": "model-00001-of-00002.safetensors",
         | 
| 754 | 
            +
                "vision_tower.encoder.layer.4.mlp.fc1.bias": "model-00001-of-00002.safetensors",
         | 
| 755 | 
            +
                "vision_tower.encoder.layer.4.mlp.fc1.weight": "model-00001-of-00002.safetensors",
         | 
| 756 | 
            +
                "vision_tower.encoder.layer.4.mlp.fc2.bias": "model-00001-of-00002.safetensors",
         | 
| 757 | 
            +
                "vision_tower.encoder.layer.4.mlp.fc2.weight": "model-00001-of-00002.safetensors",
         | 
| 758 | 
            +
                "vision_tower.encoder.layer.5.attention.k_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 759 | 
            +
                "vision_tower.encoder.layer.5.attention.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 760 | 
            +
                "vision_tower.encoder.layer.5.attention.projection_layer.bias": "model-00001-of-00002.safetensors",
         | 
| 761 | 
            +
                "vision_tower.encoder.layer.5.attention.projection_layer.weight": "model-00001-of-00002.safetensors",
         | 
| 762 | 
            +
                "vision_tower.encoder.layer.5.attention.q_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 763 | 
            +
                "vision_tower.encoder.layer.5.attention.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 764 | 
            +
                "vision_tower.encoder.layer.5.attention.v_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 765 | 
            +
                "vision_tower.encoder.layer.5.attention.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 766 | 
            +
                "vision_tower.encoder.layer.5.lambda_1": "model-00001-of-00002.safetensors",
         | 
| 767 | 
            +
                "vision_tower.encoder.layer.5.lambda_2": "model-00001-of-00002.safetensors",
         | 
| 768 | 
            +
                "vision_tower.encoder.layer.5.layernorm_after.bias": "model-00001-of-00002.safetensors",
         | 
| 769 | 
            +
                "vision_tower.encoder.layer.5.layernorm_after.weight": "model-00001-of-00002.safetensors",
         | 
| 770 | 
            +
                "vision_tower.encoder.layer.5.layernorm_before.bias": "model-00001-of-00002.safetensors",
         | 
| 771 | 
            +
                "vision_tower.encoder.layer.5.layernorm_before.weight": "model-00001-of-00002.safetensors",
         | 
| 772 | 
            +
                "vision_tower.encoder.layer.5.mlp.fc1.bias": "model-00001-of-00002.safetensors",
         | 
| 773 | 
            +
                "vision_tower.encoder.layer.5.mlp.fc1.weight": "model-00001-of-00002.safetensors",
         | 
| 774 | 
            +
                "vision_tower.encoder.layer.5.mlp.fc2.bias": "model-00001-of-00002.safetensors",
         | 
| 775 | 
            +
                "vision_tower.encoder.layer.5.mlp.fc2.weight": "model-00001-of-00002.safetensors",
         | 
| 776 | 
            +
                "vision_tower.encoder.layer.6.attention.k_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 777 | 
            +
                "vision_tower.encoder.layer.6.attention.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 778 | 
            +
                "vision_tower.encoder.layer.6.attention.projection_layer.bias": "model-00001-of-00002.safetensors",
         | 
| 779 | 
            +
                "vision_tower.encoder.layer.6.attention.projection_layer.weight": "model-00001-of-00002.safetensors",
         | 
| 780 | 
            +
                "vision_tower.encoder.layer.6.attention.q_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 781 | 
            +
                "vision_tower.encoder.layer.6.attention.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 782 | 
            +
                "vision_tower.encoder.layer.6.attention.v_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 783 | 
            +
                "vision_tower.encoder.layer.6.attention.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 784 | 
            +
                "vision_tower.encoder.layer.6.lambda_1": "model-00001-of-00002.safetensors",
         | 
| 785 | 
            +
                "vision_tower.encoder.layer.6.lambda_2": "model-00001-of-00002.safetensors",
         | 
| 786 | 
            +
                "vision_tower.encoder.layer.6.layernorm_after.bias": "model-00001-of-00002.safetensors",
         | 
| 787 | 
            +
                "vision_tower.encoder.layer.6.layernorm_after.weight": "model-00001-of-00002.safetensors",
         | 
| 788 | 
            +
                "vision_tower.encoder.layer.6.layernorm_before.bias": "model-00001-of-00002.safetensors",
         | 
| 789 | 
            +
                "vision_tower.encoder.layer.6.layernorm_before.weight": "model-00001-of-00002.safetensors",
         | 
| 790 | 
            +
                "vision_tower.encoder.layer.6.mlp.fc1.bias": "model-00001-of-00002.safetensors",
         | 
| 791 | 
            +
                "vision_tower.encoder.layer.6.mlp.fc1.weight": "model-00001-of-00002.safetensors",
         | 
| 792 | 
            +
                "vision_tower.encoder.layer.6.mlp.fc2.bias": "model-00001-of-00002.safetensors",
         | 
| 793 | 
            +
                "vision_tower.encoder.layer.6.mlp.fc2.weight": "model-00001-of-00002.safetensors",
         | 
| 794 | 
            +
                "vision_tower.encoder.layer.7.attention.k_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 795 | 
            +
                "vision_tower.encoder.layer.7.attention.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 796 | 
            +
                "vision_tower.encoder.layer.7.attention.projection_layer.bias": "model-00001-of-00002.safetensors",
         | 
| 797 | 
            +
                "vision_tower.encoder.layer.7.attention.projection_layer.weight": "model-00001-of-00002.safetensors",
         | 
| 798 | 
            +
                "vision_tower.encoder.layer.7.attention.q_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 799 | 
            +
                "vision_tower.encoder.layer.7.attention.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 800 | 
            +
                "vision_tower.encoder.layer.7.attention.v_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 801 | 
            +
                "vision_tower.encoder.layer.7.attention.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 802 | 
            +
                "vision_tower.encoder.layer.7.lambda_1": "model-00001-of-00002.safetensors",
         | 
| 803 | 
            +
                "vision_tower.encoder.layer.7.lambda_2": "model-00001-of-00002.safetensors",
         | 
| 804 | 
            +
                "vision_tower.encoder.layer.7.layernorm_after.bias": "model-00001-of-00002.safetensors",
         | 
| 805 | 
            +
                "vision_tower.encoder.layer.7.layernorm_after.weight": "model-00001-of-00002.safetensors",
         | 
| 806 | 
            +
                "vision_tower.encoder.layer.7.layernorm_before.bias": "model-00001-of-00002.safetensors",
         | 
| 807 | 
            +
                "vision_tower.encoder.layer.7.layernorm_before.weight": "model-00001-of-00002.safetensors",
         | 
| 808 | 
            +
                "vision_tower.encoder.layer.7.mlp.fc1.bias": "model-00001-of-00002.safetensors",
         | 
| 809 | 
            +
                "vision_tower.encoder.layer.7.mlp.fc1.weight": "model-00001-of-00002.safetensors",
         | 
| 810 | 
            +
                "vision_tower.encoder.layer.7.mlp.fc2.bias": "model-00001-of-00002.safetensors",
         | 
| 811 | 
            +
                "vision_tower.encoder.layer.7.mlp.fc2.weight": "model-00001-of-00002.safetensors",
         | 
| 812 | 
            +
                "vision_tower.encoder.layer.8.attention.k_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 813 | 
            +
                "vision_tower.encoder.layer.8.attention.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 814 | 
            +
                "vision_tower.encoder.layer.8.attention.projection_layer.bias": "model-00001-of-00002.safetensors",
         | 
| 815 | 
            +
                "vision_tower.encoder.layer.8.attention.projection_layer.weight": "model-00001-of-00002.safetensors",
         | 
| 816 | 
            +
                "vision_tower.encoder.layer.8.attention.q_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 817 | 
            +
                "vision_tower.encoder.layer.8.attention.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 818 | 
            +
                "vision_tower.encoder.layer.8.attention.v_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 819 | 
            +
                "vision_tower.encoder.layer.8.attention.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 820 | 
            +
                "vision_tower.encoder.layer.8.lambda_1": "model-00001-of-00002.safetensors",
         | 
| 821 | 
            +
                "vision_tower.encoder.layer.8.lambda_2": "model-00001-of-00002.safetensors",
         | 
| 822 | 
            +
                "vision_tower.encoder.layer.8.layernorm_after.bias": "model-00001-of-00002.safetensors",
         | 
| 823 | 
            +
                "vision_tower.encoder.layer.8.layernorm_after.weight": "model-00001-of-00002.safetensors",
         | 
| 824 | 
            +
                "vision_tower.encoder.layer.8.layernorm_before.bias": "model-00001-of-00002.safetensors",
         | 
| 825 | 
            +
                "vision_tower.encoder.layer.8.layernorm_before.weight": "model-00001-of-00002.safetensors",
         | 
| 826 | 
            +
                "vision_tower.encoder.layer.8.mlp.fc1.bias": "model-00001-of-00002.safetensors",
         | 
| 827 | 
            +
                "vision_tower.encoder.layer.8.mlp.fc1.weight": "model-00001-of-00002.safetensors",
         | 
| 828 | 
            +
                "vision_tower.encoder.layer.8.mlp.fc2.bias": "model-00001-of-00002.safetensors",
         | 
| 829 | 
            +
                "vision_tower.encoder.layer.8.mlp.fc2.weight": "model-00001-of-00002.safetensors",
         | 
| 830 | 
            +
                "vision_tower.encoder.layer.9.attention.k_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 831 | 
            +
                "vision_tower.encoder.layer.9.attention.k_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 832 | 
            +
                "vision_tower.encoder.layer.9.attention.projection_layer.bias": "model-00001-of-00002.safetensors",
         | 
| 833 | 
            +
                "vision_tower.encoder.layer.9.attention.projection_layer.weight": "model-00001-of-00002.safetensors",
         | 
| 834 | 
            +
                "vision_tower.encoder.layer.9.attention.q_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 835 | 
            +
                "vision_tower.encoder.layer.9.attention.q_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 836 | 
            +
                "vision_tower.encoder.layer.9.attention.v_proj.bias": "model-00001-of-00002.safetensors",
         | 
| 837 | 
            +
                "vision_tower.encoder.layer.9.attention.v_proj.weight": "model-00001-of-00002.safetensors",
         | 
| 838 | 
            +
                "vision_tower.encoder.layer.9.lambda_1": "model-00001-of-00002.safetensors",
         | 
| 839 | 
            +
                "vision_tower.encoder.layer.9.lambda_2": "model-00001-of-00002.safetensors",
         | 
| 840 | 
            +
                "vision_tower.encoder.layer.9.layernorm_after.bias": "model-00001-of-00002.safetensors",
         | 
| 841 | 
            +
                "vision_tower.encoder.layer.9.layernorm_after.weight": "model-00001-of-00002.safetensors",
         | 
| 842 | 
            +
                "vision_tower.encoder.layer.9.layernorm_before.bias": "model-00001-of-00002.safetensors",
         | 
| 843 | 
            +
                "vision_tower.encoder.layer.9.layernorm_before.weight": "model-00001-of-00002.safetensors",
         | 
| 844 | 
            +
                "vision_tower.encoder.layer.9.mlp.fc1.bias": "model-00001-of-00002.safetensors",
         | 
| 845 | 
            +
                "vision_tower.encoder.layer.9.mlp.fc1.weight": "model-00001-of-00002.safetensors",
         | 
| 846 | 
            +
                "vision_tower.encoder.layer.9.mlp.fc2.bias": "model-00001-of-00002.safetensors",
         | 
| 847 | 
            +
                "vision_tower.encoder.layer.9.mlp.fc2.weight": "model-00001-of-00002.safetensors"
         | 
| 848 | 
            +
              }
         | 
| 849 | 
            +
            }
         | 
    	
        preprocessor_config.json
    ADDED
    
    | @@ -0,0 +1,34 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "crop_size": null,
         | 
| 3 | 
            +
              "crop_to_patches": false,
         | 
| 4 | 
            +
              "data_format": "channels_first",
         | 
| 5 | 
            +
              "default_to_square": true,
         | 
| 6 | 
            +
              "device": null,
         | 
| 7 | 
            +
              "do_center_crop": null,
         | 
| 8 | 
            +
              "do_convert_rgb": true,
         | 
| 9 | 
            +
              "do_normalize": true,
         | 
| 10 | 
            +
              "do_rescale": true,
         | 
| 11 | 
            +
              "do_resize": true,
         | 
| 12 | 
            +
              "image_mean": [
         | 
| 13 | 
            +
                0.485,
         | 
| 14 | 
            +
                0.456,
         | 
| 15 | 
            +
                0.406
         | 
| 16 | 
            +
              ],
         | 
| 17 | 
            +
              "image_processor_type": "GotOcr2ImageProcessorFast",
         | 
| 18 | 
            +
              "image_std": [
         | 
| 19 | 
            +
                0.229,
         | 
| 20 | 
            +
                0.224,
         | 
| 21 | 
            +
                0.225
         | 
| 22 | 
            +
              ],
         | 
| 23 | 
            +
              "input_data_format": null,
         | 
| 24 | 
            +
              "max_patches": 12,
         | 
| 25 | 
            +
              "min_patches": 1,
         | 
| 26 | 
            +
              "processor_class": "InternVLProcessor",
         | 
| 27 | 
            +
              "resample": 3,
         | 
| 28 | 
            +
              "rescale_factor": 0.00392156862745098,
         | 
| 29 | 
            +
              "return_tensors": null,
         | 
| 30 | 
            +
              "size": {
         | 
| 31 | 
            +
                "height": 448,
         | 
| 32 | 
            +
                "width": 448
         | 
| 33 | 
            +
              }
         | 
| 34 | 
            +
            }
         | 
    	
        processor_config.json
    ADDED
    
    | @@ -0,0 +1,4 @@ | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "image_seq_length": 256,
         | 
| 3 | 
            +
              "processor_class": "InternVLProcessor"
         | 
| 4 | 
            +
            }
         | 
    	
        special_tokens_map.json
    ADDED
    
    | @@ -0,0 +1,44 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "additional_special_tokens": [
         | 
| 3 | 
            +
                "<|im_start|>",
         | 
| 4 | 
            +
                "<|im_end|>",
         | 
| 5 | 
            +
                "<|object_ref_start|>",
         | 
| 6 | 
            +
                "<|object_ref_end|>",
         | 
| 7 | 
            +
                "<|box_start|>",
         | 
| 8 | 
            +
                "<|box_end|>",
         | 
| 9 | 
            +
                "<|quad_start|>",
         | 
| 10 | 
            +
                "<|quad_end|>",
         | 
| 11 | 
            +
                "<|vision_start|>",
         | 
| 12 | 
            +
                "<|vision_end|>",
         | 
| 13 | 
            +
                "<|vision_pad|>",
         | 
| 14 | 
            +
                "<|image_pad|>",
         | 
| 15 | 
            +
                "<|video_pad|>",
         | 
| 16 | 
            +
                "<img>",
         | 
| 17 | 
            +
                "</img>",
         | 
| 18 | 
            +
                "<IMG_CONTEXT>",
         | 
| 19 | 
            +
                "<quad>",
         | 
| 20 | 
            +
                "</quad>",
         | 
| 21 | 
            +
                "<ref>",
         | 
| 22 | 
            +
                "</ref>",
         | 
| 23 | 
            +
                "<box>",
         | 
| 24 | 
            +
                "</box>"
         | 
| 25 | 
            +
              ],
         | 
| 26 | 
            +
              "context_image_token": "<IMG_CONTEXT>",
         | 
| 27 | 
            +
              "end_image_token": "</img>",
         | 
| 28 | 
            +
              "eos_token": {
         | 
| 29 | 
            +
                "content": "<|im_end|>",
         | 
| 30 | 
            +
                "lstrip": false,
         | 
| 31 | 
            +
                "normalized": false,
         | 
| 32 | 
            +
                "rstrip": false,
         | 
| 33 | 
            +
                "single_word": false
         | 
| 34 | 
            +
              },
         | 
| 35 | 
            +
              "pad_token": {
         | 
| 36 | 
            +
                "content": "<|endoftext|>",
         | 
| 37 | 
            +
                "lstrip": false,
         | 
| 38 | 
            +
                "normalized": false,
         | 
| 39 | 
            +
                "rstrip": false,
         | 
| 40 | 
            +
                "single_word": false
         | 
| 41 | 
            +
              },
         | 
| 42 | 
            +
              "start_image_token": "<img>",
         | 
| 43 | 
            +
              "video_token": "<video>"
         | 
| 44 | 
            +
            }
         | 
    	
        tokenizer.json
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:7b9d18660f656ae5a87df2d5d6ed990e80f292d3473c1a35cae8259a5d28cd67
         | 
| 3 | 
            +
            size 11424484
         | 
    	
        tokenizer_config.json
    ADDED
    
    | @@ -0,0 +1,339 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "add_bos_token": false,
         | 
| 3 | 
            +
              "add_eos_token": false,
         | 
| 4 | 
            +
              "add_prefix_space": false,
         | 
| 5 | 
            +
              "added_tokens_decoder": {
         | 
| 6 | 
            +
                "151643": {
         | 
| 7 | 
            +
                  "content": "<|endoftext|>",
         | 
| 8 | 
            +
                  "lstrip": false,
         | 
| 9 | 
            +
                  "normalized": false,
         | 
| 10 | 
            +
                  "rstrip": false,
         | 
| 11 | 
            +
                  "single_word": false,
         | 
| 12 | 
            +
                  "special": true
         | 
| 13 | 
            +
                },
         | 
| 14 | 
            +
                "151644": {
         | 
| 15 | 
            +
                  "content": "<|im_start|>",
         | 
| 16 | 
            +
                  "lstrip": false,
         | 
| 17 | 
            +
                  "normalized": false,
         | 
| 18 | 
            +
                  "rstrip": false,
         | 
| 19 | 
            +
                  "single_word": false,
         | 
| 20 | 
            +
                  "special": true
         | 
| 21 | 
            +
                },
         | 
| 22 | 
            +
                "151645": {
         | 
| 23 | 
            +
                  "content": "<|im_end|>",
         | 
| 24 | 
            +
                  "lstrip": false,
         | 
| 25 | 
            +
                  "normalized": false,
         | 
| 26 | 
            +
                  "rstrip": false,
         | 
| 27 | 
            +
                  "single_word": false,
         | 
| 28 | 
            +
                  "special": true
         | 
| 29 | 
            +
                },
         | 
| 30 | 
            +
                "151646": {
         | 
| 31 | 
            +
                  "content": "<|object_ref_start|>",
         | 
| 32 | 
            +
                  "lstrip": false,
         | 
| 33 | 
            +
                  "normalized": false,
         | 
| 34 | 
            +
                  "rstrip": false,
         | 
| 35 | 
            +
                  "single_word": false,
         | 
| 36 | 
            +
                  "special": true
         | 
| 37 | 
            +
                },
         | 
| 38 | 
            +
                "151647": {
         | 
| 39 | 
            +
                  "content": "<|object_ref_end|>",
         | 
| 40 | 
            +
                  "lstrip": false,
         | 
| 41 | 
            +
                  "normalized": false,
         | 
| 42 | 
            +
                  "rstrip": false,
         | 
| 43 | 
            +
                  "single_word": false,
         | 
| 44 | 
            +
                  "special": true
         | 
| 45 | 
            +
                },
         | 
| 46 | 
            +
                "151648": {
         | 
| 47 | 
            +
                  "content": "<|box_start|>",
         | 
| 48 | 
            +
                  "lstrip": false,
         | 
| 49 | 
            +
                  "normalized": false,
         | 
| 50 | 
            +
                  "rstrip": false,
         | 
| 51 | 
            +
                  "single_word": false,
         | 
| 52 | 
            +
                  "special": true
         | 
| 53 | 
            +
                },
         | 
| 54 | 
            +
                "151649": {
         | 
| 55 | 
            +
                  "content": "<|box_end|>",
         | 
| 56 | 
            +
                  "lstrip": false,
         | 
| 57 | 
            +
                  "normalized": false,
         | 
| 58 | 
            +
                  "rstrip": false,
         | 
| 59 | 
            +
                  "single_word": false,
         | 
| 60 | 
            +
                  "special": true
         | 
| 61 | 
            +
                },
         | 
| 62 | 
            +
                "151650": {
         | 
| 63 | 
            +
                  "content": "<|quad_start|>",
         | 
| 64 | 
            +
                  "lstrip": false,
         | 
| 65 | 
            +
                  "normalized": false,
         | 
| 66 | 
            +
                  "rstrip": false,
         | 
| 67 | 
            +
                  "single_word": false,
         | 
| 68 | 
            +
                  "special": true
         | 
| 69 | 
            +
                },
         | 
| 70 | 
            +
                "151651": {
         | 
| 71 | 
            +
                  "content": "<|quad_end|>",
         | 
| 72 | 
            +
                  "lstrip": false,
         | 
| 73 | 
            +
                  "normalized": false,
         | 
| 74 | 
            +
                  "rstrip": false,
         | 
| 75 | 
            +
                  "single_word": false,
         | 
| 76 | 
            +
                  "special": true
         | 
| 77 | 
            +
                },
         | 
| 78 | 
            +
                "151652": {
         | 
| 79 | 
            +
                  "content": "<|vision_start|>",
         | 
| 80 | 
            +
                  "lstrip": false,
         | 
| 81 | 
            +
                  "normalized": false,
         | 
| 82 | 
            +
                  "rstrip": false,
         | 
| 83 | 
            +
                  "single_word": false,
         | 
| 84 | 
            +
                  "special": true
         | 
| 85 | 
            +
                },
         | 
| 86 | 
            +
                "151653": {
         | 
| 87 | 
            +
                  "content": "<|vision_end|>",
         | 
| 88 | 
            +
                  "lstrip": false,
         | 
| 89 | 
            +
                  "normalized": false,
         | 
| 90 | 
            +
                  "rstrip": false,
         | 
| 91 | 
            +
                  "single_word": false,
         | 
| 92 | 
            +
                  "special": true
         | 
| 93 | 
            +
                },
         | 
| 94 | 
            +
                "151654": {
         | 
| 95 | 
            +
                  "content": "<|vision_pad|>",
         | 
| 96 | 
            +
                  "lstrip": false,
         | 
| 97 | 
            +
                  "normalized": false,
         | 
| 98 | 
            +
                  "rstrip": false,
         | 
| 99 | 
            +
                  "single_word": false,
         | 
| 100 | 
            +
                  "special": true
         | 
| 101 | 
            +
                },
         | 
| 102 | 
            +
                "151655": {
         | 
| 103 | 
            +
                  "content": "<|image_pad|>",
         | 
| 104 | 
            +
                  "lstrip": false,
         | 
| 105 | 
            +
                  "normalized": false,
         | 
| 106 | 
            +
                  "rstrip": false,
         | 
| 107 | 
            +
                  "single_word": false,
         | 
| 108 | 
            +
                  "special": true
         | 
| 109 | 
            +
                },
         | 
| 110 | 
            +
                "151656": {
         | 
| 111 | 
            +
                  "content": "<|video_pad|>",
         | 
| 112 | 
            +
                  "lstrip": false,
         | 
| 113 | 
            +
                  "normalized": false,
         | 
| 114 | 
            +
                  "rstrip": false,
         | 
| 115 | 
            +
                  "single_word": false,
         | 
| 116 | 
            +
                  "special": true
         | 
| 117 | 
            +
                },
         | 
| 118 | 
            +
                "151657": {
         | 
| 119 | 
            +
                  "content": "<tool_call>",
         | 
| 120 | 
            +
                  "lstrip": false,
         | 
| 121 | 
            +
                  "normalized": false,
         | 
| 122 | 
            +
                  "rstrip": false,
         | 
| 123 | 
            +
                  "single_word": false,
         | 
| 124 | 
            +
                  "special": false
         | 
| 125 | 
            +
                },
         | 
| 126 | 
            +
                "151658": {
         | 
| 127 | 
            +
                  "content": "</tool_call>",
         | 
| 128 | 
            +
                  "lstrip": false,
         | 
| 129 | 
            +
                  "normalized": false,
         | 
| 130 | 
            +
                  "rstrip": false,
         | 
| 131 | 
            +
                  "single_word": false,
         | 
| 132 | 
            +
                  "special": false
         | 
| 133 | 
            +
                },
         | 
| 134 | 
            +
                "151659": {
         | 
| 135 | 
            +
                  "content": "<|fim_prefix|>",
         | 
| 136 | 
            +
                  "lstrip": false,
         | 
| 137 | 
            +
                  "normalized": false,
         | 
| 138 | 
            +
                  "rstrip": false,
         | 
| 139 | 
            +
                  "single_word": false,
         | 
| 140 | 
            +
                  "special": false
         | 
| 141 | 
            +
                },
         | 
| 142 | 
            +
                "151660": {
         | 
| 143 | 
            +
                  "content": "<|fim_middle|>",
         | 
| 144 | 
            +
                  "lstrip": false,
         | 
| 145 | 
            +
                  "normalized": false,
         | 
| 146 | 
            +
                  "rstrip": false,
         | 
| 147 | 
            +
                  "single_word": false,
         | 
| 148 | 
            +
                  "special": false
         | 
| 149 | 
            +
                },
         | 
| 150 | 
            +
                "151661": {
         | 
| 151 | 
            +
                  "content": "<|fim_suffix|>",
         | 
| 152 | 
            +
                  "lstrip": false,
         | 
| 153 | 
            +
                  "normalized": false,
         | 
| 154 | 
            +
                  "rstrip": false,
         | 
| 155 | 
            +
                  "single_word": false,
         | 
| 156 | 
            +
                  "special": false
         | 
| 157 | 
            +
                },
         | 
| 158 | 
            +
                "151662": {
         | 
| 159 | 
            +
                  "content": "<|fim_pad|>",
         | 
| 160 | 
            +
                  "lstrip": false,
         | 
| 161 | 
            +
                  "normalized": false,
         | 
| 162 | 
            +
                  "rstrip": false,
         | 
| 163 | 
            +
                  "single_word": false,
         | 
| 164 | 
            +
                  "special": false
         | 
| 165 | 
            +
                },
         | 
| 166 | 
            +
                "151663": {
         | 
| 167 | 
            +
                  "content": "<|repo_name|>",
         | 
| 168 | 
            +
                  "lstrip": false,
         | 
| 169 | 
            +
                  "normalized": false,
         | 
| 170 | 
            +
                  "rstrip": false,
         | 
| 171 | 
            +
                  "single_word": false,
         | 
| 172 | 
            +
                  "special": false
         | 
| 173 | 
            +
                },
         | 
| 174 | 
            +
                "151664": {
         | 
| 175 | 
            +
                  "content": "<|file_sep|>",
         | 
| 176 | 
            +
                  "lstrip": false,
         | 
| 177 | 
            +
                  "normalized": false,
         | 
| 178 | 
            +
                  "rstrip": false,
         | 
| 179 | 
            +
                  "single_word": false,
         | 
| 180 | 
            +
                  "special": false
         | 
| 181 | 
            +
                },
         | 
| 182 | 
            +
                "151665": {
         | 
| 183 | 
            +
                  "content": "<tool_response>",
         | 
| 184 | 
            +
                  "lstrip": false,
         | 
| 185 | 
            +
                  "normalized": false,
         | 
| 186 | 
            +
                  "rstrip": false,
         | 
| 187 | 
            +
                  "single_word": false,
         | 
| 188 | 
            +
                  "special": false
         | 
| 189 | 
            +
                },
         | 
| 190 | 
            +
                "151666": {
         | 
| 191 | 
            +
                  "content": "</tool_response>",
         | 
| 192 | 
            +
                  "lstrip": false,
         | 
| 193 | 
            +
                  "normalized": false,
         | 
| 194 | 
            +
                  "rstrip": false,
         | 
| 195 | 
            +
                  "single_word": false,
         | 
| 196 | 
            +
                  "special": false
         | 
| 197 | 
            +
                },
         | 
| 198 | 
            +
                "151667": {
         | 
| 199 | 
            +
                  "content": "<think>",
         | 
| 200 | 
            +
                  "lstrip": false,
         | 
| 201 | 
            +
                  "normalized": false,
         | 
| 202 | 
            +
                  "rstrip": false,
         | 
| 203 | 
            +
                  "single_word": false,
         | 
| 204 | 
            +
                  "special": false
         | 
| 205 | 
            +
                },
         | 
| 206 | 
            +
                "151668": {
         | 
| 207 | 
            +
                  "content": "</think>",
         | 
| 208 | 
            +
                  "lstrip": false,
         | 
| 209 | 
            +
                  "normalized": false,
         | 
| 210 | 
            +
                  "rstrip": false,
         | 
| 211 | 
            +
                  "single_word": false,
         | 
| 212 | 
            +
                  "special": false
         | 
| 213 | 
            +
                },
         | 
| 214 | 
            +
                "151669": {
         | 
| 215 | 
            +
                  "content": "<img>",
         | 
| 216 | 
            +
                  "lstrip": false,
         | 
| 217 | 
            +
                  "normalized": false,
         | 
| 218 | 
            +
                  "rstrip": false,
         | 
| 219 | 
            +
                  "single_word": false,
         | 
| 220 | 
            +
                  "special": true
         | 
| 221 | 
            +
                },
         | 
| 222 | 
            +
                "151670": {
         | 
| 223 | 
            +
                  "content": "</img>",
         | 
| 224 | 
            +
                  "lstrip": false,
         | 
| 225 | 
            +
                  "normalized": false,
         | 
| 226 | 
            +
                  "rstrip": false,
         | 
| 227 | 
            +
                  "single_word": false,
         | 
| 228 | 
            +
                  "special": true
         | 
| 229 | 
            +
                },
         | 
| 230 | 
            +
                "151671": {
         | 
| 231 | 
            +
                  "content": "<IMG_CONTEXT>",
         | 
| 232 | 
            +
                  "lstrip": false,
         | 
| 233 | 
            +
                  "normalized": false,
         | 
| 234 | 
            +
                  "rstrip": false,
         | 
| 235 | 
            +
                  "single_word": false,
         | 
| 236 | 
            +
                  "special": true
         | 
| 237 | 
            +
                },
         | 
| 238 | 
            +
                "151672": {
         | 
| 239 | 
            +
                  "content": "<quad>",
         | 
| 240 | 
            +
                  "lstrip": false,
         | 
| 241 | 
            +
                  "normalized": false,
         | 
| 242 | 
            +
                  "rstrip": false,
         | 
| 243 | 
            +
                  "single_word": false,
         | 
| 244 | 
            +
                  "special": true
         | 
| 245 | 
            +
                },
         | 
| 246 | 
            +
                "151673": {
         | 
| 247 | 
            +
                  "content": "</quad>",
         | 
| 248 | 
            +
                  "lstrip": false,
         | 
| 249 | 
            +
                  "normalized": false,
         | 
| 250 | 
            +
                  "rstrip": false,
         | 
| 251 | 
            +
                  "single_word": false,
         | 
| 252 | 
            +
                  "special": true
         | 
| 253 | 
            +
                },
         | 
| 254 | 
            +
                "151674": {
         | 
| 255 | 
            +
                  "content": "<ref>",
         | 
| 256 | 
            +
                  "lstrip": false,
         | 
| 257 | 
            +
                  "normalized": false,
         | 
| 258 | 
            +
                  "rstrip": false,
         | 
| 259 | 
            +
                  "single_word": false,
         | 
| 260 | 
            +
                  "special": true
         | 
| 261 | 
            +
                },
         | 
| 262 | 
            +
                "151675": {
         | 
| 263 | 
            +
                  "content": "</ref>",
         | 
| 264 | 
            +
                  "lstrip": false,
         | 
| 265 | 
            +
                  "normalized": false,
         | 
| 266 | 
            +
                  "rstrip": false,
         | 
| 267 | 
            +
                  "single_word": false,
         | 
| 268 | 
            +
                  "special": true
         | 
| 269 | 
            +
                },
         | 
| 270 | 
            +
                "151676": {
         | 
| 271 | 
            +
                  "content": "<box>",
         | 
| 272 | 
            +
                  "lstrip": false,
         | 
| 273 | 
            +
                  "normalized": false,
         | 
| 274 | 
            +
                  "rstrip": false,
         | 
| 275 | 
            +
                  "single_word": false,
         | 
| 276 | 
            +
                  "special": true
         | 
| 277 | 
            +
                },
         | 
| 278 | 
            +
                "151677": {
         | 
| 279 | 
            +
                  "content": "</box>",
         | 
| 280 | 
            +
                  "lstrip": false,
         | 
| 281 | 
            +
                  "normalized": false,
         | 
| 282 | 
            +
                  "rstrip": false,
         | 
| 283 | 
            +
                  "single_word": false,
         | 
| 284 | 
            +
                  "special": true
         | 
| 285 | 
            +
                },
         | 
| 286 | 
            +
                "151678": {
         | 
| 287 | 
            +
                  "content": "<video>",
         | 
| 288 | 
            +
                  "lstrip": false,
         | 
| 289 | 
            +
                  "normalized": false,
         | 
| 290 | 
            +
                  "rstrip": false,
         | 
| 291 | 
            +
                  "single_word": false,
         | 
| 292 | 
            +
                  "special": true
         | 
| 293 | 
            +
                }
         | 
| 294 | 
            +
              },
         | 
| 295 | 
            +
              "additional_special_tokens": [
         | 
| 296 | 
            +
                "<|im_start|>",
         | 
| 297 | 
            +
                "<|im_end|>",
         | 
| 298 | 
            +
                "<|object_ref_start|>",
         | 
| 299 | 
            +
                "<|object_ref_end|>",
         | 
| 300 | 
            +
                "<|box_start|>",
         | 
| 301 | 
            +
                "<|box_end|>",
         | 
| 302 | 
            +
                "<|quad_start|>",
         | 
| 303 | 
            +
                "<|quad_end|>",
         | 
| 304 | 
            +
                "<|vision_start|>",
         | 
| 305 | 
            +
                "<|vision_end|>",
         | 
| 306 | 
            +
                "<|vision_pad|>",
         | 
| 307 | 
            +
                "<|image_pad|>",
         | 
| 308 | 
            +
                "<|video_pad|>",
         | 
| 309 | 
            +
                "<img>",
         | 
| 310 | 
            +
                "</img>",
         | 
| 311 | 
            +
                "<IMG_CONTEXT>",
         | 
| 312 | 
            +
                "<quad>",
         | 
| 313 | 
            +
                "</quad>",
         | 
| 314 | 
            +
                "<ref>",
         | 
| 315 | 
            +
                "</ref>",
         | 
| 316 | 
            +
                "<box>",
         | 
| 317 | 
            +
                "</box>"
         | 
| 318 | 
            +
              ],
         | 
| 319 | 
            +
              "bos_token": null,
         | 
| 320 | 
            +
              "clean_up_tokenization_spaces": false,
         | 
| 321 | 
            +
              "context_image_token": "<IMG_CONTEXT>",
         | 
| 322 | 
            +
              "end_image_token": "</img>",
         | 
| 323 | 
            +
              "eos_token": "<|im_end|>",
         | 
| 324 | 
            +
              "errors": "replace",
         | 
| 325 | 
            +
              "extra_special_tokens": {
         | 
| 326 | 
            +
                "context_image_token": "<IMG_CONTEXT>",
         | 
| 327 | 
            +
                "end_image_token": "</img>",
         | 
| 328 | 
            +
                "start_image_token": "<img>",
         | 
| 329 | 
            +
                "video_token": "<video>"
         | 
| 330 | 
            +
              },
         | 
| 331 | 
            +
              "model_max_length": 40960,
         | 
| 332 | 
            +
              "pad_token": "<|endoftext|>",
         | 
| 333 | 
            +
              "processor_class": "InternVLProcessor",
         | 
| 334 | 
            +
              "split_special_tokens": false,
         | 
| 335 | 
            +
              "start_image_token": "<img>",
         | 
| 336 | 
            +
              "tokenizer_class": "Qwen2Tokenizer",
         | 
| 337 | 
            +
              "unk_token": null,
         | 
| 338 | 
            +
              "video_token": "<video>"
         | 
| 339 | 
            +
            }
         | 
    	
        video_preprocessor_config.json
    ADDED
    
    | @@ -0,0 +1,70 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "_valid_kwargs_names": [
         | 
| 3 | 
            +
                "do_convert_rgb",
         | 
| 4 | 
            +
                "do_resize",
         | 
| 5 | 
            +
                "size",
         | 
| 6 | 
            +
                "size_divisor",
         | 
| 7 | 
            +
                "default_to_square",
         | 
| 8 | 
            +
                "resample",
         | 
| 9 | 
            +
                "do_rescale",
         | 
| 10 | 
            +
                "rescale_factor",
         | 
| 11 | 
            +
                "do_normalize",
         | 
| 12 | 
            +
                "image_mean",
         | 
| 13 | 
            +
                "image_std",
         | 
| 14 | 
            +
                "do_pad",
         | 
| 15 | 
            +
                "do_center_crop",
         | 
| 16 | 
            +
                "crop_size",
         | 
| 17 | 
            +
                "data_format",
         | 
| 18 | 
            +
                "input_data_format",
         | 
| 19 | 
            +
                "device"
         | 
| 20 | 
            +
              ],
         | 
| 21 | 
            +
              "crop_size": null,
         | 
| 22 | 
            +
              "data_format": "channels_first",
         | 
| 23 | 
            +
              "default_to_square": true,
         | 
| 24 | 
            +
              "device": null,
         | 
| 25 | 
            +
              "do_center_crop": null,
         | 
| 26 | 
            +
              "do_convert_rgb": true,
         | 
| 27 | 
            +
              "do_normalize": true,
         | 
| 28 | 
            +
              "do_pad": null,
         | 
| 29 | 
            +
              "do_rescale": true,
         | 
| 30 | 
            +
              "do_resize": true,
         | 
| 31 | 
            +
              "image_mean": [
         | 
| 32 | 
            +
                0.48145466,
         | 
| 33 | 
            +
                0.4578275,
         | 
| 34 | 
            +
                0.40821073
         | 
| 35 | 
            +
              ],
         | 
| 36 | 
            +
              "image_std": [
         | 
| 37 | 
            +
                0.26862954,
         | 
| 38 | 
            +
                0.26130258,
         | 
| 39 | 
            +
                0.27577711
         | 
| 40 | 
            +
              ],
         | 
| 41 | 
            +
              "input_data_format": null,
         | 
| 42 | 
            +
              "model_valid_processing_keys": [
         | 
| 43 | 
            +
                "do_convert_rgb",
         | 
| 44 | 
            +
                "do_resize",
         | 
| 45 | 
            +
                "size",
         | 
| 46 | 
            +
                "size_divisor",
         | 
| 47 | 
            +
                "default_to_square",
         | 
| 48 | 
            +
                "resample",
         | 
| 49 | 
            +
                "do_rescale",
         | 
| 50 | 
            +
                "rescale_factor",
         | 
| 51 | 
            +
                "do_normalize",
         | 
| 52 | 
            +
                "image_mean",
         | 
| 53 | 
            +
                "image_std",
         | 
| 54 | 
            +
                "do_pad",
         | 
| 55 | 
            +
                "do_center_crop",
         | 
| 56 | 
            +
                "crop_size",
         | 
| 57 | 
            +
                "data_format",
         | 
| 58 | 
            +
                "input_data_format",
         | 
| 59 | 
            +
                "device"
         | 
| 60 | 
            +
              ],
         | 
| 61 | 
            +
              "processor_class": "InternVLProcessor",
         | 
| 62 | 
            +
              "resample": 3,
         | 
| 63 | 
            +
              "rescale_factor": 0.00392156862745098,
         | 
| 64 | 
            +
              "size": {
         | 
| 65 | 
            +
                "height": 384,
         | 
| 66 | 
            +
                "width": 384
         | 
| 67 | 
            +
              },
         | 
| 68 | 
            +
              "size_divisor": null,
         | 
| 69 | 
            +
              "video_processor_type": "InternVLVideoProcessor"
         | 
| 70 | 
            +
            }
         | 
    	
        vocab.json
    ADDED
    
    | The diff for this file is too large to render. 
		See raw diff | 
|  | 

