Commit e37f4c9
Parent(s): cc0f9c4

Upload config

Files changed:
- config.json (+53, -0)
- configuration_gia2.py (+130, -0)
config.json ADDED
@@ -0,0 +1,53 @@
+{
+  "activation_function": "gelu_new",
+  "attention_dropout": 0.0,
+  "attention_layers": [
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local"
+  ],
+  "attention_types": [
+    [
+      [
+        "global",
+        "local"
+      ],
+      6
+    ]
+  ],
+  "auto_map": {
+    "AutoConfig": "configuration_gia2.Gia2Config"
+  },
+  "bos_token_id": 50256,
+  "classifier_dropout": 0.1,
+  "embed_dropout": 0.0,
+  "eos_token_id": 50256,
+  "hidden_size": 768,
+  "image_size": 224,
+  "initializer_range": 0.02,
+  "intermediate_size": null,
+  "layer_norm_epsilon": 1e-05,
+  "max_continuous_size": 377,
+  "max_discrete_value": 18,
+  "max_position_embeddings": 2048,
+  "model_type": "gia2",
+  "num_channels": 3,
+  "num_heads": 12,
+  "num_layers": 12,
+  "patch_size": 16,
+  "resid_dropout": 0.0,
+  "tokenizer_class": "GPT2Tokenizer",
+  "transformers_version": "4.29.1",
+  "use_cache": true,
+  "vocab_size": 50257,
+  "window_size": 256
+}
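A minimal loading sketch (not part of this commit): because "auto_map" maps `AutoConfig` to `configuration_gia2.Gia2Config`, this config can be loaded through the standard Auto API with `trust_remote_code=True`. The repository id below is a placeholder, not the actual repo name.

from transformers import AutoConfig

# trust_remote_code=True lets transformers import configuration_gia2.py from the Hub repo
config = AutoConfig.from_pretrained("<namespace>/<repo-name>", trust_remote_code=True)
print(config.model_type)           # "gia2"
print(config.max_continuous_size)  # 377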
configuration_gia2.py ADDED
@@ -0,0 +1,130 @@
+from transformers import GPTNeoConfig
+
+
+class Gia2Config(GPTNeoConfig):
+    r"""
+    This is the configuration class to store the configuration of a [`Gia2Model`]. It is used to instantiate a Gia2
+    model according to the specified arguments, defining the model architecture. Instantiating a configuration with
+    the defaults will yield a similar configuration to that of the ... (TODO)
+
+    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
+    documentation from [`PretrainedConfig`] for more information.
+
+    Args:
+        vocab_size (`int`, *optional*, defaults to 50257):
+            Vocabulary size of the model. Defines the number of different tokens that can be represented by the
+            `inputs_ids` passed when calling [`Gia2Model`].
+        max_position_embeddings (`int`, *optional*, defaults to 2048):
+            The maximum sequence length that this model might ever be used with. Typically set this to something large
+            just in case (e.g., 512 or 1024 or 2048).
+        hidden_size (`int`, *optional*, defaults to 2048):
+            Dimensionality of the encoder layers and the pooler layer.
+        num_layers (`int`, *optional*, defaults to 24):
+            Number of hidden layers in the Transformer encoder.
+        attention_types (`List`, *optional*, defaults to `[[["global", "local"], 12]]`):
+            The type of attention for each layer in a `List` of the following format `[[["attention_type"],
+            num_layers]]`, e.g. for a 24-layer model `[[["global"], 24]]` or `[[["global", "local"], 12]]`. Choose the
+            value of `attention_type` from `["global", "local"]`.
+        num_heads (`int`, *optional*, defaults to 16):
+            Number of attention heads for each attention layer in the Transformer encoder.
+        intermediate_size (`int`, *optional*, defaults to `None`):
+            Dimensionality of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
+        window_size (`int`, *optional*, defaults to 256):
+            The size of the sliding window for local attention.
+        activation_function (`str` or `function`, *optional*, defaults to `"gelu_new"`):
+            The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
+            `"relu"`, `"selu"` and `"gelu_new"` are supported.
+        resid_dropout (`float`, *optional*, defaults to 0.0):
+            Residual dropout used in the attention pattern.
+        embed_dropout (`float`, *optional*, defaults to 0.0):
+            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
+        attention_dropout (`float`, *optional*, defaults to 0.0):
+            The dropout ratio for the attention probabilities.
+        classifier_dropout (`float`, *optional*, defaults to 0.1):
+            Argument used when doing token classification, used in the model [`GPTNeoForTokenClassification`]. The
+            dropout ratio for the hidden layer.
+        layer_norm_epsilon (`float`, *optional*, defaults to 1e-5):
+            The epsilon used by the layer normalization layers.
+        initializer_range (`float`, *optional*, defaults to 0.02):
+            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
+        use_cache (`bool`, *optional*, defaults to `True`):
+            Whether or not the model should return the last key/values attentions (not used by all models). Only
+            relevant if `config.is_decoder=True`.
+        bos_token_id (`int`, *optional*, defaults to 50256):
+            The id of the beginning of sentence token in the vocabulary.
+        eos_token_id (`int`, *optional*, defaults to 50256):
+            The id of the end of sentence token in the vocabulary.
+        max_continuous_size (`int`, *optional*, defaults to 377):
+            The maximum size of the continuous values.
+        max_discrete_value (`int`, *optional*, defaults to 18):
+            The maximum value of the discrete values.
+        image_size (`int`, *optional*, defaults to 224):
+            The size (resolution) of each image.
+        num_channels (`int`, *optional*, defaults to 3):
+            The number of input channels.
+        patch_size (`int`, *optional*, defaults to 16):
+            The size (resolution) of each patch.
+        tokenizer_class (`str`, *optional*, defaults to `"GPT2Tokenizer"`):
+            The name of the tokenizer class to use.
+    """
+
+    model_type = "gia2"
+
+    def __init__(
+        self,
+        vocab_size=50257,
+        max_position_embeddings=2048,
+        hidden_size=2048,
+        num_layers=24,
+        attention_types=[[["global", "local"], 12]],
+        num_heads=16,
+        intermediate_size=None,
+        window_size=256,
+        activation_function="gelu_new",
+        resid_dropout=0.0,
+        embed_dropout=0.0,
+        attention_dropout=0.0,
+        classifier_dropout=0.1,
+        layer_norm_epsilon=1e-5,
+        initializer_range=0.02,
+        use_cache=True,
+        bos_token_id=50256,
+        eos_token_id=50256,
+        max_continuous_size=377,
+        max_discrete_value=18,
+        image_size=224,
+        num_channels=3,
+        patch_size=16,
+        tokenizer_class="GPT2Tokenizer",
+        **kwargs,
+    ):
+        super().__init__(
+            vocab_size,
+            max_position_embeddings,
+            hidden_size,
+            num_layers,
+            attention_types,
+            num_heads,
+            intermediate_size,
+            window_size,
+            activation_function,
+            resid_dropout,
+            embed_dropout,
+            attention_dropout,
+            classifier_dropout,
+            layer_norm_epsilon,
+            initializer_range,
+            use_cache,
+            bos_token_id,
+            eos_token_id,
+            tokenizer_class=tokenizer_class,
+            **kwargs,
+        )
+        self.max_continuous_size = max_continuous_size
+        self.max_discrete_value = max_discrete_value
+        self.image_size = image_size
+        self.num_channels = num_channels
+        self.patch_size = patch_size
+
+
+Gia2Config.register_for_auto_class()
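A minimal instantiation sketch (assumptions: configuration_gia2.py is importable from the working directory and the output path is arbitrary). Overriding the GPT-Neo-sized defaults as below gives the values seen in the config.json of this commit; `save_pretrained` then writes out a similar file.

from configuration_gia2 import Gia2Config

# Override the GPT-Neo-sized defaults with the smaller values used in config.json
config = Gia2Config(
    hidden_size=768,
    num_layers=12,
    num_heads=12,
    attention_types=[[["global", "local"], 6]],
)
config.save_pretrained("./gia2-config")  # writes a config.json close to the one in this commit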