Upload folder using huggingface_hub
- .ipynb_checkpoints/config-checkpoint.py +42 -0
- .ipynb_checkpoints/model-checkpoint.py +16 -0
- __pycache__/config.cpython-311.pyc +0 -0
- __pycache__/model.cpython-311.pyc +0 -0
- config.json +6 -0
- config.py +42 -0
- model.py +16 -0
.ipynb_checkpoints/config-checkpoint.py
ADDED
@@ -0,0 +1,42 @@
(stray Jupyter checkpoint; contents identical to config.py below)
.ipynb_checkpoints/model-checkpoint.py
ADDED
@@ -0,0 +1,16 @@
(stray Jupyter checkpoint; contents identical to model.py below)
__pycache__/config.cpython-311.pyc
ADDED
Binary file (2.07 kB)

__pycache__/model.cpython-311.pyc
ADDED
Binary file (1.7 kB)
config.json
CHANGED
@@ -1,8 +1,13 @@
 {
+  "_name_or_path": "./",
   "architectures": [
     "CSDModel"
   ],
   "attention_dropout": 0.0,
+  "auto_map": {
+    "AutoConfig": "config.CSDConfig",
+    "AutoModel": "model.CSDModel"
+  },
   "content_projection_dim": 768,
   "dropout": 0.0,
   "hidden_act": "quick_gelu",
@@ -12,6 +17,7 @@
   "initializer_range": 0.02,
   "intermediate_size": 4096,
   "layer_norm_eps": 1e-05,
+  "model_type": "CSDModel",
   "num_attention_heads": 16,
   "num_channels": 3,
   "num_hidden_layers": 24,
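The `auto_map` entries added above are what make this repo loadable with remote code: `AutoConfig` and `AutoModel` are pointed at the `CSDConfig` and `CSDModel` classes shipped in config.py and model.py. A minimal loading sketch; the repo id is a placeholder:

from transformers import AutoConfig, AutoModel

# "user/csd-model" is a placeholder for this repo's id on the Hub.
# trust_remote_code=True is required because auto_map resolves to code
# inside the repo (config.CSDConfig / model.CSDModel), not to classes
# bundled with transformers.
config = AutoConfig.from_pretrained("user/csd-model", trust_remote_code=True)
model = AutoModel.from_pretrained("user/csd-model", trust_remote_code=True)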
config.py
ADDED
@@ -0,0 +1,42 @@
+from transformers import PretrainedConfig
+
+class CSDConfig(PretrainedConfig):
+    model_type = "CSDModel"
+
+    def __init__(
+        self,
+        attention_dropout: float = 0.0,
+        dropout: float = 0.0,
+        hidden_act: str = "quick_gelu",
+        hidden_size: int = 1024,
+        image_size: int = 224,
+        initializer_factor: float = 1.0,
+        initializer_range: float = 0.02,
+        intermediate_size: int = 4096,
+        layer_norm_eps: float = 1e-05,
+        num_attention_heads: int = 16,
+        num_channels: int = 3,
+        num_hidden_layers: int = 24,
+        patch_size: int = 14,
+        projection_dim: int = 768,
+        style_projection_dim: int = 768,
+        content_projection_dim: int = 768,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.attention_dropout = attention_dropout
+        self.dropout = dropout
+        self.hidden_act = hidden_act
+        self.hidden_size = hidden_size
+        self.image_size = image_size
+        self.initializer_factor = initializer_factor
+        self.initializer_range = initializer_range
+        self.intermediate_size = intermediate_size
+        self.layer_norm_eps = layer_norm_eps
+        self.num_attention_heads = num_attention_heads
+        self.num_channels = num_channels
+        self.num_hidden_layers = num_hidden_layers
+        self.patch_size = patch_size
+        self.projection_dim = projection_dim
+        self.style_projection_dim = style_projection_dim
+        self.content_projection_dim = content_projection_dim
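Since `CSDConfig` subclasses `PretrainedConfig`, it inherits the standard save/load round trip, which is how the keys in config.json above (including `model_type`) get serialized. A quick sketch, assuming config.py is importable from the working directory:

from config import CSDConfig

# save_pretrained writes config.json containing every field set in
# __init__ plus model_type; from_pretrained reads it back into the class.
cfg = CSDConfig()
cfg.save_pretrained("./csd-demo")                 # writes ./csd-demo/config.json
restored = CSDConfig.from_pretrained("./csd-demo")
assert restored.model_type == "CSDModel"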
model.py
ADDED
@@ -0,0 +1,16 @@
+import torch.nn as nn
+from .config import CSDConfig
+from transformers import PreTrainedModel, CLIPVisionModel
+
+class CSDModel(PreTrainedModel):
+    config_class = CSDConfig
+
+    def __init__(self, config: CSDConfig):
+        super().__init__(config)
+        # CLIP ViT backbone plus two bias-free linear heads that project the
+        # pooled features into separate style and content embedding spaces.
+        self.backbone = CLIPVisionModel(config)
+        self.out_style = nn.Linear(config.hidden_size, config.style_projection_dim, bias=False)
+        self.out_content = nn.Linear(config.hidden_size, config.content_projection_dim, bias=False)
+
+    def forward(self, pixel_values):
+        # The backbone returns a model-output object; project its pooled CLS
+        # features through each head and return both embeddings.
+        features = self.backbone(pixel_values).pooler_output
+        style_embeds = self.out_style(features)
+        content_embeds = self.out_content(features)
+        return style_embeds, content_embeds
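To sanity-check the forward pass end to end, a randomly initialized smoke test is enough; the dummy tensor stands in for a CLIP image processor's `pixel_values`, and the package-style import is an assumption (the relative import in model.py needs package context):

import torch

# Placeholder package name: model.py imports .config relatively, so the repo
# checkout must be importable as a package (here assumed to be named "csd").
from csd.config import CSDConfig
from csd.model import CSDModel

model = CSDModel(CSDConfig()).eval()
pixel_values = torch.randn(1, 3, 224, 224)  # dummy 224x224 RGB batch
with torch.no_grad():
    style_embeds, content_embeds = model(pixel_values)
print(style_embeds.shape, content_embeds.shape)  # torch.Size([1, 768]) each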