Kottu committed
Update clipGPT.py
clipGPT.py: +16 -6
clipGPT.py CHANGED

@@ -7,6 +7,7 @@ import torch
 import torch.nn as nn
 from torch.utils.data import Dataset, DataLoader
 from torch.nn import functional as F
+from typing import Tuple
 
 import pandas as pd
 import numpy as np
@@ -16,19 +17,30 @@ import nltk
 nltk.download('punkt')
 
 
+class Adapter(nn.Module):
+    def forward(self, x):
+        return self.model(x)
+
+    def __init__(self, sizes: Tuple[int, ...], bias=True, act=nn.Tanh):
+        super(Adapter, self).__init__()
+        layers = []
+        for i in range(len(sizes) -1):
+            layers.append(nn.Linear(sizes[i], sizes[i + 1], bias=bias))
+            if i < len(sizes) - 2:
+                layers.append(act())
+        self.model = nn.Sequential(*layers)
+
+
+
 class ClipGPT2Model(nn.Module):
     def __init__(self, img_feature_length, img_feature_size = 512):
         super(ClipGPT2Model, self).__init__()
-        torch.cuda.empty_cache()
-        gc.collect()
         self.img_feature_length = img_feature_length
-
         self.gpt = GPT2LMHeadModel.from_pretrained('gpt2')
         self.gpt_embedding_size = self.gpt.transformer.wte.weight.shape[1]
         self.clip_project = Adapter((img_feature_size,
                                      (self.gpt_embedding_size * img_feature_length) // 2,
                                      self.gpt_embedding_size * img_feature_length))
-        torch.cuda.empty_cache()
     def get_dummy_token(self,
                         batch_size: int,
                         device: torch.device) -> torch.Tensor:
@@ -39,8 +51,6 @@ class ClipGPT2Model(nn.Module):
                 feature: torch.Tensor,
                 mask = None,
                 labels = None):
-        torch.cuda.empty_cache()
-        gc.collect()
 
         embedding_text = self.gpt.transformer.wte(tokens)
         feature_projections = self.clip_project(feature).view(-1, self.img_feature_length, self.gpt_embedding_size)