from dataclasses import dataclass
from typing import Optional

import torch
import torch.nn as nn
from transformers import PreTrainedModel, PretrainedConfig
from transformers.utils import ModelOutput

# Local modules: the encoder-decoder Transformer and the sparse autoencoder
# both live in this project's models/model package.
from models.model.transformer import Transformer
from models.model.sparse_autoencoder import SparseAutoencoder
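
# models/model/sparse_autoencoder.py is project-local, so only its interface
# is assumed here: the constructor arguments used in CustomModel below, and a
# forward() that returns (reconstructed, kl_div, encoded). A minimal sketch
# consistent with that interface (the classic KL-divergence sparsity penalty,
# weighted by beta) can stand in for the module if it is unavailable:
class SparseAutoencoder(nn.Module):
    def __init__(self, input_size, hidden_size, sparsity_param=0.05, beta=3):
        super().__init__()
        self.encoder = nn.Linear(input_size, hidden_size)
        self.decoder = nn.Linear(hidden_size, input_size)
        self.sparsity_param = sparsity_param  # target mean activation (rho)
        self.beta = beta                      # weight of the sparsity penalty

    def forward(self, x):
        encoded = torch.sigmoid(self.encoder(x))
        reconstructed = self.decoder(encoded)
        # KL(rho || rho_hat) summed over hidden units, where rho_hat is each
        # unit's mean activation over the batch (clamped for log stability).
        rho = self.sparsity_param
        rho_hat = encoded.mean(dim=0).clamp(1e-6, 1 - 1e-6)
        kl = rho * torch.log(rho / rho_hat) + (1 - rho) * torch.log((1 - rho) / (1 - rho_hat))
        kl_div = self.beta * kl.sum()
        return reconstructed, kl_div, encoded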
class CustomConfig(PretrainedConfig):
    model_type = "custom_model"

    def __init__(
        self,
        hidden_size=768,
        num_attention_heads=12,
        num_hidden_layers=12,
        intermediate_size=3072,
        hidden_dropout_prob=0.1,
        num_act_classes=5,
        num_emotion_classes=7,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.hidden_size = hidden_size
        self.num_attention_heads = num_attention_heads
        self.num_hidden_layers = num_hidden_layers
        self.intermediate_size = intermediate_size
        self.hidden_dropout_prob = hidden_dropout_prob
        self.num_act_classes = num_act_classes
        self.num_emotion_classes = num_emotion_classes
@dataclass
class CustomModelOutput(ModelOutput):
    # BaseModelOutput only carries last_hidden_state/hidden_states/attentions
    # and rejects extra keyword arguments, so the classifier heads and the
    # sparsity penalty need their own ModelOutput subclass.
    last_hidden_state: Optional[torch.FloatTensor] = None
    act_output: Optional[torch.FloatTensor] = None
    emotion_output: Optional[torch.FloatTensor] = None
    kl_div: Optional[torch.FloatTensor] = None


class CustomModel(PreTrainedModel):
    config_class = CustomConfig

    def __init__(self, config):
        super().__init__(config)
        # Vocabulary size and special-token ids match bert-base-uncased
        # (30522 tokens, pad = 0, [CLS] = 101), which the tokenizer is
        # assumed to provide.
        self.transformer = Transformer(
            src_pad_idx=0,
            trg_pad_idx=0,
            trg_sos_idx=101,
            enc_voc_size=30522,
            dec_voc_size=30522,
            d_model=config.hidden_size,
            max_len=128,
            ffn_hidden=config.intermediate_size,
            n_head=config.num_attention_heads,
            n_layers=config.num_hidden_layers,
            drop_prob=config.hidden_dropout_prob,
            device='cuda' if torch.cuda.is_available() else 'cpu',
        )
        self.batch_norm = nn.BatchNorm1d(config.hidden_size)
        self.dropout = nn.Dropout(p=config.hidden_dropout_prob)
        self.act_classifier = nn.Linear(config.hidden_size, config.num_act_classes)
        self.emotion_classifier = nn.Linear(config.hidden_size, config.num_emotion_classes)
        self.sparse_autoencoder = SparseAutoencoder(
            input_size=config.hidden_size,
            hidden_size=config.hidden_size // 2,
            sparsity_param=0.05,
            beta=3,
        )
        self.init_weights()
    def forward(self, input_ids=None, attention_mask=None, **kwargs):
        # The local encoder is assumed to embed token ids itself and to take
        # a broadcastable source mask; adapt the HF-style (batch, seq) mask.
        src_mask = attention_mask.bool().unsqueeze(1).unsqueeze(2) if attention_mask is not None else None
        transformer_output = self.transformer.encoder(input_ids, src_mask)  # (batch, seq, hidden)
        batch_size, seq_len, hidden = transformer_output.shape
        # BatchNorm1d expects (N, C): flatten the token dimension first.
        flat = self.batch_norm(transformer_output.reshape(-1, hidden))
        flat = self.dropout(flat)
        reconstructed, kl_div, encoded = self.sparse_autoencoder(flat)
        # Restore (batch, seq, hidden) before selecting the [CLS] position.
        reconstructed = reconstructed.view(batch_size, seq_len, hidden)
        cls_output = reconstructed[:, 0, :]
        act_output = self.act_classifier(cls_output)
        emotion_output = self.emotion_classifier(cls_output)
        return CustomModelOutput(last_hidden_state=cls_output, act_output=act_output,
                                 emotion_output=emotion_output, kl_div=kl_div)
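
# Smoke test: build the model with the default config and push a dummy batch
# through it. Shapes follow the defaults above (vocab 30522, max_len 128,
# 5 act classes, 7 emotion classes); inputs are moved to the same device the
# Transformer was constructed with.
if __name__ == "__main__":
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    config = CustomConfig()
    model = CustomModel(config).to(device).eval()

    input_ids = torch.randint(0, 30522, (2, 16), device=device)        # (batch, seq)
    attention_mask = torch.ones(2, 16, dtype=torch.long, device=device)

    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)

    print(outputs.act_output.shape)      # torch.Size([2, 5])
    print(outputs.emotion_output.shape)  # torch.Size([2, 7])
    print(outputs.kl_div)                # scalar sparsity penalty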