| |
| import torch |
| from torch import nn, optim |
| from torch.utils.data import Dataset, DataLoader |
| from transformers import AutoTokenizer |
| from evo_model import EvoDecoderModel |
|
|
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Pretrained BERT (uncased) tokenizer used to turn the raw text into token ids.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
|
|
class TextDataset(Dataset):
    """Next-token-prediction dataset built from a collection of raw strings.

    Every text is encoded once, up front, with ``tokenizer.encode`` using
    ``truncation=True``, ``max_length=max_len`` and ``padding='max_length'``.
    Item ``i`` is the pair ``(ids[:-1], ids[1:])``: the target sequence is the
    input sequence shifted left by one position.
    """

    def __init__(self, texts, tokenizer, max_len: int = 512) -> None:
        """Encode all ``texts`` eagerly.

        Args:
            texts: Iterable of strings to encode.
            tokenizer: Object exposing ``encode(text, truncation=...,
                max_length=..., padding=...)`` that returns a list of ids.
            max_len: Length passed to the tokenizer as ``max_length``.

        Raises:
            ValueError: If ``max_len`` is smaller than 2; the one-position
                shift in ``__getitem__`` would leave empty sequences.
        """
        if max_len < 2:
            raise ValueError(
                f"max_len must be at least 2 to form input/target pairs, got {max_len}"
            )
        self.tokenizer = tokenizer
        self.inputs = [
            tokenizer.encode(
                text,
                truncation=True,
                max_length=max_len,
                padding='max_length',
            )
            for text in texts
        ]

    def __len__(self) -> int:
        """Return the number of encoded texts."""
        return len(self.inputs)

    def __getitem__(self, idx):
        """Return the ``(input_ids, target_ids)`` tensors for sample ``idx``.

        Both tensors are ``torch.long`` and one element shorter than the
        encoded sequence: inputs drop the last id, targets drop the first.
        """
        ids = self.inputs[idx]
        x = torch.tensor(ids[:-1], dtype=torch.long)
        y = torch.tensor(ids[1:], dtype=torch.long)
        return x, y
|
|
| |
# Small in-script corpus of chat-style examples used as training data.
texts = [
    "User: How are you?\nAssistant: I'm doing well, thank you.",
    "User: What is AI?\nAssistant: AI stands for artificial intelligence.",
]
# Encode the corpus once, then serve it in shuffled mini-batches of two.
dataset = TextDataset(texts, tokenizer)
loader = DataLoader(dataset, batch_size=2, shuffle=True)
|
|
| |
# Decoder sized to the tokenizer's vocabulary, moved to the selected device.
model = EvoDecoderModel(vocab_size=tokenizer.vocab_size, d_model=512).to(device)
optimizer = optim.AdamW(model.parameters(), lr=5e-5)
# Samples are padded to a fixed length, so most target positions are padding.
# Excluding them from the loss keeps the model from being trained mainly to
# predict the pad token.
criterion = nn.CrossEntropyLoss(ignore_index=tokenizer.pad_token_id)
|
|
| |
# Train for a fixed number of epochs, reporting the mean batch loss per epoch.
epochs = 5
for epoch in range(epochs):
    total_loss = 0.0
    model.train()
    for x, y in loader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        logits = model(x)
        # Flatten all leading dimensions so each position is one classification
        # example. reshape (unlike view) also accepts non-contiguous tensors.
        loss = criterion(logits.reshape(-1, logits.size(-1)), y.reshape(-1))
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # max(..., 1) avoids a ZeroDivisionError if the loader yields no batches.
    print(f"Epoch {epoch+1} Loss: {total_loss/max(len(loader), 1):.4f}")


# Persist only the learned weights (state dict), not the full module object.
torch.save(model.state_dict(), "evo_decoder_model.pt")
print("✅ Model saved to evo_decoder_model.pt")
|
|