Spaces:

arcAman07
/

KanyeGEN

Runtime error

App Files Files Community

arcAman07 committed on Apr 10, 2023

Commit

9d1893a

1 Parent(s): a3c6bef

added entire model

Browse files

Files changed (4) hide show

app.py +57 -0
model.py +119 -0
requirements.txt +1 -0
train.py +83 -0

app.py ADDED Viewed

	@@ -0,0 +1,57 @@

+import gradio as gr
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from model import Transformer
+# hyperparameters
+batch_size = 16 # how many independent sequences will we process in parallel?
+block_size = 64 # what is the maximum context length for predictions?
+max_iters = 5000
+eval_interval = 100
+learning_rate = 1e-3
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+eval_iters = 200
+n_embd = 128
+n_head = 8
+n_layer = 4
+dropout = 0.0
+vocab = 101
+# ------------
+with open('/Users/deepaksharma/Documents/Python/Kaggle/GenerateKanyeLyrics/Kanye West Lyrics.txt','r',encoding='utf-8') as f:
+    text = f.read()
+chars = sorted(list(set(text)))
+stoi = {ch:i for i,ch in enumerate(chars)}
+itos = {i:ch for i,ch in enumerate(chars)}
+encode = lambda s: [stoi[c] for c in s]
+decode = lambda l: ''.join([itos[c] for c in l])
+model = Transformer(n_embd,n_layer)
+model.load_state_dict(torch.load('model_weights.pth'))
+model.eval()
+def generate_kanye_lyrics(text, max_tokens=500):
+    if len(text)<64:
+        initial_text = ""
+        padding = 64-len(text)
+        initial_list = []
+        for i in range(0, padding):
+            initial_list.append(0)
+        context = initial_list + encode(text)
+    else:
+        padding = 0
+        initial_text = text[0:len(text)-block_size]
+        context = text[-block_size:]
+        context = encode(context)
+    context = torch.tensor(context, dtype=torch.long)
+    lyrics = torch.stack([context for _ in range(1)], dim=0)
+    return initial_text + decode(model.generate(lyrics, max_tokens=int(max_tokens))[0].tolist())[padding:]
+demo = gr.Interface(fn=generate_kanye_lyrics, inputs=[gr.Textbox(lines=2, placeholder="Enter Starting lyrics ..."),gr.Number()], outputs="text")
+demo.launch()

model.py ADDED Viewed

	@@ -0,0 +1,119 @@

+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+# hyperparameters
+# NOTE: batch_size, max_iters, eval_interval, learning_rate, device and
+# eval_iters are not referenced by the model code visible in this module.
+batch_size = 16 # how many independent sequences will we process in parallel?
+block_size = 64 # what is the maximum context length for predictions? (also sizes the causal mask and the position-embedding table)
+max_iters = 5000
+eval_interval = 100
+learning_rate = 1e-3
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+eval_iters = 200
+n_embd = 128 # embedding width used by the attention projections, feed-forward layers and LayerNorms in Block
+n_head = 8 # attention heads per Block; each head gets n_embd // n_head features
+n_layer = 4 # not read by the classes below; Transformer takes its layer count as an argument
+dropout = 0.0 # probability handed to every nn.Dropout in this module
+vocab = 101 # rows of the token-embedding table and width of the output logits
+# ------------
+class Head(nn.Module):
+    def __init__(self, head_size):
+        super(Head,self).__init__()
+        self.head_size = head_size
+        self.dropout = nn.Dropout(dropout)
+        self.key = nn.Linear(n_embd, head_size, bias=False)
+        self.query = nn.Linear(n_embd, head_size, bias=False)
+        self.value = nn.Linear(n_embd, head_size, bias=False)
+        self.register_buffer('tril', torch.tril(torch.ones(block_size, block_size)))
+    def forward(self,x):
+        k = self.key(x)
+        q = self.query(x)
+        wei = q @ k.transpose(-2,-1) * (self.head_size ** -0.5)
+        wei = wei.masked_fill(self.tril == 0, float('-inf'))
+        wei = F.softmax(wei, dim=-1)
+        wei = self.dropout(wei)
+        v = self.value(x)
+        out = wei @ v
+        return out
+class MultiHeadAttention(nn.Module):
+    def __init__(self, n_head, head_size):
+        super(MultiHeadAttention,self).__init__()
+        self.head_size = head_size
+        self.n_head = n_head
+        self.heads = nn.ModuleList([Head(head_size) for _ in range(n_head)])
+        self.out = nn.Linear(n_embd, n_embd)
+        self.dropout = nn.Dropout(dropout)
+    def forward(self,x):
+        out = torch.cat([h(x) for h in self.heads], dim=-1)
+        out = self.out(out)
+        out = self.dropout(out)
+        return out
+class FeedForwardLayer(nn.Module):
+    def __init__(self, n_embd):
+        super(FeedForwardLayer, self).__init__()
+        self.n_embd = n_embd
+        self.fc1 = nn.Linear(n_embd, 4*n_embd)
+        self.fc2 = nn.Linear(4*n_embd,n_embd)
+        self.dropout = nn.Dropout(dropout)
+    def forward(self, x):
+        out = self.fc1(x)
+        out = F.gelu(out)
+        out = self.fc2(out)
+        out = self.dropout(out)
+        return out
+class Block(nn.Module):
+    def __init__(self):
+        super(Block, self).__init__()
+        self.attn = MultiHeadAttention(n_head, n_embd // n_head)
+        self.ff = FeedForwardLayer(n_embd)
+        self.ln1 = nn.LayerNorm(n_embd)
+        self.ln2 = nn.LayerNorm(n_embd)
+    def forward(self,x):
+        x = x + self.attn(self.ln1(x))
+        x = x + self.ff(self.ln2(x))
+        return x
+class Transformer(nn.Module):
+    def __init__(self, n_embd, n_layer):
+        super(Transformer, self).__init__()
+        self.n_embd = n_embd
+        self.n_layer = n_layer
+        self.token_embedding = nn.Embedding(vocab, n_embd)
+        self.position_embedding = nn.Embedding(block_size,n_embd)
+        self.blocks = nn.Sequential(*[Block() for _ in range(n_layer)])
+        self.ln_f = nn.LayerNorm(n_embd) # final layer norm
+        self.ffwd = nn.Linear(n_embd, vocab)
+    def forward(self, idx, targets=None):
+        B,T = idx.shape
+        x = self.token_embedding(idx) + self.position_embedding(torch.arange(T, device=idx.device))
+        x = self.blocks(x)
+        x = self.ln_f(x)
+        logits = self.ffwd(x)
+        if targets is None:
+            loss = None
+        else:
+            B,T,C = logits.shape
+            logits = logits.view(B*T, C)
+            targets = targets.view(B*T)
+            loss = F.cross_entropy(logits, targets, ignore_index=0)
+        return logits,loss
+    def generate(self, idx, max_tokens):
+        for _ in range(max_tokens):
+            idx_cond = idx[:, -block_size:]
+            logits, _ = self(idx_cond)
+            logits = logits[:,-1,:]
+            probs = F.softmax(logits, dim=-1)
+            idx_next = torch.multinomial(probs, num_samples=1)
+            idx = torch.cat([idx, idx_next], dim=-1)
+        return idx
+print(torch. __version__ )

requirements.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ torch==1.13.0

train.py ADDED Viewed

	@@ -0,0 +1,83 @@

+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from model import Transformer
+with open('/Users/deepaksharma/Documents/Python/Kaggle/GenerateKanyeLyrics/Kanye West Lyrics.txt','r',encoding='utf-8') as f:
+    text = f.read()
+chars = sorted(list(set(text)))
+stoi = {ch:i for i,ch in enumerate(chars)}
+itos = {i:ch for i,ch in enumerate(chars)}
+encode = lambda s: [stoi[c] for c in s]
+decode = lambda l: ''.join([itos[c] for c in l])
+data = torch.tensor(encode(text), dtype=torch.long)
+n = int(0.9*len(text))
+train_data = data[:n]
+val_data = data[n:]
+def get_batch(split):
+    if split == 'train':
+        data = train_data
+    elif split == 'val':
+        data = val_data
+    else:
+        raise ValueError("Invalid split")
+    ix = torch.randint(len(data)-block_size,(batch_size,))
+    x = torch.stack([data[i:i+block_size] for i in ix])
+    y = torch.stack([data[i+1:i+block_size+1] for i in ix])
+    return x, y
+# hyperparameters
+batch_size = 16 # how many independent sequences will we process in parallel?
+block_size = 64 # what is the maximum context length for predictions?
+max_iters = 5000
+eval_interval = 100
+learning_rate = 1e-3
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+eval_iters = 200
+n_embd = 128
+n_head = 8
+n_layer = 4
+dropout = 0.0
+vocab = len(chars)
+# ------------
+model = Transformer(n_embd,n_layer)
+print("Total params: ", sum(p.numel() for p in model.parameters()))
+optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
+for steps in range(20000):
+    x,y = get_batch('train')
+    logits, loss = model(x, y)
+    optimizer.zero_grad()
+    loss.backward()
+    optimizer.step()
+    if steps % 100 == 0:
+        print("Step: ", steps, " Loss: ", loss.item())
+# Print model's state_dict
+print("Model's state_dict:")
+for param_tensor in model.state_dict():
+    print(param_tensor, "\t", model.state_dict()[param_tensor].size())
+# Print optimizer's state_dict
+print("Optimizer's state_dict:")
+for var_name in optimizer.state_dict():
+    print(var_name, "\t", optimizer.state_dict()[var_name])
+torch.save(model.state_dict(), 'kanye_weights.pth')
+lyrics = encode("Bitch I am back on my comma , sipping on my CocaCola, driving on a hangover ")
+lyrics = torch.tensor(lyrics, dtype=torch.long)
+lyrics = torch.stack([lyrics for _ in range(1)], dim=0)
+print(decode(model.generate(lyrics, max_tokens=1000)[0].tolist()))