patchbanks committed on
Commit
7deef83
·
1 Parent(s): 6438498

Upload 12 files

Browse files
Files changed (12) hide show
  1. .gitattributes +3 -0
  2. app.py +258 -0
  3. model_run.py +376 -0
  4. packages.txt +1 -0
  5. requirements.txt +4 -0
  6. sf2/.DS_Store +0 -0
  7. sf2/piano.sf2 +3 -0
  8. temp/.DS_Store +0 -0
  9. temp/output.mid +0 -0
  10. temp/output.wav +3 -0
  11. temp/output_fx.wav +3 -0
  12. utils.py +125 -0
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ sf2/piano.sf2 filter=lfs diff=lfs merge=lfs -text
37
+ temp/output_fx.wav filter=lfs diff=lfs merge=lfs -text
38
+ temp/output.wav filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,258 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from contextlib import nullcontext
2
+ from torch.nn import functional as F
3
+ from utils import TOKENIZER, Dataset
4
+ from pedalboard import Pedalboard, Reverb, Compressor, Gain, Limiter
5
+ from pedalboard.io import AudioFile
6
+ import pandas as pd
7
+ import subprocess
8
+ import pretty_midi
9
+ import gradio as gr
10
+ import time
11
+ import copy
12
+ import types
13
+ import torch
14
+ import random
15
+ import os
16
+
17
# Enable cuDNN autotuning and TF32 matmuls (no effect on the CPU path used here,
# but harmless).
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.allow_tf32 = True
torch.backends.cuda.matmul.allow_tf32 = True

# True when running inside a Hugging Face Space (SYSTEM env var set by the platform).
in_space = os.getenv("SYSTEM") == "spaces"

# Model hyperparameters; must match the checkpoint loaded below.
n_layer = 8
n_embd = 768
ctx_len = 1536
top_k = 16  # default sampling top-k (also exposed as a UI slider below)

# model_run.py reads these env vars at import time, so they must be set first.
os.environ['RWKV_FLOAT_MODE'] = 'fp32'
os.environ['RWKV_RUN_DEVICE'] = 'cpu'
model_type = 'RWKV'

MODEL_NAME = 'model'  # loads ./model.pth
# Tokens to sample per generation, rounded to a multiple of 13
# (one event line = 12 digits + '\n' = 13 tokens).
LENGTH_PER_TRIAL = round((2000) / 13) * 13
TEMPERATURE = 1.0

# Imported here rather than at the top so the env vars above are already set
# when model_run evaluates them at import time.
from model_run import RWKV_RNN
model = RWKV_RNN(MODEL_NAME, os.environ['RWKV_RUN_DEVICE'], model_type, n_layer, n_embd, ctx_len)
tokenizer = TOKENIZER()

# Working directory for generated .mid/.wav artifacts.
temp_dir = 'temp'
if not os.path.exists(temp_dir):
    os.makedirs(temp_dir)
43
+
44
def clear_midi(dir):
    """Delete every .mid file directly inside *dir* (non-recursive)."""
    stale = [name for name in os.listdir(dir) if name.endswith('.mid')]
    for name in stale:
        os.remove(os.path.join(dir, name))
48
+
49
# Remove stale MIDI output left over from previous runs.
clear_midi(temp_dir)


# Seed context: the sequence-start marker token followed by a newline
# (encodes to [11, 10] under the utils vocabulary).
ctx_seed = "000000000000\n"
ctx = tokenizer.encode(ctx_seed)
src_len = len(ctx)
src_ctx = ctx.copy()
56
+
57
+
58
def generate_midi(LENGTH_PER_TRIAL, src_ctx, model, src_len, ctx_len, TEMPERATURE, top_k, tokenizer, ctx_seed, bpm):
    """Sample an event sequence from the RWKV model and write it to temp/output.mid.

    Each generated event line is 12 digits — 2 pitch, 2 velocity, 4 start tick,
    4 end tick — separated by newlines. Returns the path of the written MIDI file.
    """
    midi_seq = []

    for TRIAL in range(1):  # single trial; loop kept for easy multi-trial use
        t_begin = time.time_ns()

        if TRIAL > 0:
            midi_seq.append("\n")

        ctx = src_ctx.copy()
        model.clear()
        midi_tokens = []  # sliding window of the last two sampled token ids

        if TRIAL == 0:
            # Prime the RNN on the seed tokens and snapshot its state so later
            # trials can restore it instead of re-running the seed.
            init_state = types.SimpleNamespace()
            for i in range(src_len):
                x = ctx[:i+1]
                if i == src_len - 1:
                    init_state.out = model.run(x)
                else:
                    model.run(x)
            model.save(init_state)
        else:
            model.load(init_state)

        midi_seq.append(ctx_seed)

        for i in range(src_len, src_len + LENGTH_PER_TRIAL):
            x = ctx[:i+1]
            x = x[-ctx_len:]

            if i == src_len:
                out = copy.deepcopy(init_state.out)
            else:
                out = model.run(x)

            char = tokenizer.sample_logits(out, x, ctx_len, temperature=TEMPERATURE, top_k=top_k).item()
            midi_tokens.append(char)

            if len(midi_tokens) > 2:
                midi_tokens.pop(0)

            if midi_tokens == [11, 10]:  # stop token pattern: start marker then newline
                break

            midi_seq.append(tokenizer.decode([int(char)]))

            if midi_tokens != [11, 10]:
                ctx += [char]

        t_end = time.time_ns()

    trim_seq = "".join(midi_seq)
    events = trim_seq.split("\n")

    midi_events = []
    sequence = []
    rndm_num = 895645

    for event in events:
        if event.strip() == "":
            # Blank line = segment boundary: flush current segment, start a new
            # pseudo-file with a fresh random name.
            midi_events.append(sequence)
            sequence = []
            rndm_num = random.randint(100000, 999999)
            # FIX: skip the separator itself. Previously it fell through into the
            # parser, hit ValueError on int(''), and injected a spurious all-zero
            # note at the head of every segment (and, for the trailing newline, a
            # whole spurious one-note group that could overwrite the real output).
            continue

        # Event layout: PP VV SSSS EEEE (pitch, velocity, start tick, end tick).
        try:
            pitch = int(event[0:2])
            velocity = int(event[2:4])
            start = int(event[4:8])
            end = int(event[8:12])
        except ValueError:
            pitch = 0
            velocity = 0
            start = 0
            end = 0

        sequence.append({'file_name': f'rwkv_{rndm_num}', 'pitch': pitch, 'velocity': velocity, 'start': start, 'end': end})

    if sequence:
        midi_events.append(sequence)

    midi_events = pd.DataFrame([pd.Series(event) for sequence in midi_events for event in sequence])
    midi_events = midi_events[['file_name', 'pitch', 'velocity', 'start', 'end']]
    midi_events = midi_events.sort_values(by=['file_name', 'start']).reset_index(drop=True)
    # Drop events that run past 3072 ticks (32 bars at 96 ticks/quarter).
    midi_events = midi_events[(midi_events['start'] < 3072) & (midi_events['end'] <= 3072)]

    # Every group writes to the same path (the last group wins); hoisting the
    # path also keeps the return defined when no group survives the filter.
    midi_path = os.path.join(temp_dir, 'output.mid')

    for file_name, file_events in midi_events.groupby('file_name'):
        midi_obj = pretty_midi.PrettyMIDI(initial_tempo=bpm, resolution=96)
        instrument = pretty_midi.Instrument(0)
        midi_obj.instruments.append(instrument)

        for _, event in file_events.iterrows():
            note = pretty_midi.Note(
                pitch=event['pitch'],
                velocity=event['velocity'],
                start=midi_obj.tick_to_time(event['start']),
                end=midi_obj.tick_to_time(event['end'])
            )
            instrument.notes.append(note)

        midi_obj.write(midi_path)

    return midi_path
161
+
162
+
163
def render_wav(midi_file, uploaded_sf2=None):
    """Render *midi_file* to temp/output.wav with the external FluidSynth binary.

    Uses *uploaded_sf2* when given; otherwise picks a random .sf2 from the sf2/
    directory. Returns the rendered wav path. Raises ValueError when no
    SoundFont file is available.
    """
    sf2_dir = 'sf2'
    audio_format = 's16'   # 16-bit PCM output
    sample_rate = '44100'
    gain = '2.0'

    if uploaded_sf2:
        sf2_file = uploaded_sf2
    else:
        sf2_files = [f for f in os.listdir(sf2_dir) if f.endswith('.sf2')]
        if not sf2_files:
            raise ValueError("No SoundFont (.sf2) file found in directory.")
        sf2_file = os.path.join(sf2_dir, random.choice(sf2_files))

    print(f"Using SoundFont: {sf2_file}")
    output_wav = os.path.join(temp_dir, 'output.wav')

    command = [
        'fluidsynth', '-ni', sf2_file, midi_file, '-F', output_wav, '-r', str(sample_rate),
        '-o', f'audio.file.format={audio_format}', '-g', str(gain)
    ]
    # subprocess.DEVNULL replaces the manually-opened os.devnull file handle;
    # like the original subprocess.call, the exit code is not checked.
    subprocess.run(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

    return output_wav
188
+
189
+
190
def generate_and_return_files(bpm, temperature, top_k, uploaded_sf2=None):
    """Gradio callback: generate a melody, render it, apply FX, return file paths.

    Returns (midi_path, fx_wav_path) for the File and Audio output components.
    """
    # FIX: use the path returned by generate_midi instead of discarding it and
    # re-hardcoding 'temp/output.mid'.
    midi_file = generate_midi(
        LENGTH_PER_TRIAL, src_ctx, model, src_len, ctx_len, temperature, top_k,
        tokenizer, ctx_seed, bpm
    )

    wav_raw = render_wav(midi_file, uploaded_sf2)
    wav_fx = os.path.join(temp_dir, 'output_fx.wav')

    # Single FX preset; kept as a list so more chains can be added later.
    sfx_settings = [
        {
            'board': Pedalboard([
                Reverb(room_size=0.50, wet_level=0.40, dry_level=0.70, width=1.0),
                Compressor(threshold_db=-3.0, ratio=8.0, attack_ms=0.0, release_ms=300.0),
            ])
        }
    ]

    for setting in sfx_settings:
        board = setting['board']

        # Stream the raw wav through the pedalboard one second at a time;
        # reset=False keeps effect tails continuous across chunks.
        with AudioFile(wav_raw) as f:
            with AudioFile(wav_fx, 'w', f.samplerate, f.num_channels) as o:
                while f.tell() < f.frames:
                    chunk = f.read(int(f.samplerate))
                    effected = board(chunk, f.samplerate, reset=False)
                    o.write(effected)

    return midi_file, wav_fx
220
+
221
+
222
# Styling for the generate button (referenced by elem_id below).
custom_css = """
#generate-btn {
    background-color: #6366f1 !important;
    color: white !important;
    border: none !important;
    font-size: 16px;
    padding: 10px 20px;
    border-radius: 5px;
    cursor: pointer;
}
#generate-btn:hover {
    background-color: #4f51c5 !important;
}
"""

# Two-column UI: sampling controls on the left, outputs + button on the right.
with gr.Blocks(css=custom_css, theme="soft") as iface:
    gr.Markdown("<h1 style='font-weight: bold; text-align: center;'>Pop-K</h1>")
    gr.Markdown("<p style='text-align:center;'>Pop-K is a small RWKV model that generates pop melodies in C major and A minor.</p>")

    with gr.Row():
        with gr.Column(scale=1):
            bpm = gr.Slider(minimum=50, maximum=200, step=1, value=120, label="BPM")
            temperature = gr.Slider(minimum=0.1, maximum=2.0, step=0.01, value=1.0, label="Temperature")
            # NOTE: this slider shadows the module-level top_k default.
            top_k = gr.Slider(minimum=1, maximum=32, step=1, value=16, label="Top-K")

        with gr.Column(scale=1):
            midi_file = gr.File(label="MIDI File Output")
            audio_file = gr.Audio(label="Generated Audio Output", type="filepath")
            generate_button = gr.Button("Generate", elem_id="generate-btn")

    # uploaded_sf2 is not wired to an input, so the callback's default (None)
    # always applies and a random bundled SoundFont is used.
    generate_button.click(
        fn=generate_and_return_files,
        inputs=[bpm, temperature, top_k],
        outputs=[midi_file, audio_file]
    )

iface.launch(share=True)
model_run.py ADDED
@@ -0,0 +1,376 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import types
2
+ import copy
3
+ import torch
4
+ import math, os
5
+ from torch.nn import functional as F
6
+ import torch.nn as nn
7
+
8
# Dimension of the extra head-QK "copy" attention; 0 disables it entirely.
RWKV_HEAD_QK_DIM = 1536
DEBUG_TIME = False  # when True, print time_* weights as they are loaded

# The fused WKV CUDA kernel is only compiled when running on GPU; the CPU
# path (RWKV_RNN below) never touches RUN_CUDA.
if os.environ['RWKV_RUN_DEVICE'] == 'cuda':
    T_MAX = 1536  # kernel compile-time max sequence length (Tmax)

    from torch.utils.cpp_extension import load
    wkv_cuda = load(name="wkv", sources=["cuda/wkv_op.cpp", "cuda/wkv_cuda.cu"],
                    verbose=True, extra_cuda_cflags=['-res-usage', '--maxrregcount 60', '--use_fast_math', '-O3', '-Xptxas -O3', f'-DTmax={T_MAX}'])

    class WKV(torch.autograd.Function):
        """Autograd wrapper around the fused WKV forward/backward CUDA kernels.

        The kernel always computes in fp32; inputs are upcast here and outputs
        downcast to match RWKV_FLOAT_MODE.
        """

        @staticmethod
        def forward(ctx, B, T, C, w, u, k, v):
            ctx.B = B
            ctx.T = T
            ctx.C = C
            assert T <= T_MAX
            assert B * C % min(C, 1024) == 0
            if '32' in os.environ['RWKV_FLOAT_MODE']:
                # Decay is stored as log magnitude; the kernel expects -exp(w).
                w = -torch.exp(w.contiguous())
                u = u.contiguous()
                k = k.contiguous()
                v = v.contiguous()
            else:
                w = -torch.exp(w.float().contiguous())
                u = u.float().contiguous()
                k = k.float().contiguous()
                v = v.float().contiguous()
            ctx.save_for_backward(w, u, k, v)
            y = torch.empty((B, T, C), device='cuda', memory_format=torch.contiguous_format)
            wkv_cuda.forward(B, T, C, w, u, k, v, y)
            if '32' in os.environ['RWKV_FLOAT_MODE']:
                return y
            elif os.environ['RWKV_FLOAT_MODE'] == 'fp16':
                return y.half()
            elif os.environ['RWKV_FLOAT_MODE'] == 'bf16':
                return y.bfloat16()

        @staticmethod
        def backward(ctx, gy):
            B = ctx.B
            T = ctx.T
            C = ctx.C
            assert T <= T_MAX
            assert B * C % min(C, 1024) == 0
            w, u, k, v = ctx.saved_tensors
            gw = torch.zeros((B, C), device='cuda').contiguous()
            gu = torch.zeros((B, C), device='cuda').contiguous()
            gk = torch.zeros((B, T, C), device='cuda').contiguous()
            gv = torch.zeros((B, T, C), device='cuda').contiguous()
            if '32' in os.environ['RWKV_FLOAT_MODE']:
                wkv_cuda.backward(B, T, C, w, u, k, v, gy.contiguous(), gw, gu, gk, gv)
            else:
                wkv_cuda.backward(B, T, C, w, u, k, v, gy.float().contiguous(), gw, gu, gk, gv)
            # w and u are per-channel parameters shared across the batch.
            gw = torch.sum(gw, dim=0)
            gu = torch.sum(gu, dim=0)
            if '32' in os.environ['RWKV_FLOAT_MODE']:
                return (None, None, None, gw, gu, gk, gv)
            elif os.environ['RWKV_FLOAT_MODE'] == 'fp16':
                return (None, None, None, gw.half(), gu.half(), gk.half(), gv.half())
            elif os.environ['RWKV_FLOAT_MODE'] == 'bf16':
                return (None, None, None, gw.bfloat16(), gu.bfloat16(), gk.bfloat16(), gv.bfloat16())

    def RUN_CUDA(B, T, C, w, u, k, v):
        # Convenience wrapper: move params to GPU and invoke the autograd op.
        return WKV.apply(B, T, C, w.cuda(), u.cuda(), k.cuda(), v.cuda())


# Global model config, populated by RWKV_GPT.__init__ before blocks are built.
RWKV_CFG = types.SimpleNamespace()
76
+
77
class RWKV_ChannelMix(nn.Module):
    """RWKV channel-mixing (feed-forward) sub-block.

    Each position is blended with its time-shifted predecessor, passed through
    a squared-ReLU MLP, and gated by a sigmoid "receptance" signal.
    """

    def __init__(self, layer_id):
        super().__init__()
        self.layer_id = layer_id

        # Shift the sequence one step back in time: pad one row on top and
        # drop one at the bottom of the (B, T, C) tensor.
        self.time_shift = nn.ZeroPad2d((0, 0, 1, -1))
        self.time_mix_k = nn.Parameter(torch.ones(1, 1, RWKV_CFG.n_embd))
        self.time_mix_r = nn.Parameter(torch.ones(1, 1, RWKV_CFG.n_embd))

        hidden_sz = 4 * RWKV_CFG.n_embd
        self.key = nn.Linear(RWKV_CFG.n_embd, hidden_sz, bias=False)
        self.receptance = nn.Linear(RWKV_CFG.n_embd, RWKV_CFG.n_embd, bias=False)
        self.value = nn.Linear(hidden_sz, RWKV_CFG.n_embd, bias=False)

    def forward(self, x):
        prev = self.time_shift(x)
        mixed_k = x * self.time_mix_k + prev * (1 - self.time_mix_k)
        mixed_r = x * self.time_mix_r + prev * (1 - self.time_mix_r)

        hidden = torch.relu(self.key(mixed_k)).square()
        gate = torch.sigmoid(self.receptance(mixed_r))
        return gate * self.value(hidden)
102
+
103
class RWKV_TimeMix(nn.Module):
    """RWKV time-mixing (attention-replacement) sub-block.

    NOTE(review): forward calls RUN_CUDA, which is only defined when
    RWKV_RUN_DEVICE == 'cuda'; on CPU this module would raise NameError.
    CPU inference goes through RWKV_RNN instead.
    """

    def __init__(self, layer_id):
        super().__init__()
        self.layer_id = layer_id
        # Per-channel decay, stored as log magnitude (negated/exponentiated
        # inside the WKV kernel).
        self.time_decay = nn.Parameter(torch.ones(RWKV_CFG.n_embd))
        # Bonus weight for the current token ("u" in the WKV formula).
        self.time_first = nn.Parameter(torch.ones(RWKV_CFG.n_embd) * math.log(0.3))
        # Shift sequence one step back in time (pad top, crop bottom).
        self.time_shift = nn.ZeroPad2d((0,0,1,-1))
        self.time_mix_k = nn.Parameter(torch.ones(1,1,RWKV_CFG.n_embd))
        self.time_mix_v = nn.Parameter(torch.ones(1,1,RWKV_CFG.n_embd))
        self.time_mix_r = nn.Parameter(torch.ones(1,1,RWKV_CFG.n_embd))
        self.key = nn.Linear(RWKV_CFG.n_embd, RWKV_CFG.n_embd, bias=False)
        self.value = nn.Linear(RWKV_CFG.n_embd, RWKV_CFG.n_embd, bias=False)
        self.receptance = nn.Linear(RWKV_CFG.n_embd, RWKV_CFG.n_embd, bias=False)
        self.output = nn.Linear(RWKV_CFG.n_embd, RWKV_CFG.n_embd, bias=False)

    def forward(self, x):
        # x: (B, T, C) — B batch, T sequence length, C = n_embd.
        B, T, C = x.size()

        # Blend each position with its predecessor before the projections.
        xx = self.time_shift(x)
        xk = x * self.time_mix_k + xx * (1 - self.time_mix_k)
        xv = x * self.time_mix_v + xx * (1 - self.time_mix_v)
        xr = x * self.time_mix_r + xx * (1 - self.time_mix_r)

        k = self.key(xk)
        v = self.value(xv)
        r = self.receptance(xr)

        # Sigmoid-gated WKV recurrence, computed by the fused CUDA kernel.
        rwkv = torch.sigmoid(r) * RUN_CUDA(B, T, C, self.time_decay, self.time_first, k, v)

        rwkv = self.output(rwkv)
        return rwkv
134
+
135
class Block(nn.Module):
    """One RWKV layer: (time-mix, or ffnPre in the variant's layer 0) plus a
    channel-mix, each behind a pre-LayerNorm residual connection. Layer 0
    additionally normalizes the raw embeddings with ln0."""

    def __init__(self, layer_id):
        super().__init__()
        self.layer_id = layer_id

        embd = RWKV_CFG.n_embd
        self.ln1 = nn.LayerNorm(embd)
        self.ln2 = nn.LayerNorm(embd)
        if layer_id == 0:
            # Extra normalization applied once to the raw embedding output.
            self.ln0 = nn.LayerNorm(embd)

        if layer_id == 0 and RWKV_CFG.model_type == 'RWKV-ffnPre':
            # ffnPre variant: layer 0 uses a channel-mix instead of attention.
            self.ffnPre = RWKV_ChannelMix(layer_id + 1000)
        else:
            self.att = RWKV_TimeMix(layer_id)

        self.ffn = RWKV_ChannelMix(layer_id)

    def forward(self, x):
        is_first = self.layer_id == 0
        if is_first:
            x = self.ln0(x)

        if is_first and RWKV_CFG.model_type == 'RWKV-ffnPre':
            x = x + self.ffnPre(self.ln1(x))
        else:
            x = x + self.att(self.ln1(x))
        x = x + self.ffn(self.ln2(x))
        return x
161
+
162
class RWKV_GPT(nn.Module):
    """Parallel (GPT-style) RWKV model: embeddings -> Blocks -> head.

    Populates the global RWKV_CFG so Block/ChannelMix/TimeMix constructors can
    read the configuration, then loads weights from MODEL_NAME + '.pth'.
    """

    def __init__(self, MODEL_NAME, RUN_DEVICE, model_type, vocab_size, n_layer, n_embd, ctx_len):
        global RWKV_CFG
        super().__init__()

        # Publish config globally before any Block is constructed.
        RWKV_CFG.RUN_DEVICE = RUN_DEVICE
        RWKV_CFG.model_type = model_type
        RWKV_CFG.vocab_size = vocab_size
        RWKV_CFG.n_layer = n_layer
        RWKV_CFG.n_embd = n_embd
        RWKV_CFG.ctx_len = ctx_len

        print('\nloading RWKV-GPT', MODEL_NAME)

        self.emb = nn.Embedding(vocab_size, n_embd)

        self.blocks = nn.Sequential(*[Block(i) for i in range(n_layer)])

        self.ln_out = nn.LayerNorm(n_embd)
        self.head = nn.Linear(n_embd, vocab_size, bias=False)

        if RWKV_HEAD_QK_DIM > 0:
            # Extra "copy" attention head: lets logits boost tokens already
            # present in the context via a causal q·k score.
            self.head_q = nn.Linear(n_embd, RWKV_HEAD_QK_DIM, bias=False)
            self.head_q.scale_init = 0
            self.head_k = nn.Linear(n_embd, RWKV_HEAD_QK_DIM, bias=False)
            self.head_k.scale_init = 0.1
            # Lower-triangular mask enforcing causality for the copy head.
            self.register_buffer("copy_mask", torch.tril(
                torch.ones(ctx_len, ctx_len)))

        self.ctx_len = ctx_len
        self.eval()
        self.load_state_dict(torch.load(MODEL_NAME + '.pth'))
        self.eval()  # NOTE(review): second eval() is redundant but harmless

    def forward(self, idx):
        # idx: (B, T) token ids.
        B, T = idx.size()
        assert T <= self.ctx_len, "Cannot forward, because len(input) > model ctx_len."

        x = self.emb(idx)
        x = self.blocks(x)
        x = self.ln_out(x)

        if RWKV_HEAD_QK_DIM > 0:
            q = self.head_q(x)[:, :T, :]
            k = self.head_k(x)[:, :T, :]
            c = (q @ k.transpose(-2, -1)) * (1.0 / RWKV_HEAD_QK_DIM)
            c = c.masked_fill(self.copy_mask[:T, :T] == 0, 0)

            # One-hot matmul scatters the copy scores onto the vocabulary,
            # cast to match the active float mode.
            if '32' in os.environ['RWKV_FLOAT_MODE']:
                c = c @ F.one_hot(idx, num_classes=RWKV_CFG.vocab_size)
            elif os.environ['RWKV_FLOAT_MODE'] == 'fp16':
                c = c @ F.one_hot(idx, num_classes=RWKV_CFG.vocab_size).half()
            elif os.environ['RWKV_FLOAT_MODE'] == 'bf16':
                c = c @ F.one_hot(idx, num_classes=RWKV_CFG.vocab_size).bfloat16()

            x = self.head(x) + c
        else:
            x = self.head(x)

        return x
222
+
223
+
224
class RWKV_RNN():
    """Sequential (RNN-mode) RWKV inference engine used on CPU.

    Loads the checkpoint into a nested attribute tree (self.w) and evaluates
    the model one token at a time, carrying per-layer recurrent state in
    self.xx / self.aa / self.bb / self.pp.
    """

    def __init__(self, MODEL_NAME, RUN_DEVICE, model_type, n_layer, n_embd, ctx_len):
        self.RUN_DEVICE = RUN_DEVICE
        self.model_type = model_type
        self.n_layer = n_layer
        self.n_embd = n_embd
        self.ctx_len = ctx_len

        # Nested namespace mirroring the checkpoint's dotted key structure,
        # e.g. self.w.blocks[0].att.key.weight.
        self.w = types.SimpleNamespace()

        #w = torch.load(MODEL_NAME + '.pth',map_location=torch.device(RUN_DEVICE))
        w = torch.load(MODEL_NAME + '.pth', map_location=torch.device(RUN_DEVICE), weights_only=True)
        for x in w.keys():
            w[x] = w[x].float()
            if '.time_' in x:
                w[x] = w[x].squeeze()
            if '.time_decay' in x:
                # Pre-apply the -exp transform expected by the WKV recurrence.
                w[x] = -torch.exp(w[x])
            if DEBUG_TIME and '.time_' in x:
                print(x, w[x].squeeze().cpu().numpy())

            # Build the attribute tree: numeric path parts become dict keys,
            # other parts become SimpleNamespace attributes.
            xx = x.split('.')
            here = self.w
            for i in range(len(xx)):
                if xx[i].isdigit():
                    ii = int(xx[i])
                    if ii not in here:
                        here[ii] = types.SimpleNamespace()
                    here = here[ii]
                else:
                    if i == len(xx) - 1:
                        setattr(here, xx[i], w[x])
                    elif not hasattr(here, xx[i]):
                        if xx[i+1].isdigit():
                            setattr(here, xx[i], {})
                        else:
                            setattr(here, xx[i], types.SimpleNamespace())
                    here = getattr(here, xx[i])

        self.clear()

    def clear(self):
        """Reset all recurrent state (start of a fresh sequence)."""
        self.xx = {}  # previous input per sub-block (for time-mix blending)
        self.aa = {}  # WKV numerator accumulator
        self.bb = {}  # WKV denominator accumulator
        self.pp = {}  # running max exponent (numerical stability)
        self.hk = None  # history of head_k projections for the copy head

    def save(self, target):
        """Deep-copy the current recurrent state onto *target*."""
        target.xx = copy.deepcopy(self.xx)
        target.aa = copy.deepcopy(self.aa)
        target.bb = copy.deepcopy(self.bb)
        target.pp = copy.deepcopy(self.pp)
        target.hk = copy.deepcopy(self.hk)

    def load(self, target):
        """Restore recurrent state previously captured with save()."""
        self.xx = copy.deepcopy(target.xx)
        self.aa = copy.deepcopy(target.aa)
        self.bb = copy.deepcopy(target.bb)
        self.pp = copy.deepcopy(target.pp)
        self.hk = copy.deepcopy(target.hk)

    def LN(self, xx, w):
        """LayerNorm over the embedding dimension with the given weights."""
        return F.layer_norm(xx, (self.n_embd,), weight=w.weight, bias=w.bias)

    def FF(self, xx, w, name):
        """Channel-mix (FFN) step; *name* keys this sub-block's state."""
        if name not in self.xx:
            self.xx[name] = torch.zeros(self.n_embd, device=self.RUN_DEVICE)
        xk = xx * w.time_mix_k + self.xx[name] * (1 - w.time_mix_k)
        xr = xx * w.time_mix_r + self.xx[name] * (1 - w.time_mix_r)
        self.xx[name] = xx

        r = torch.sigmoid(w.receptance.weight @ xr)
        k = torch.square(torch.relu(w.key.weight @ xk))
        kv = w.value.weight @ k

        return r * kv

    def SA(self, xx, w, name):
        """Time-mix (WKV attention) step with log-space-stabilized recurrence."""
        if name not in self.xx:
            self.xx[name] = torch.zeros(self.n_embd, device=self.RUN_DEVICE)
            self.aa[name] = torch.zeros(self.n_embd, device=self.RUN_DEVICE)
            self.bb[name] = torch.zeros(self.n_embd, device=self.RUN_DEVICE)
            # -1e30 acts as -inf so the first real exponent dominates.
            self.pp[name] = torch.zeros(self.n_embd, device=self.RUN_DEVICE) - 1e30

        xk = xx * w.time_mix_k + self.xx[name] * (1 - w.time_mix_k)
        xv = xx * w.time_mix_v + self.xx[name] * (1 - w.time_mix_v)
        xr = xx * w.time_mix_r + self.xx[name] * (1 - w.time_mix_r)
        self.xx[name] = xx

        r = torch.sigmoid(w.receptance.weight @ xr)

        k = w.key.weight @ xk
        v = w.value.weight @ xv

        # Current output: include this token with the time_first bonus,
        # rescaling both accumulators by the shared running max exponent.
        pp = self.pp[name]
        aa = self.aa[name]
        bb = self.bb[name]
        ww = w.time_first + k
        p = torch.maximum(pp, ww)
        e1 = torch.exp(pp - p)
        e2 = torch.exp(ww - p)
        a = e1 * aa + e2 * v
        b = e1 * bb + e2
        # State update: decay history by time_decay (already -exp'd at load),
        # then fold in the current k/v.
        ww = pp + w.time_decay
        p = torch.maximum(ww, k)
        e1 = torch.exp(ww - p)
        e2 = torch.exp(k - p)
        self.aa[name] = e1 * aa + e2 * v
        self.bb[name] = e1 * bb + e2
        self.pp[name] = p

        rwkv = r * a / b

        return w.output.weight @ rwkv

    def run(self, ctx):
        """Feed the last token of *ctx* through the network.

        Returns the next-token logits as a plain Python list. The full *ctx*
        is only used by the head-QK copy mechanism at the end.
        """
        w = self.w
        x = w.emb.weight[ctx[-1]]

        for i in range(self.n_layer):
            if i == 0:
                x = self.LN(x, w.blocks[i].ln0)
            if i == 0 and self.model_type == 'RWKV-ffnPre':
                x = x + self.FF(self.LN(x, w.blocks[i].ln1), w.blocks[i].ffnPre, f'ffnPre.{i}')
            else:
                x = x + self.SA(self.LN(x, w.blocks[i].ln1), w.blocks[i].att, f'att.{i}')
            x = x + self.FF(self.LN(x, w.blocks[i].ln2), w.blocks[i].ffn, f'ffn.{i}')

        x = self.LN(x, w.ln_out)

        if RWKV_HEAD_QK_DIM > 0:
            # Copy head: keep a rolling history of head_k projections and add
            # q·k scores onto the logits of tokens seen in the context.
            if self.hk == None:
                self.hk = (w.head_k.weight @ x).unsqueeze(0)
            else:
                self.hk = torch.cat(
                    [self.hk, (w.head_k.weight @ x).unsqueeze(0)], dim=0)
            if self.hk.shape[0] > self.ctx_len:
                self.hk = self.hk[-self.ctx_len:, :]

            q = w.head_q.weight @ x

            x = w.head.weight @ x
            x = x.cpu().numpy().tolist()

            c = (self.hk @ q) / RWKV_HEAD_QK_DIM
            for i in range(len(c)):
                x[ctx[i]] += c[i]
        else:
            x = w.head.weight @ x
            x = x.cpu().numpy().tolist()

        return x
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ fluidsynth
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ pretty_midi==0.2.10
2
+ pedalboard==0.9.3
3
+ torch
4
+ gradio
sf2/.DS_Store ADDED
Binary file (6.15 kB). View file
 
sf2/piano.sf2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39568d475db895ab5e372dfbb1611d90b4a267306595dd7d619e99c0816ae1f9
3
+ size 74921906
temp/.DS_Store ADDED
Binary file (6.15 kB). View file
 
temp/output.mid ADDED
Binary file (256 Bytes). View file
 
temp/output.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae0bc1d9d63ad001452ef8414d0c91c3248867d126992d8f48c4276fb5cc0c36
3
+ size 2823724
temp/output_fx.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60bb94895397db75ff8913f3e0aad7da1d7f917b18db71c3db19609944b8096d
3
+ size 2823784
utils.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from torch.nn import functional as F
2
+ from torch.utils.data import Dataset
3
+ import numpy as np
4
+ import random
5
+ import torch
6
+ import re
7
+
8
+
9
# Vocabulary: the ten digit characters, newline as the event separator, and
# the 12-zero string used as the sequence-start marker (single token id 11).
stoi = {'0': 0, '1': 1, '2': 2, '3': 3, '4': 4, '5': 5, '6': 6, '7': 7, '8': 8, '9': 9, '\n': 10, '000000000000': 11}
# Inverse of stoi, used for decoding ids back to text.
itos = {0: '0', 1: '1', 2: '2', 3: '3', 4: '4', 5: '5', 6: '6', 7: '7', 8: '8', 9: '9', 10: '\n', 11: '000000000000'}

# Longest alternative first so the start marker wins over twelve single digits.
tok_chars = re.compile(r'000000000000|\d{1}|\n')
13
+
14
def encode(text, stoi, tokenizer):
    """Tokenize *text* with *tokenizer* and map recognised tokens through *stoi*.

    Tokens absent from *stoi* are silently dropped.
    """
    ids = []
    for tok in tokenizer.findall(text):
        if tok in stoi:
            ids.append(stoi[tok])
    return ids
17
+
18
def decode(encoded, itos):
    """Map token ids back to their symbols and concatenate into a string."""
    return ''.join(map(itos.__getitem__, encoded))
20
+
21
+
22
class Dataset:
    """Training dataset over the digit-encoded MIDI text.

    NOTE(review): this shadows the torch.utils.data.Dataset imported above
    (and does not inherit from it); the app only imports it by name.
    """

    def __init__(self, data, ctx_len, epoch_length_fixed, time_aug=True):
        # time_aug is currently unused; augmentation is decided per item below.
        self.ctx_len = ctx_len
        self.epoch_length_fixed = epoch_length_fixed
        self.start_token = '000000000000'
        self.tokenizer = tok_chars
        self.stoi = stoi
        self.itos = itos
        self.vocab_size = len(stoi)
        print('vocab size:', self.vocab_size)
        self.data = encode(data, self.stoi, self.tokenizer)
        self.data_size = len(self.data)
        print(f'data has {self.data_size} tokens')

    def __len__(self):
        # Epoch length is fixed, independent of the corpus size.
        return self.epoch_length_fixed

    def __getitem__(self, idx):
        # idx is ignored: each item is sampled from a random position.
        cues = []
        idx_randm = random.randint(0, len(self.data) - (self.ctx_len) * 4)
        i = idx_randm

        # Scan forward (wrapping) for the next sequence-start marker so every
        # sample begins on an event boundary.
        while True:
            if self.data[i] == self.stoi[self.start_token]:
                cues = [i]
                break
            else:
                i = (i + 1) % len(self.data)

        if not cues:
            return None

        start_idx = cues[0]
        # +2 extra tokens so both the augmented and plain paths can slice
        # ctx_len inputs and targets.
        dix = self.data[start_idx : start_idx + self.ctx_len + 2]

        # 96 tick resolution
        # Per-digit offsets repeated over each 13-token event line; only
        # positions 2..11 (velocity/start/end digits) are ever applied.
        time_shift = [
            [0, 0, 0, 0, 0, 7, 6, 8, 0, 7, 6, 8, 0],
            [0, 0, 0, 0, 1, 5, 3, 6, 1, 5, 3, 6, 0],
        ]

        # 50/50 chance of applying time-shift augmentation to this sample.
        data_aug = random.choice([True, False])

        t = dix[2:2 + self.ctx_len] # testing

        if data_aug:
            ts_rndm = random.choice(time_shift)
            # Tile the 13-long pattern to cover ctx_len tokens, then truncate.
            ts = ts_rndm * ((self.ctx_len - 1) // len(ts_rndm) + 1)
            tsx = torch.tensor(ts[:self.ctx_len])

            # Walk backwards so decimal carries propagate into already-visited
            # higher digits (j-1) correctly.
            for j in reversed(range(len(t))):
                if j % 13 not in range(2, 12):
                    continue

                aug_int = t[j] + tsx[j]
                # Carry when the shifted digit overflows, except where ids
                # 10/11 at positions 9/10 are legitimate non-digit tokens.
                if aug_int >= 10 and (aug_int not in [10, 11] or j not in [9, 10]):
                    left_int = aug_int // 10
                    right_int = aug_int % 10
                    if j > 0:
                        t[j - 1] += left_int
                    t[j] = right_int
                else:
                    t[j] = aug_int

            x = t
            # Target = input shifted by one; last target repeats the final token.
            y = t[1:] + [t[-1]]
        else:
            x = dix[:-1][:self.ctx_len]
            y = dix[1:][:self.ctx_len]

        x = torch.tensor(x, dtype=torch.int64)
        y = torch.tensor(y, dtype=torch.int64)

        return x, y
96
+
97
+
98
class TOKENIZER():
    """Tokenizer over the 12-symbol MIDI-text vocabulary, plus logit sampling."""

    def __init__(self):
        self.tokenizer = tok_chars
        self.stoi = stoi
        self.itos = itos
        self.vocab_size = len(self.stoi)

    def encode(self, text):
        """Return ids for every recognised token in *text* (others dropped)."""
        found = self.tokenizer.findall(text)
        return [self.stoi[tok] for tok in found if tok in self.stoi]

    def decode(self, encoded):
        """Inverse of encode: concatenate the symbols for the given ids."""
        return ''.join(self.itos[i] for i in encoded)

    def sample_logits(self, out, x, ctx_len, temperature=1.0, top_k=50):
        """Sample one token id from logits *out* using top-k + temperature.

        *x* and *ctx_len* are accepted for interface compatibility but unused.
        Returns a 0-d tensor holding the sampled id.
        """
        probs = F.softmax(torch.tensor(out), dim=-1)

        if top_k > 0:
            # Zero out everything except the k most probable entries.
            k = min(top_k, probs.size(-1))
            keep_p, keep_i = torch.topk(probs, k)
            probs = torch.zeros_like(probs).scatter_(dim=-1, index=keep_i, src=keep_p)

        if temperature != 1.0:
            # Sharpen/flatten; multinomial renormalizes internally.
            probs = probs.pow(1.0 / temperature)

        return torch.multinomial(probs, num_samples=1)[0]
+