ionvop committed on
Commit
8182033
·
verified ·
1 Parent(s): f7f7a9d

Upload 6 files

Browse files
.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  models/Chiyu_v2_48k/voice.index filter=lfs diff=lfs merge=lfs -text
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  models/Chiyu_v2_48k/voice.index filter=lfs diff=lfs merge=lfs -text
37
+ models/added_IVF256_Flat_nprobe_1_Chiyu_v2_48k_v2.index filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

# Hugging Face Spaces runs the container as UID 1000. Create a matching user
# with a real home directory so per-user caches have a writable location.
RUN useradd -m -u 1000 user

WORKDIR /app

# System deps (skip "recommended" extras to keep the image small)
RUN apt-get update \
    && apt-get install -y --no-install-recommends ffmpeg git \
    && rm -rf /var/lib/apt/lists/*

# Python deps (copied on their own so this layer is cached until
# requirements.txt changes)
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy app + models
COPY --chown=user app/ ./app/
COPY --chown=user models/ ./models/

# Drop root privileges for the running service
USER user
ENV HOME=/home/user

EXPOSE 7860

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
app/main.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import asyncio
3
+ import tempfile
4
+ import subprocess
5
+ from fastapi import FastAPI, Form
6
+ from fastapi.responses import FileResponse
7
+ import edge_tts
8
+
9
app = FastAPI()

# Paths are relative to the process working directory.
MODEL_PATH = "models/Chiyu_v2_48k.pth"
INDEX_PATH = "models/added_IVF256_Flat_nprobe_1_Chiyu_v2_48k_v2.index"


def _remove_quietly(path):
    """Delete *path*, ignoring the case where it no longer exists."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


@app.post("/speak")
async def speak(text: str = Form(...)):
    """Synthesize *text* with edge-tts, convert it with RVC, return a WAV.

    Args:
        text: The text to speak, taken from the ``text`` form field.

    Returns:
        A ``FileResponse`` streaming the converted audio as ``output.wav``.
        The backing temporary file is deleted once the response is sent.

    Raises:
        subprocess.CalledProcessError: If ``app/rvc_infer.py`` exits with a
            non-zero status.
    """
    # Imported locally so the module's import block does not have to change.
    from starlette.background import BackgroundTask

    # Reserve both temp paths up front. edge-tts writes MP3 by default, so
    # give the intermediate file a matching suffix.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_tts:
        tts_path = tmp_tts.name
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_out:
        out_path = tmp_out.name

    succeeded = False
    try:
        # Step 1: Generate TTS with edge-tts
        tts = edge_tts.Communicate(text, voice="en-US-AriaNeural")
        await tts.save(tts_path)

        # Step 2: Run RVC conversion
        cmd = [
            "python3",
            "app/rvc_infer.py",
            "--input", tts_path,
            "--output", out_path,
            "--model", MODEL_PATH,
            "--index", INDEX_PATH,
        ]
        # The conversion blocks until the child exits; run it in a worker
        # thread so the event loop keeps serving other requests meanwhile.
        await asyncio.to_thread(subprocess.run, cmd, check=True)
        succeeded = True
    finally:
        # The intermediate TTS file is never needed after conversion; the
        # output file is only kept when there is something to return.
        _remove_quietly(tts_path)
        if not succeeded:
            _remove_quietly(out_path)

    # Step 3: Return audio file, deleting it after the response is sent
    return FileResponse(
        out_path,
        media_type="audio/wav",
        filename="output.wav",
        background=BackgroundTask(_remove_quietly, out_path),
    )
app/rvc_infer.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import torch
3
+ import librosa
4
+ import soundfile as sf
5
+ import numpy as np
6
+
7
+ # Core RVC imports
8
+ from rvc.model_infer import SynthesizerTrn
9
+ from rvc import utils
10
+ from rvc.modules.vc.pipeline import VC
11
+ import faiss
12
+
13
+
14
def main():
    """Command-line entry point: voice-convert one audio file with RVC.

    Reads the four required options ``--input``, ``--output``, ``--model``
    and ``--index``, loads the input audio at 48 kHz, passes it through
    ``VC.vc_single`` and writes the result to ``--output`` at 48 kHz.

    NOTE(review): ``VC`` and ``vc_single`` come from the ``rvc`` package,
    which is not visible here; the call below mirrors the original
    arguments exactly and its signature should be confirmed against the
    installed version.
    """
    sample_rate = 48000

    cli = argparse.ArgumentParser()
    for flag in ("--input", "--output", "--model", "--index"):
        cli.add_argument(flag, required=True)
    opts = cli.parse_args()

    # Load input audio
    waveform, loaded_sr = librosa.load(opts.input, sr=sample_rate)

    # Initialize model
    cpu = torch.device("cpu")  # <- HF free tier is CPU only
    converter = VC(opts.model, opts.index, cpu)

    # Convert
    conversion_settings = {
        "sid": 0,               # Speaker ID (default: 0)
        "input_audio": waveform,
        "input_sr": loaded_sr,
        "f0_up_key": 0,         # Pitch shift (0 = none)
        "f0_method": "crepe",   # Pitch extractor ("pm", "harvest", "crepe")
        "index_rate": 0.75,     # Weight for index feature
        "filter_radius": 3,
        "resample_sr": 0,
        "rms_mix_rate": 0.25,
        "protect": 0.33,
    }
    converted, _unused = converter.vc_single(**conversion_settings)

    # Save output
    sf.write(opts.output, converted, sample_rate)


if __name__ == "__main__":
    main()
models/Chiyu_v2_48k.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69cafbdd228bcd96736f064fc7943d34fcccf8fd8cdf95ef1941559d1a577dfb
3
+ size 57581999
models/added_IVF256_Flat_nprobe_1_Chiyu_v2_48k_v2.index ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c5d3991205e84fd10517ed16ee1cdc738529845effb5e42813f926db6d842eb
3
+ size 31588619
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
fastapi
uvicorn
# Required by FastAPI to parse the Form(...) field of the /speak endpoint
python-multipart
edge-tts
librosa
soundfile
torch
torchaudio
faiss-cpu
numpy
# NOTE(review): app/rvc_infer.py imports the `rvc` package, which is not
# listed here -- add its correct install source before deploying.