File size: 1,060 Bytes
c92b796
 
7f9aa47
06e266d
c92b796
 
99db8ef
 
33eb4c0
 
99db8ef
c92b796
99db8ef
 
 
c92b796
 
1ec7848
58fe651
33eb4c0
 
 
 
 
 
 
c92b796
495f0fb
c92b796
 
7de6da2
c92b796
 
 
 
 
 
c634ab8
c92b796
 
33eb4c0
 
c97bccb
c92b796
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import asyncio
import hashlib
import os
import shutil

from fastapi import FastAPI, Response
from tts_with_rvc import TTS_RVC


# FastAPI application instance; the route decorators in this file register
# their handlers on it.
app = FastAPI()
# Directory holding cached synthesized audio. The path is relative, so it
# resolves against the process's current working directory.
CACHE_DIR = "cache"
# Create the cache directory at import time; exist_ok=True makes this a no-op
# when the directory is already present.
os.makedirs(CACHE_DIR, exist_ok=True)


@app.get("/")
def read_root():
    """Return a static greeting payload for the root path."""
    greeting = "Hello from FastAPI on Hugging Face Spaces 🚀"
    return {"message": greeting}


def _load_or_synthesize(text: str, cache_path: str) -> bytes:
    """Return WAV bytes for *text*, synthesizing and caching them on a miss.

    This is ordinary blocking code (file I/O plus model construction and
    inference) and is meant to be run off the event loop.

    Args:
        text: The text to convert to speech.
        cache_path: File path where the audio for this text is (or will be)
            stored.

    Returns:
        The raw bytes of the cached or freshly generated audio file.
    """
    # EAFP: open the cached file directly instead of checking for existence
    # first, so there is no window between the check and the read.
    try:
        with open(cache_path, "rb") as cached:
            return cached.read()
    except FileNotFoundError:
        pass  # Cache miss: fall through and synthesize.

    tts = TTS_RVC(
        model_path="app/models/chu2.pth",
        voice="ja-JP-NanamiNeural",
        device="cpu",
        index_path="app/models/chu2.index",
        f0_method="pm",
    )

    # The call result is used as the path of the generated audio file.
    generated_path = tts(
        text=text,
        pitch=6,
        tts_rate=30,
    )

    # shutil.move falls back to copy-and-delete when the generated file and
    # the cache directory are on different filesystems, where a plain
    # os.rename would raise OSError.
    shutil.move(generated_path, cache_path)

    with open(cache_path, "rb") as produced:
        return produced.read()


@app.get("/speak")
async def speak(text: str):
    """Return speech audio for *text* as an ``audio/wav`` response.

    Results are cached on disk under ``CACHE_DIR``, keyed by the SHA-256 hex
    digest of the UTF-8 encoded text, so repeated requests for the same text
    are served from the cache file.

    Args:
        text: The text to synthesize (query parameter).

    Returns:
        A ``Response`` whose body is the audio file's bytes.
    """
    text_hash = hashlib.sha256(text.encode("utf-8")).hexdigest()
    cache_path = os.path.join(CACHE_DIR, f"{text_hash}.wav")

    # Run the blocking cache lookup / synthesis in the default executor so
    # this coroutine does not stall the event loop while it runs.
    loop = asyncio.get_running_loop()
    audio_bytes = await loop.run_in_executor(
        None, _load_or_synthesize, text, cache_path
    )

    return Response(content=audio_bytes, media_type="audio/wav")