Spaces:
Build error
Build error
first commit
Browse files- app.py +143 -0
- gradio intro.mp3 +0 -0
- requirements.txt +91 -0
app.py
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from transformers import WhisperProcessor, WhisperForConditionalGeneration
|
| 3 |
+
from datasets import load_dataset
|
| 4 |
+
from duckduckgo_search import DDGS
|
| 5 |
+
from newspaper import Article
|
| 6 |
+
import scipy
|
| 7 |
+
from transformers import (
|
| 8 |
+
MT5Tokenizer,
|
| 9 |
+
AdamW,
|
| 10 |
+
MT5ForConditionalGeneration,
|
| 11 |
+
pipeline
|
| 12 |
+
)
|
| 13 |
+
from transformers import VitsModel, AutoTokenizer
|
| 14 |
+
import IPython.display as ipd
|
| 15 |
+
import torch
|
| 16 |
+
import numpy as np
|
| 17 |
+
import gradio as gr
|
| 18 |
+
import os
|
| 19 |
+
|
| 20 |
+
class Webapp:
    """Voice-driven Arabic news summarizer exposed through a Gradio UI.

    Pipeline implemented by the methods below: a recorded topic is
    transcribed with Whisper, used as a DuckDuckGo news query, the resulting
    articles are extracted with ``newspaper``, summarized with an mT5
    checkpoint and finally synthesized to speech with a VITS model.
    """

    # Rate the speech-to-text path resamples to before feature extraction.
    STT_SAMPLE_RATE = 16000
    # Article used when the news search itself raises.
    FALLBACK_ARTICLE_URL = 'https://www.bbc.com/arabic/media-65576589'
    # Gain applied to the synthesized waveform before the int16 conversion.
    PCM_GAIN = 15000

    def __init__(self):
        """Load every model, tokenizer and processor used by the pipeline."""
        # CUDA device index 0 when a GPU is visible, otherwise the CPU.
        self.DEVICE = 0 if torch.cuda.is_available() else "cpu"
        self.REF_MODEL = 'google/mt5-small'
        self.MODEL_NAME = 'Ahmedasd/arabic-summarization-hhh-100-batches'
        self.model_id = "openai/whisper-base"
        self.tts_model_id = "SeyedAli/Arabic-Speech-synthesis"

        # Text-to-speech.
        self.tts_model = VitsModel.from_pretrained(self.tts_model_id).to(self.DEVICE)
        self.tts_tokenizer = AutoTokenizer.from_pretrained(self.tts_model_id)

        # Summarization: tokenizer comes from the reference mT5 checkpoint,
        # weights from the fine-tuned one.
        self.summ_tokenizer = MT5Tokenizer.from_pretrained(self.REF_MODEL)
        self.summ_model = MT5ForConditionalGeneration.from_pretrained(self.MODEL_NAME).to(self.DEVICE)

        # Not read by any method in this class; kept for external users.
        self.torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

        # Speech-to-text.
        self.stt_model = WhisperForConditionalGeneration.from_pretrained(self.model_id)
        self.stt_model.to(self.DEVICE)
        self.processor = WhisperProcessor.from_pretrained(self.model_id)
        self.forced_decoder_ids = self.processor.get_decoder_prompt_ids(language="arabic", task="transcribe")

    @staticmethod
    def _prepare_audio(sample_rate, samples, target_rate):
        """Return *samples* as mono float32 audio resampled to *target_rate*."""
        # Imported here because a bare ``import scipy`` does not necessarily
        # make the ``scipy.signal`` submodule available.
        from scipy.signal import resample

        data = np.asarray(samples)
        is_integer_pcm = np.issubdtype(data.dtype, np.integer)
        audio = data.astype(np.float32)
        if audio.ndim > 1:
            # assumes a (samples, channels) layout — TODO confirm against the
            # Gradio version in use
            audio = audio.mean(axis=1)
        if is_integer_pcm:
            # Scale signed integer PCM into [-1, 1).
            audio = audio / (float(np.iinfo(data.dtype).max) + 1.0)
        if sample_rate != target_rate:
            new_length = max(1, int(len(audio) * target_rate / sample_rate))
            audio = resample(audio, new_length)
        return np.asarray(audio, dtype=np.float32)

    @staticmethod
    def _to_int16_pcm(waveform, gain):
        """Scale a float waveform by *gain* and return squeezed int16 samples.

        Values are clipped to the int16 range first so that an out-of-range
        sample saturates instead of wrapping around.
        """
        scaled = np.asarray(waveform, dtype=np.float32) * gain
        return np.squeeze(np.clip(scaled, -32768, 32767).astype(np.int16))

    def speech_to_text(self, input):
        """Transcribe a recording with the Whisper model.

        Args:
            input: ``(sample_rate, samples)`` pair, the value delivered by the
                ``gr.Audio(type="numpy")`` component created in ``run``.
                ``None`` or an empty recording yields an empty result.

        Returns:
            The list of strings returned by ``processor.batch_decode``.
        """
        print('gradio audio type: ', type(input))
        if input is None:
            return []
        sample_rate, samples = input[0], input[1]
        if len(samples) == 0:
            return []

        # Use the rate reported with the recording rather than assuming 48 kHz.
        audio = self._prepare_audio(sample_rate, samples, self.STT_SAMPLE_RATE)
        input_features = self.processor(
            audio,
            sampling_rate=self.STT_SAMPLE_RATE,
            return_tensors="pt",
        ).input_features.to(self.DEVICE)
        predicted_ids = self.stt_model.generate(input_features, forced_decoder_ids=self.forced_decoder_ids)
        return self.processor.batch_decode(predicted_ids, skip_special_tokens=True)

    def get_articles(self, query, num):
        """Search news for *query* and return the text of the found articles.

        Args:
            query: Search string; a list of strings is joined with spaces.
            num: Maximum number of results to request.

        Returns:
            A list of article texts with newlines removed. When the search
            raises, the single fallback article is used instead. Articles
            that fail to download or parse are skipped.
        """
        if not isinstance(query, str):
            query = " ".join(query)

        with DDGS(timeout=20) as ddgs:
            try:
                results = ddgs.news(query, max_results=int(num))
                urls = [r['url'] for r in results]
                print('successful connection!')
            except Exception as error:
                # Best effort: report the failure and fall back to a fixed article.
                print('news search failed: ', error)
                urls = [self.FALLBACK_ARTICLE_URL]

        articles = []
        for url in urls:
            try:
                article = Article(url)
                article.download()
                article.parse()
            except Exception as error:
                # One unreachable article should not abort the whole request.
                print('article fetch failed: ', url, error)
                continue
            articles.append(article.text.replace('\n', ''))
        return articles

    def summarize(self, text, model):
        """Summarize *text* with ``self.summ_model``.

        Args:
            text: Text to summarize; tokenized with truncation/padding to 512 tokens.
            model: Unused; generation always goes through ``self.summ_model``.
                Kept so existing callers keep working.

        Returns:
            The decoded generation(s) concatenated into one string.
        """
        text_encoding = self.summ_tokenizer(
            text,
            max_length=512,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            add_special_tokens=True,
            return_tensors='pt',
        )
        generated_ids = self.summ_model.generate(
            input_ids=text_encoding['input_ids'].to(self.DEVICE),
            attention_mask=text_encoding['attention_mask'].to(self.DEVICE),
            max_length=128,
            repetition_penalty=2.5,
        )
        preds = [
            self.summ_tokenizer.decode(gen_id, skip_special_tokens=True, clean_up_tokenization_spaces=True)
            for gen_id in generated_ids
        ]
        return "".join(preds)

    def summarize_articles(self, articles: list, model):
        """Return one summary per entry of *articles*, in order.

        Args:
            articles: Iterable of article texts.
            model: Forwarded unchanged to ``summarize``.
        """
        return [self.summarize(article, model) for article in articles]

    def text_to_speech(self, text):
        """Synthesize *text* with the VITS model.

        Returns:
            ``{'wav': waveform tensor, 'rate': sampling rate from the model config}``.
        """
        inputs = self.tts_tokenizer(text, return_tensors="pt").to(self.DEVICE)
        print('text_to_speech text: ', text)
        with torch.no_grad():
            wav = self.tts_model(**inputs).waveform
        return {'wav': wav, 'rate': self.tts_model.config.sampling_rate}

    def topic_voice_to_summary_voices(self, topic_voice, number_articles):
        """Run the full pipeline from a recorded topic to spoken summaries.

        Args:
            topic_voice: Recording in the form accepted by ``speech_to_text``.
            number_articles: Maximum number of articles to fetch.

        Returns:
            A list of ``text_to_speech`` results, one per non-empty summary;
            empty when nothing was transcribed.
        """
        # speech_to_text returns a list of strings; the search needs one string.
        topic = " ".join(self.speech_to_text(topic_voice)).strip()
        print('topic: ', topic)
        if not topic:
            return []

        articles = self.get_articles(topic, number_articles)
        print('articles: ', articles)
        summaries = self.summarize_articles(articles, self.summ_model)
        print('summaries: ', summaries)
        # Blank summaries are skipped rather than sent to the synthesizer.
        return [self.text_to_speech(summary) for summary in summaries if summary.strip()]

    def run(self):
        """Build the Gradio Blocks interface and return it without launching."""
        # Themes are passed as instances, hence ``gr.themes.Glass()``.
        # NOTE(review): the css string has no selector, so it is presumably
        # ignored by browsers — confirm and replace with a real rule if RTL
        # layout is wanted.
        with gr.Blocks(title='أخبار مسموعة', analytics_enabled=True, theme=gr.themes.Glass(), css='dir: rtl;') as demo:
            gr.Markdown(
                """
                # أخبار مسموعة
                اذكر الموضوع الذي تريد البحث عنه وسوف نخبرك بملخصات الأخبار بشأنه.
                """, rtl=True)
            # Hidden, auto-playing introduction clip.
            intro_voice = gr.Audio(type='filepath', value=os.getcwd() + '/gradio intro.mp3', visible=False, autoplay=True)
            topic_voice = gr.Audio(type="numpy", sources='microphone', label='سجل موضوع للبحث')
            num_articles = gr.Slider(minimum=1, maximum=10, value=1, step=1, label="عدد المقالات")
            output_audio = gr.Audio(streaming=True, autoplay=True, label='الملخصات')

            # Events
            # generate summaries
            @topic_voice.stop_recording(inputs=[topic_voice, num_articles], outputs=output_audio)
            def get_summ_audio(topic_voice, num_articles):
                """Return ``(rate, int16 samples)`` for the first summary, or None."""
                summ_voices = self.topic_voice_to_summary_voices(topic_voice, num_articles)
                if not summ_voices:
                    # Nothing to play: leave the output component empty.
                    return None
                summ_audio = [
                    (voice['rate'], self._to_int16_pcm(voice['wav'].cpu(), self.PCM_GAIN))
                    for voice in summ_voices
                ]
                return summ_audio[0]  # only first
        return demo
|
| 141 |
+
|
| 142 |
+
# Load all models once, then build the Gradio interface and start serving it.
app = Webapp()
demo = app.run()
demo.launch()
|
gradio intro.mp3
ADDED
|
Binary file (69.6 kB). View file
|
|
|
requirements.txt
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
traitlets==5.7.1
|
| 2 |
+
pygments==2.16.1
|
| 3 |
+
ptyprocess==0.7.0
|
| 4 |
+
pexpect==4.9.0
|
| 5 |
+
pathlib==1.0.1
|
| 6 |
+
decorator==4.4.2
|
| 7 |
+
pickleshare==0.7.5
|
| 8 |
+
backcall==0.2.0
|
| 9 |
+
wcwidth==0.2.12
|
| 10 |
+
platformdirs==4.1.0
|
| 11 |
+
ipykernel==5.5.6
|
| 12 |
+
tornado==6.3.2
|
| 13 |
+
six==1.16.0
|
| 14 |
+
setuptools==67.7.2
|
| 15 |
+
psutil==5.9.5
|
| 16 |
+
pyparsing==3.1.1
|
| 17 |
+
certifi==2023.11.17
|
| 18 |
+
httplib2==0.22.0
|
| 19 |
+
numpy==1.23.5
|
| 20 |
+
packaging==23.2
|
| 21 |
+
defusedxml==0.7.1
|
| 22 |
+
cffi==1.16.0
|
| 23 |
+
cycler==0.12.1
|
| 24 |
+
kiwisolver==1.4.5
|
| 25 |
+
debugpy==1.6.6
|
| 26 |
+
portpicker==1.5.2
|
| 27 |
+
astunparse==1.6.3
|
| 28 |
+
tqdm==4.66.1
|
| 29 |
+
mpmath==1.3.0
|
| 30 |
+
sympy==1.12
|
| 31 |
+
pydot==1.4.2
|
| 32 |
+
torch==2.1.0+cu121
|
| 33 |
+
urllib3==2.0.7
|
| 34 |
+
chardet==5.2.0
|
| 35 |
+
idna==3.6
|
| 36 |
+
requests==2.31.0
|
| 37 |
+
ipywidgets==7.7.1
|
| 38 |
+
pydantic==1.10.13
|
| 39 |
+
filelock==3.13.1
|
| 40 |
+
cloudpickle==2.2.1
|
| 41 |
+
etils==1.6.0
|
| 42 |
+
rich==13.7.0
|
| 43 |
+
transformers==4.35.2
|
| 44 |
+
tokenizers==0.15.0
|
| 45 |
+
safetensors==0.4.1
|
| 46 |
+
regex==2023.6.3
|
| 47 |
+
fsspec==2023.6.0
|
| 48 |
+
pytz==2023.3.post1
|
| 49 |
+
pyarrow==10.0.1
|
| 50 |
+
numexpr==2.8.8
|
| 51 |
+
pandas==1.5.3
|
| 52 |
+
soundfile==0.12.1
|
| 53 |
+
multidict==6.0.4
|
| 54 |
+
yarl==1.9.4
|
| 55 |
+
frozenlist==1.4.0
|
| 56 |
+
aiosignal==1.3.1
|
| 57 |
+
aiohttp==3.9.1
|
| 58 |
+
xxhash==3.4.1
|
| 59 |
+
lxml==4.9.3
|
| 60 |
+
soupsieve==2.5
|
| 61 |
+
webencodings==0.5.1
|
| 62 |
+
html5lib==1.1
|
| 63 |
+
scipy==1.11.4
|
| 64 |
+
wrapt==1.14.1
|
| 65 |
+
gast==0.5.4
|
| 66 |
+
termcolor==2.4.0
|
| 67 |
+
cryptography==41.0.7
|
| 68 |
+
cachetools==5.3.2
|
| 69 |
+
uritemplate==4.1.1
|
| 70 |
+
oauth2client==4.1.3
|
| 71 |
+
pyasn1==0.5.1
|
| 72 |
+
rsa==4.9
|
| 73 |
+
tblib==3.0.0
|
| 74 |
+
h5py==3.9.0
|
| 75 |
+
flatbuffers==23.5.26
|
| 76 |
+
joblib==1.3.2
|
| 77 |
+
threadpoolctl==3.2.0
|
| 78 |
+
sniffio==1.3.0
|
| 79 |
+
anyio==3.7.1
|
| 80 |
+
click==8.1.7
|
| 81 |
+
markupsafe==2.1.3
|
| 82 |
+
jinja2==3.1.2
|
| 83 |
+
attrs==23.1.0
|
| 84 |
+
referencing==0.32.0
|
| 85 |
+
webcolors==1.13
|
| 86 |
+
jsonschema==4.19.2
|
| 87 |
+
entrypoints==0.4
|
| 88 |
+
toolz==0.12.0
|
| 89 |
+
altair==4.2.2
|
| 90 |
+
mdurl==0.1.2
|
| 91 |
+
typer==0.9.0
|