Update functions.py
functions.py  +7 -2
@@ -132,7 +132,7 @@ def load_models():
     ner_tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-large-finetuned-conll03-english")
     emb_tokenizer = AutoTokenizer.from_pretrained('google/flan-t5-xl')
     sent_pipe = pipeline("text-classification",model=q_model, tokenizer=q_tokenizer)
-    sum_pipe = pipeline("summarization",model="philschmid/flan-t5-base-samsum")
+    sum_pipe = pipeline("summarization",model="philschmid/flan-t5-base-samsum",clean_up_tokenization_spaces=True)
     ner_pipe = pipeline("ner", model=ner_model, tokenizer=ner_tokenizer, grouped_entities=True)
     cross_encoder = CrossEncoder('cross-encoder/mmarco-mMiniLMv2-L12-H384-v1') #cross-encoder/ms-marco-MiniLM-L-12-v2
     sbert = SentenceTransformer('all-MiniLM-L6-v2')

@@ -366,7 +366,12 @@ def chunk_and_preprocess_text(text, model_name= 'philschmid/flan-t5-base-samsum'
 def summarize_text(text_to_summarize,max_len,min_len):
     '''Summarize text with HF model'''

-    summarized_text = sum_pipe(text_to_summarize,
+    summarized_text = sum_pipe(text_to_summarize,
+                               max_length=max_len,
+                               min_length=min_len,
+                               do_sample=False,
+                               early_stopping=True,
+                               num_beams=4)
     summarized_text = ' '.join([summ['summary_text'] for summ in summarized_text])

     return summarized_text
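
For reference, a minimal self-contained sketch of the behaviour after this change. The checkpoint, the clean_up_tokenization_spaces flag, and the generation arguments are taken from the diff; the import, the sample dialogue, and the 130/30 length values are illustrative assumptions, and the rest of load_models() is omitted.

from transformers import pipeline

# Same checkpoint and kwargs as the updated load_models();
# clean_up_tokenization_spaces=True removes stray spaces around
# punctuation when the summary is decoded.
sum_pipe = pipeline("summarization",
                    model="philschmid/flan-t5-base-samsum",
                    clean_up_tokenization_spaces=True)

def summarize_text(text_to_summarize, max_len, min_len):
    '''Summarize text with HF model'''
    summarized_text = sum_pipe(text_to_summarize,
                               max_length=max_len,      # cap on generated summary length
                               min_length=min_len,      # floor on generated summary length
                               do_sample=False,         # deterministic decoding, no sampling
                               early_stopping=True,     # stop once num_beams candidates finish
                               num_beams=4)             # beam search instead of greedy decoding
    summarized_text = ' '.join([summ['summary_text'] for summ in summarized_text])
    return summarized_text

# Hypothetical call; 130/30 are illustrative length limits, not values from the diff.
dialogue = "Anna: The deploy failed again. Ben: I'll roll back and rerun the migration tonight."
print(summarize_text(dialogue, 130, 30))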