Update functions.py
functions.py (+5 -3)
@@ -329,6 +329,8 @@ def chunk_and_preprocess_text(text, model_name='philschmid/flan-t5-base-samsum'
 
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     sentences = sent_tokenize(text)
+
+    print(f"sentences: {sentences}")
 
     # initialize
     length = 0
@@ -340,9 +342,9 @@ def chunk_and_preprocess_text(text, model_name='philschmid/flan-t5-base-samsum'
         count += 1
         combined_length = len(tokenizer.tokenize(sentence)) + length  # add the no. of sentence tokens to the length counter
 
-
-
-
+        if combined_length <= tokenizer.max_len_single_sentence:  # if it doesn't exceed
+            chunk += sentence + " "  # add the sentence to the chunk
+            length = combined_length  # update the length counter
 
         # if it is the last sentence
         if count == len(sentences) - 1:
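For context, the chunking loop these hunks touch can be reconstructed as a runnable sketch. Only the lines visible in the diff are confirmed; the chunk/chunks bookkeeping, the count initialization, the else branch that flushes a full chunk, and the return value are assumptions added for illustration.

# A minimal runnable sketch, assuming the usual shape of this kind of chunker.
# Confirmed by the diff: the tokenizer/sentence setup, the print, the
# combined_length bookkeeping, and the if-block this commit adds.
# Assumed: chunk/chunks/count initialization, the else branch, the return.
from nltk.tokenize import sent_tokenize  # requires nltk.download("punkt")
from transformers import AutoTokenizer


def chunk_and_preprocess_text(text, model_name='philschmid/flan-t5-base-samsum'):

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    sentences = sent_tokenize(text)

    print(f"sentences: {sentences}")

    # initialize
    length = 0
    chunk = ""    # assumed: accumulator for the current chunk
    chunks = []   # assumed: list of finished chunks
    count = -1    # assumed: so the first increment yields index 0

    for sentence in sentences:
        count += 1
        combined_length = len(tokenizer.tokenize(sentence)) + length  # add the no. of sentence tokens to the length counter

        if combined_length <= tokenizer.max_len_single_sentence:  # if it doesn't exceed
            chunk += sentence + " "  # add the sentence to the chunk
            length = combined_length  # update the length counter
        else:
            # assumed: the chunk is full, so store it and start a new one
            chunks.append(chunk.strip())
            length = len(tokenizer.tokenize(sentence))
            chunk = sentence + " "

        # if it is the last sentence
        if count == len(sentences) - 1:
            chunks.append(chunk.strip())  # assumed: flush the final chunk

    return chunks

The comparison against tokenizer.max_len_single_sentence is the reason this works: that property is the model's maximum input length minus the special tokens the tokenizer adds, so keeping the running token count under it ensures each chunk remains encodable in a single forward pass.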