Spaces:
Paused
Paused
Update utils.py
Browse files
utils.py
CHANGED
|
@@ -22,22 +22,6 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
|
|
| 22 |
import datasets
|
| 23 |
from datasets import load_dataset
|
| 24 |
|
| 25 |
-
def reset_state():
    """Clear the chat history and state, reporting completion to the UI."""
    history, state = [], []
    return history, state, "Reset Done"
|
| 27 |
-
|
| 28 |
-
def reset_textbox():
    """Blank the input textbox and clear the companion string output."""
    cleared_box = gr.update(value="")
    return cleared_box, ""
|
| 30 |
-
|
| 31 |
-
def cancel_outputing():
    """Report that output generation was stopped by the user."""
    status = "Stop Done"
    return status
|
| 33 |
-
|
| 34 |
-
def transfer_input(inputs):
    """Hand the user's text to the pipeline and reset the input widgets.

    Parameters
    ----------
    inputs : str
        The raw text currently in the input textbox.

    Returns
    -------
    tuple
        ``(inputs, cleared-textbox update, show-button update)`` — the
        original text passed through, a ``gr.update`` that blanks the
        textbox, and a ``gr.Button`` update that makes the stop/submit
        button visible.
    """
    # NOTE(review): the original called reset_textbox() and discarded the
    # result; that call only builds gr.update payloads (no side effects),
    # so it was dead code and has been removed.
    return (
        inputs,
        gr.update(value=""),
        gr.Button.update(visible=True),
    )
|
| 41 |
|
| 42 |
def is_stop_word_or_prefix(s: str, stop_words: list) -> bool:
|
| 43 |
for stop_word in stop_words:
|
|
@@ -206,7 +190,34 @@ def predict(text,
|
|
| 206 |
yield a,b,"Generate: Success"
|
| 207 |
except:
|
| 208 |
pass
|
| 209 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
|
| 211 |
def convert_to_markdown(text):
|
| 212 |
text = text.replace("$","$")
|
|
|
|
| 22 |
import datasets
|
| 23 |
from datasets import load_dataset
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
def is_stop_word_or_prefix(s: str, stop_words: list) -> bool:
|
| 27 |
for stop_word in stop_words:
|
|
|
|
| 190 |
yield a,b,"Generate: Success"
|
| 191 |
except:
|
| 192 |
pass
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
def group_texts(examples, block_size=None):
    """Concatenate tokenized examples and split them into equal-length blocks.

    Intended for ``datasets.Dataset.map(..., batched=True)`` when preparing
    text for causal-language-model training.

    Parameters
    ----------
    examples : dict
        A batch: each key (e.g. ``input_ids``, ``attention_mask``) maps to a
        list of token-id lists.
    block_size : int, optional
        Chunk length. When omitted, falls back to the module-level
        ``block_size`` that the original implementation read implicitly, so
        existing callers are unaffected.

    Returns
    -------
    dict
        The same keys, each now a list of ``block_size``-long chunks (the
        ragged tail is dropped; padding could be used instead if the model
        supported it), plus a ``labels`` key copied from ``input_ids`` —
        the causal-LM convention where labels equal the inputs.
    """
    from itertools import chain  # local import keeps the block self-contained

    if block_size is None:
        # Backward-compatible fallback to the global the original relied on.
        block_size = globals()["block_size"]

    # O(n) concatenation: the original's sum(lists, []) is quadratic in the
    # number of examples.
    concatenated = {k: list(chain.from_iterable(v)) for k, v in examples.items()}
    total_length = len(next(iter(concatenated.values())))
    # Drop the small remainder so every chunk is exactly block_size long.
    total_length = (total_length // block_size) * block_size
    result = {
        k: [seq[i : i + block_size] for i in range(0, total_length, block_size)]
        for k, seq in concatenated.items()
    }
    result["labels"] = result["input_ids"].copy()
    return result
|
| 210 |
+
|
| 211 |
+
def compute_metrics(eval_pred):
    """Accuracy metric hook for the Hugging Face ``Trainer`` evaluation loop.

    Parameters
    ----------
    eval_pred : tuple
        ``(logits, labels)`` as handed over by the ``Trainer`` — raw model
        logits and the reference label ids.

    Returns
    -------
    dict
        ``evaluate``'s accuracy result (e.g. ``{"accuracy": ...}``).
    """
    logits, labels = eval_pred
    # Transformers models return logits; convert them to hard class
    # predictions before scoring.
    predictions = np.argmax(logits, axis=-1)
    # Memoize the metric on the function object: the Trainer calls this
    # after every evaluation pass, and the original re-ran
    # evaluate.load("accuracy") (a disk/network hit) on each call.
    metric = getattr(compute_metrics, "_metric", None)
    if metric is None:
        metric = evaluate.load("accuracy")  # alternatives: "f1", "roc_auc"
        compute_metrics._metric = metric
    return metric.compute(predictions=predictions, references=labels)
| 220 |
+
|
| 221 |
|
| 222 |
def convert_to_markdown(text):
|
| 223 |
text = text.replace("$","$")
|