Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -21,31 +21,7 @@ def tokenize_function(examples):
|
|
| 21 |
return tokenizer(examples["text"])
|
| 22 |
|
| 23 |
|
| 24 |
-
|
| 25 |
-
def group_texts(examples):
    """Concatenate a tokenized batch and re-split it into fixed-size blocks.

    Every column of ``examples`` (a mapping from column name to a list of
    per-example lists) is flattened into one long list and then cut into
    consecutive chunks of ``block_size`` items. ``block_size`` is read from
    module scope. The trailing remainder that does not fill a whole block is
    dropped; padding could be added instead if the model supported it, so
    customize this part to your needs.

    Returns:
        A dict with the same keys as ``examples``, each mapped to its list of
        chunks, plus a ``"labels"`` entry that is a shallow copy of the
        ``"input_ids"`` chunk list.

    Raises:
        KeyError: if ``examples`` has no ``"input_ids"`` column.
    """
    # Function-scope import keeps this block independent of the file's
    # import section.
    from itertools import chain

    # Flatten each column in linear time. ``sum(lists, [])`` copies the
    # accumulated list on every addition and is quadratic in the total length.
    concatenated_examples = {
        k: list(chain.from_iterable(examples[k])) for k in examples.keys()
    }
    total_length = len(concatenated_examples[list(examples.keys())[0]])
    # Round down to a whole number of blocks (drops the small remainder).
    total_length = (total_length // block_size) * block_size
    # Split every column into chunks of ``block_size``.
    result = {
        k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
        for k, t in concatenated_examples.items()
    }
    result["labels"] = result["input_ids"].copy()
    return result
|
| 39 |
-
|
| 40 |
-
# Function the Trainer needs in order to evaluate training, using a metric
|
| 41 |
-
def compute_metrics(eval_pred):
    """Score one evaluation pass with the "accuracy" metric.

    ``eval_pred`` is unpacked into exactly two items: the model's logits and
    the reference labels. The logits are reduced to predicted class indices
    with an argmax over the last axis, and the result of the metric's
    ``compute`` call on those predictions and the labels is returned.
    """
    # Metric used to judge whether training is improving. The original note
    # lists three available metrics: f1, roc_auc or accuracy.
    accuracy_metric = evaluate.load("accuracy")
    raw_scores, references = eval_pred
    # Transformers models return logits, so convert the logits to predictions
    # before handing them to ``compute``.
    predicted_ids = np.argmax(raw_scores, axis=-1)
    return accuracy_metric.compute(
        predictions=predicted_ids,
        references=references,
    )
|
| 49 |
|
| 50 |
# Test the new model after training
|
| 51 |
########################################################################
|
|
|
|
| 21 |
return tokenizer(examples["text"])
|
| 22 |
|
| 23 |
|
| 24 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
# Test the new model after training
|
| 27 |
########################################################################
|