Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,7 +8,7 @@ import gradio as gr
|
|
| 8 |
import torch
|
| 9 |
from utils import *
|
| 10 |
from presets import *
|
| 11 |
-
from transformers import Trainer, TrainingArguments,
|
| 12 |
import numpy as np
|
| 13 |
import evaluate
|
| 14 |
import pandas as pd
|
|
@@ -197,7 +197,8 @@ lm_datasets = tokenized_datasets.map(
|
|
| 197 |
)
|
| 198 |
|
| 199 |
# Batches von Daten zusammenfassen
|
| 200 |
-
|
|
|
|
| 201 |
|
| 202 |
|
| 203 |
print ("###############lm datasets####################")
|
|
|
|
| 8 |
import torch
|
| 9 |
from utils import *
|
| 10 |
from presets import *
|
| 11 |
+
from transformers import Trainer, TrainingArguments, DataCollatorForLanguageModeling
|
| 12 |
import numpy as np
|
| 13 |
import evaluate
|
| 14 |
import pandas as pd
|
|
|
|
| 197 |
)
|
| 198 |
|
| 199 |
# Batches von Daten zusammenfassen
|
| 200 |
+
tokenizer.pad_token = tokenizer.eos_token
|
| 201 |
+
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)
|
| 202 |
|
| 203 |
|
| 204 |
print ("###############lm datasets####################")
|