alexkueck committed on
Commit
9dff459
·
1 Parent(s): d60bd16

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -2
app.py CHANGED
@@ -8,7 +8,7 @@ import gradio as gr
8
  import torch
9
  from utils import *
10
  from presets import *
11
- from transformers import Trainer, TrainingArguments, DataCollatorWithPadding
12
  import numpy as np
13
  import evaluate
14
  import pandas as pd
@@ -197,7 +197,8 @@ lm_datasets = tokenized_datasets.map(
197
  )
198
 
199
  # Batches von Daten zusammenfassen
200
- data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
 
201
 
202
 
203
  print ("###############lm datasets####################")
 
8
  import torch
9
  from utils import *
10
  from presets import *
11
+ from transformers import Trainer, TrainingArguments, DataCollatorForLanguageModeling
12
  import numpy as np
13
  import evaluate
14
  import pandas as pd
 
197
  )
198
 
199
  # Batches von Daten zusammenfassen
200
+ tokenizer.pad_token = tokenizer.eos_token
201
+ data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)
202
 
203
 
204
  print ("###############lm datasets####################")