Generalized
Browse files
- geneformer/tokenizer.py +1 -4
geneformer/tokenizer.py
CHANGED
|
@@ -288,10 +288,7 @@ class TranscriptomeTokenizer:
|
|
| 288 |
# create dataset
|
| 289 |
def dict_generator():
|
| 290 |
for i in range(len(tokenized_cells)):
|
| 291 |
-
yield {
|
| 292 |
-
'input_ids': dataset_dict['input_ids'][i],
|
| 293 |
-
'cell_type': dataset_dict['cell_type'][i]
|
| 294 |
-
}
|
| 295 |
output_dataset = Dataset.from_generator(dict_generator, num_proc=self.nproc)
|
| 296 |
|
| 297 |
# truncate dataset
|
|
|
|
| 288 |
# create dataset
|
| 289 |
def dict_generator():
|
| 290 |
for i in range(len(tokenized_cells)):
|
| 291 |
+
yield {k: dataset_dict[k][i] for k in dataset_dict.keys()}
|
|
|
|
|
|
|
|
|
|
| 292 |
output_dataset = Dataset.from_generator(dict_generator, num_proc=self.nproc)
|
| 293 |
|
| 294 |
# truncate dataset
|