# Settings restored to block style: one key per line, 2-space indentation.
# The previous single-line form was not parseable YAML, because a plain
# scalar cannot contain ": ".
# NOTE(review): key semantics are not visible in this file; confirm the
# meaning of each setting against the code that consumes this config.

# Padding
do_padding: true
pad_value: -2
pad_token_id: 0

# Masking
do_mlm: true
mlm_probability: 0.5
mask_value: -1

# Binning
do_binning: true
num_bins: 51
right_binning: false

# Sequence construction
max_length: 2048
sampling: true
data_style: both
# NOTE(review): placed at top level; the flattened original did not show
# whether this key was nested under drug_to_id_path. Same for pad_token_id
# above. Confirm against the consumer.
keep_first_n_tokens: 2

# Special tokens
use_junk_tokens: false
use_chem_token: true

# Remote and local locations of the drug_to_id_pad.json file.
drug_to_id_path:
  remote: s3://vevo-ml-datasets/mosaicfm_v2/datasets/drug_to_id_pad.json
  local: drug_to_id_pad.json