{
  "output_dir": "hindi_tokenizer_sota",
  "vocab_size": 50000,
  "model_type": "unigram",
  "character_coverage": 0.9995,
  "num_iterations": 2000000,
  "max_examples": 300000,
  "sample_factor": 1.0,
  "create_hf_config": true,
  "debug": false,
  "seed": 42
}