| { | |
| "query_token_id": "[unused0]", | |
| "doc_token_id": "[unused1]", | |
| "query_token": "[Q]", | |
| "doc_token": "[D]", | |
| "ncells": null, | |
| "centroid_score_threshold": null, | |
| "ndocs": null, | |
| "index_path": null, | |
| "nbits": 1, | |
| "kmeans_niters": 4, | |
| "resume": false, | |
| "max_sampled_pid": -1, | |
| "max_num_partitions": -1, | |
| "use_lagacy_build_ivf": false, | |
| "reuse_centroids_from": null, | |
| "similarity": "cosine", | |
| "bsize": 2, | |
| "accumsteps": 1, | |
| "lr": 5e-6, | |
| "maxsteps": 400000, | |
| "save_every": null, | |
| "resume_optimizer": false, | |
| "fix_broken_optimizer_state": false, | |
| "warmup": null, | |
| "warmup_bert": null, | |
| "relu": false, | |
| "nway": 6, | |
| "n_query_alternative": 1, | |
| "use_ib_negatives": false, | |
| "kd_loss": "KLD", | |
| "reranker": false, | |
| "distillation_alpha": 1.0, | |
| "ignore_scores": false, | |
| "model_name": "xlm-roberta-large", | |
| "force_resize_embeddings": true, | |
| "shuffle_passages": true, | |
| "sampling_max_beta": 1.0, | |
| "over_one_epoch": true, | |
| "multilang": true, | |
| "nolangreg": true, | |
| "query_maxlen": 32, | |
| "attend_to_mask_tokens": false, | |
| "interaction": "colbert", | |
| "dim": 128, | |
| "doc_maxlen": 220, | |
| "mask_punctuation": true, | |
| "checkpoint": "xlm-roberta-large", | |
| "triples": "\/expscratch\/eyang\/workspace\/plaid-aux\/training_triples\/msmarco-passages\/triples_mt5xxl-monot5-mmarco-engeng.jsonl", | |
| "collection": "Combination(all)[irds:neumarco\/zh\/train:docs+irds:neumarco\/fa\/train:docs+irds:neumarco\/ru\/train:docs]", | |
| "queries": "irds:msmarco-passage\/train:queries", | |
| "index_name": null, | |
| "debug": false, | |
| "overwrite": false, | |
| "root": "\/expscratch\/eyang\/workspace\/plaid-aux\/experiments", | |
| "experiment": "mtt-tdistill", | |
| "index_root": null, | |
| "name": "multi.allentriesnoreg-KLD-shuf-5e-6\/mt5xxl-monot5-mmarco-engeng\/16bat.6way", | |
| "rank": 0, | |
| "nranks": 8, | |
| "amp": true, | |
| "ivf_num_processes": 20, | |
| "ivf_use_tempdir": false, | |
| "ivf_merging_ways": 2, | |
| "gpus": 8, | |
| "meta": { | |
| "hostname": "r5n03", | |
| "git_branch": "eugene-training", | |
| "git_hash": "d4f2493b700ceeea4592ffaf34d73dcd5c7926ba", | |
| "git_commit_datetime": "2023-11-22 22:38:49-05:00", | |
| "current_datetime": "Nov 23, 2023 ; 4:28PM EST (-0500)", | |
| "cmd": "train.py --model_name xlm-roberta-large --training_triples \/expscratch\/eyang\/workspace\/plaid-aux\/training_triples\/msmarco-passages\/triples_mt5xxl-monot5-mmarco-engeng.jsonl --training_queries msmarco-passage\/train --training_collection neumarco\/zh\/train neumarco\/fa\/train neumarco\/ru\/train --training_collection_mixing all --other_args nolangreg=True --maxsteps 400000 --learning_rate 5e-6 --kd_loss KLD --per_device_batch_size 2 --nway 6 --run_tag multi.allentriesnoreg-KLD-shuf-5e-6\/mt5xxl-monot5-mmarco-engeng --experiment mtt-tdistill", | |
| "version": "colbert-v0.4" | |
| } | |
| } | |