_n_gpu: 4
accelerator_config:
  dispatch_batches: null
  even_batches: true
  gradient_accumulation_kwargs: null
  non_blocking: false
  split_batches: false
  use_configured_state: false
  use_seedable_sampler: true
activation_offloading: false
adafactor: false
adam_beta1: 0.9
adam_beta2: 0.999
adam_epsilon: 1.0e-08
assistant_only_loss: false
auto_find_batch_size: false
auto_infer_class_weights: false
average_tokens_across_devices: true
batch_eval_metrics: false
bf16: true
bf16_full_eval: false
chat_template_path: null
chosen_column: null
class_weights: null
completion_only_loss: null
data_dir: /project2/jonmay_1426/ashokd/RAU/llm-utils/data
data_seed: null
dataloader_drop_last: false
dataloader_num_workers: 0
dataloader_persistent_workers: false
dataloader_pin_memory: true
dataloader_prefetch_factor: null
dataset_kwargs: null
dataset_num_proc: null
dataset_text_field: text
ddp_backend: null
ddp_broadcast_buffers: null
ddp_bucket_cap_mb: null
ddp_find_unused_parameters: null
ddp_timeout: 1800
debug: []
deepspeed: null
disable_tqdm: false
do_eval: true
do_lora: true
do_predict: false
do_train: false
early_stopping_patience: null
early_stopping_threshold: 0.0
env_dir: /project2/jonmay_1426/ashokd/llm-utils/env/.venv/
eos_token: null
eval_accumulation_steps: null
eval_delay: 0
eval_do_concat_batches: true
eval_max_new_tokens: 512
eval_on_start: false
eval_packing: null
eval_steps: 500
eval_strategy: steps
eval_use_gather_object: false
evaluate_before_training: false
figure_dir: results/figures
figure_force_save: true
fp16: false
fp16_backend: auto
fp16_full_eval: false
fp16_opt_level: O1
fsdp: []
fsdp_config:
  min_num_params: 0
  xla: false
  xla_fsdp_grad_ckpt: false
  xla_fsdp_v2: false
fsdp_min_num_params: 0
fsdp_transformer_layer_cls_to_wrap: null
full_determinism: false
ga_forget_column: forget
gradient_accumulation_steps: 1
gradient_checkpointing: true
gradient_checkpointing_kwargs: null
greater_is_better: null
group_by_length: false
half_precision_backend: auto
hub_always_push: false
hub_model_id: Llama-3.1-8B-Instruct_rt_forget-full
hub_private_repo: null
hub_revision: null
hub_strategy: every_save
hub_token: null
huggingface_hub_username: Anonymous
ignore_data_skip: false
image_input_column: image
include_for_metrics: []
include_inputs_for_metrics: false
include_num_input_tokens_seen: 'no'
include_tokens_per_second: false
input_column: input
jit_mode_eval: false
label_names: null
label_smoothing_factor: 0.0
learning_rate: 0.0002
length_column_name: length
liger_kernel_config: null
load_best_model_at_end: false
local_rank: 0
log_dir: results/logs
log_file: results/logs/log.txt
log_level: passive
log_level_replica: warning
log_on_each_node: true
log_verbose: false
logger: LLM-Utils  # logging.getLogger("LLM-Utils"), serialized into the dump
logging_dir: /project2/jonmay_1426/ashokd/RAU/models/rwku/Llama-3.1-8B-Instruct_rt_forget-full/runs/Nov10_14-01-23_a11-01.hpc.usc.edu
logging_first_step: false
logging_nan_inf_filter: true
logging_steps: 250
logging_strategy: steps
lora_alpha: 16
lora_dropout: 0.05
lora_r: 8
lora_target_modules: null
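
The four LoRA fields above map one-to-one onto a `peft.LoraConfig`. The sketch below is a hypothetical reconstruction, not code from this repository; with `lora_target_modules: null`, PEFT falls back to its per-architecture defaults, which for Llama-style models are typically the attention projection layers.

```python
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM

# Values taken from the config above; target_modules is left as None so PEFT
# uses its built-in defaults for this architecture.
lora_config = LoraConfig(
    r=8,                  # lora_r
    lora_alpha=16,        # lora_alpha
    lora_dropout=0.05,    # lora_dropout
    target_modules=None,  # lora_target_modules: null
    task_type="CAUSAL_LM",
)

model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B-Instruct")
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # adapters are a tiny fraction of the 8B base
```
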
loss_type: nll
lr_scheduler_kwargs: {}
lr_scheduler_type: cosine
max_grad_norm: 1.0
max_input_length: 512
max_length: 1024
max_steps: -1
max_test_samples: null
max_train_samples: null
max_valid_samples: null
metric_for_best_model: null
modality: lm
model_dir: /project2/jonmay_1426/ashokd/RAU/llm-utils/models
model_dtype: float16
model_init_kwargs: null
model_name: meta-llama/Llama-3.1-8B-Instruct
mp_parameters: ''
n_eval_output_batches: 1
neftune_noise_alpha: null
no_cuda: false
num_train_epochs: 1.0
num_workers: 4
openai_tmp_dir: ./openai_tmp_files
optim: adamw_torch
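
Taken together with `learning_rate`, the Adam betas and epsilon, `weight_decay`, `warmup_ratio`, and `lr_scheduler_type` above, the optimizer setup the Trainer builds internally is roughly equivalent to the following sketch. `num_training_steps` is a placeholder: with `max_steps: -1` and `num_train_epochs: 1.0`, the Trainer derives the real value from the dataset length.

```python
import torch
from transformers import get_cosine_schedule_with_warmup

num_training_steps = 1_000  # placeholder; derived from the dataset in practice

optimizer = torch.optim.AdamW(
    model.parameters(),       # the PEFT-wrapped model from the sketch above
    lr=2e-4,                  # learning_rate
    betas=(0.9, 0.999),       # adam_beta1 / adam_beta2
    eps=1e-8,                 # adam_epsilon
    weight_decay=0.01,        # weight_decay
)
scheduler = get_cosine_schedule_with_warmup(
    optimizer,
    num_warmup_steps=int(0.1 * num_training_steps),  # warmup_ratio: 0.1
    num_training_steps=num_training_steps,
)
```
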
optim_args: null
optim_target_modules: null
output_column: chosen
output_dir: /project2/jonmay_1426/ashokd/RAU/models/rwku/Llama-3.1-8B-Instruct_rt_forget-full
overwrite_output_dir: false
packing: false
packing_strategy: bfd
pad_to_multiple_of: null
pad_token: null
padding_free: false
parallelism_config: null
past_index: -1
per_device_eval_batch_size: 4
per_device_train_batch_size: 4
per_gpu_eval_batch_size: null
per_gpu_train_batch_size: null
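
As a quick sanity check (not part of the config): with 4 GPUs, `per_device_train_batch_size: 4`, and `gradient_accumulation_steps: 1`, each optimizer step consumes 16 sequences.

```python
# Effective global batch size for this run, from the values above.
n_gpu = 4              # _n_gpu
per_device_bs = 4      # per_device_train_batch_size
grad_accum_steps = 1   # gradient_accumulation_steps
print(n_gpu * per_device_bs * grad_accum_steps)  # 16 sequences per optimizer step
```
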
prediction_loss_only: false
pretrain_with_output: false
project: huggingface
project_root: /home1/ashokd/projects/RAU/llm-utils
push_to_hub: true
push_to_hub_model_id: null
push_to_hub_organization: null
push_to_hub_token: null
random_seed: null
ray_scope: last
rejected_column: null
remove_unused_columns: true
report_to: wandb
restore_callback_states_from_checkpoint: false
results_dir: results
resume_from_checkpoint: false
run_name: Llama-3.1-8B-Instruct_rt_forget-full
run_start_time: 2025-11-10-H22-M01-S22
save_on_each_node: false
save_only_model: false
save_safetensors: true
save_steps: 500
save_strategy: steps
save_total_limit: 2
seed: 42
shuffle_buffer: 5000
skip_memory_metrics: true
storage_dir: /project2/jonmay_1426/ashokd/RAU/llm-utils
streaming: true
test_file: null
tf32: null
tmp_dir: /project2/jonmay_1426/ashokd/RAU/llm-utils/tmp
torch_compile: false
torch_compile_backend: null
torch_compile_mode: null
torch_empty_cache_steps: null
torchdynamo: null
tpu_metrics_debug: false
tpu_num_cores: null
trackio_space_id: trackio
train_file: /project2/jonmay_1426/ashokd/RAU/data/rwku/unlearn/forget_training.csv
train_validation_split: 0.9
training_kind: sft
use_bnb: false
use_cpu: false
use_legacy_prediction_loop: false
use_liger_kernel: false
use_mps_device: false
use_peft: true
validation_file: null
validation_test_split: null
warmup_ratio: 0.1
warmup_steps: 0
weight_decay: 0.01
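
If the dump is saved as plain YAML (the filename `run_config.yaml` below is hypothetical), it can be rehydrated into a `trl.SFTConfig`. A minimal sketch: since the dump mixes TrainingArguments/SFTConfig fields with project-specific keys (`data_dir`, `do_lora`, `figure_dir`, ...), it has to be filtered down to the fields the dataclass actually accepts.

```python
import dataclasses
import yaml
from trl import SFTConfig

# Hypothetical filename for the cleaned dump above.
with open("run_config.yaml") as f:
    raw = yaml.safe_load(f)

# Keep only constructor-settable SFTConfig fields; drop project-specific keys
# and internal fields like _n_gpu (declared with init=False).
valid = {f.name for f in dataclasses.fields(SFTConfig) if f.init}
args = SFTConfig(**{k: v for k, v in raw.items() if k in valid})
```
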