# code_repo_finetuning/config/default_config.yaml
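# Synthetic data generation: 100 design proposals spread across the listed
# requirement types, plus 500 Q&A pairs with code snippets of 5-40 lines,
# filtered by a 0.7 diversity threshold (presumably a similarity cutoff).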
data_generation:
  design_proposals:
    count: 100
    requirement_types:
    - New feature development
    - Performance optimization
    - Architecture refactoring
    - API design
    - Error handling
  qa_pairs:
    count: 500
    diversity_threshold: 0.7
    max_code_lines: 40
    min_code_lines: 5
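# Output dataset layout: JSONL files written to ./data/training_data,
# split 80/10/10 into train/validation/test (ratios sum to 1.0).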
dataset:
  format: jsonl
  output_dir: ./data/training_data
  test_split: 0.1
  train_split: 0.8
  val_split: 0.1
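# Evaluation of the fine-tuned model on a 50-example sample,
# scored with ROUGE, BLEU and exact-match metrics.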
evaluation:
  metrics:
  - rouge
  - bleu
  - exact_match
  sample_size: 50
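# GPU layout: two local devices (ids 0 and 1);
# memory_per_gpu is assumed to be the per-card budget in GB.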
gpu:
  devices:
  - 0
  - 1
  memory_per_gpu: 48
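# LLM endpoint used during data generation: a locally served Qwen/Qwen3-8B,
# queried with up to 2 workers and requests batched 4 at a time
# (exact client behaviour depends on the generator implementation).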
llm_api:
  batch_size: 4
  max_workers: 2
  model: Qwen/Qwen3-8B
  provider: local
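# Base model and sampling settings. max_length presumably caps the tokenized
# sequence length, while thinking_budget caps Qwen3's reasoning tokens when
# enable_thinking is on.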
model:
  base_model: Qwen/Qwen3-8B
  enable_thinking: true
  max_length: 2048
  temperature: 0.7
  thinking_budget: 4096
  top_p: 0.9
project:
  name: code_repo_training_data_generator
  version: 1.0.0
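# Source repository mined for training data: the Laddr project, cloned to
# ./repos/Laddr. Only Python and Markdown files are indexed; VCS, build and
# virtualenv directories are skipped.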
repository:
  exclude_dirs:
  - .git
  - __pycache__
  - node_modules
  - .venv
  - venv
  - build
  - dist
  languages:
  - python
  - markdown
  local_path: ./repos/Laddr
  url: https://github.com/AgnetLabs/Laddr
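# LoRA fine-tuning hyperparameters (DeepSpeed + bf16). Effective batch size is
# batch_size x gradient_accumulation_steps x num_gpus = 2 x 8 x 2 = 32, assuming
# both configured GPUs are used. The lora block presumably maps onto a PEFT-style
# LoraConfig (r=64, alpha=128, dropout=0.05) targeting Qwen3's attention and MLP
# projection matrices.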
training:
  batch_size: 2
  bf16: true
  deepspeed_config: ./deepspeed_config_optimized.json
  eval_steps: 100
  gradient_accumulation_steps: 8
  learning_rate: 1.0e-3  # decimal point added so YAML 1.1 loaders (e.g. PyYAML) parse a float, not a string
  logging_steps: 10
  lora:
    alpha: 128
    bias: none
    dropout: 0.05
    r: 64
    target_modules:
    - q_proj
    - k_proj
    - v_proj
    - o_proj
    - gate_proj
    - up_proj
    - down_proj
  max_grad_norm: 1.0
  num_epochs: 3
  output_dir: ./output/finetuned_model
  save_steps: 100
  warmup_ratio: 0.05
  weight_decay: 0.01
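# Minimal loading sketch (hypothetical; the project's actual loader may differ):
#   import yaml
#   with open("config/default_config.yaml") as f:
#       cfg = yaml.safe_load(f)
#   lr = cfg["training"]["learning_rate"]  # 0.001 once parsed as a float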