| { | |
| "original_model": "/home1/deqingfu/nim_llm/config/llama_3_2_tiny.json", | |
| "model_type": "llama", | |
| "saved_at": "/home1/deqingfu/nim_llm", | |
| "config": { | |
| "model": "/home1/deqingfu/nim_llm/config/llama_3_2_tiny.json", | |
| "num_layers": null, | |
| "random_init": false, | |
| "min_piles": 3, | |
| "initial_piles": 10, | |
| "target_piles": 20, | |
| "max_stones": 32, | |
| "samples_per_pile": 100000, | |
| "eval_samples": 200, | |
| "method": "ppo", | |
| "skip_sft": false, | |
| "pretrain_only": true, | |
| "sft_only": false, | |
| "rl_only": false, | |
| "epochs": 3, | |
| "batch_size": 16, | |
| "learning_rate": 5e-05, | |
| "refresh_data_per_epoch": false, | |
| "train_assistant_header": false, | |
| "curriculum_learning": true, | |
| "curriculum_type": "cumulative", | |
| "dpo_beta": 0.1, | |
| "dpo_batch_size": 8, | |
| "dpo_learning_rate": 5e-06, | |
| "dpo_samples": 1000, | |
| "ppo_learning_rate": 1e-06, | |
| "ppo_batch_size": 8, | |
| "ppo_samples": 1000, | |
| "grpo_learning_rate": 1e-06, | |
| "grpo_group_size": 4, | |
| "grpo_samples": 1000, | |
| "output_dir": "/scratch1/deqingfu/nim/pretrain_only", | |
| "log_level": "INFO", | |
| "use_wandb": true, | |
| "wandb_project": "nim-llm", | |
| "hf_push": true, | |
| "hf_repo": "deqing/nim_llm_pretrained" | |
| } | |
| } |