{ "original_model": "/home1/deqingfu/nim_llm/config/llama_3_2_tiny.json", "model_type": "llama", "saved_at": "/home1/deqingfu/nim_llm", "config": { "model": "/home1/deqingfu/nim_llm/config/llama_3_2_tiny.json", "num_layers": null, "random_init": false, "min_piles": 3, "initial_piles": 10, "target_piles": 20, "max_stones": 32, "samples_per_pile": 100000, "eval_samples": 200, "method": "ppo", "skip_sft": false, "pretrain_only": true, "sft_only": false, "rl_only": false, "epochs": 3, "batch_size": 16, "learning_rate": 5e-05, "refresh_data_per_epoch": false, "train_assistant_header": false, "curriculum_learning": true, "curriculum_type": "cumulative", "dpo_beta": 0.1, "dpo_batch_size": 8, "dpo_learning_rate": 5e-06, "dpo_samples": 1000, "ppo_learning_rate": 1e-06, "ppo_batch_size": 8, "ppo_samples": 1000, "grpo_learning_rate": 1e-06, "grpo_group_size": 4, "grpo_samples": 1000, "output_dir": "/scratch1/deqingfu/nim/pretrain_only", "log_level": "INFO", "use_wandb": true, "wandb_project": "nim-llm", "hf_push": true, "hf_repo": "deqing/nim_llm_pretrained" } }