agr505 committed on
Commit
c734efa
·
verified ·
1 Parent(s): 47916ed

Training in progress, step 1650, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f1128faf5e4f88e09f035caa83996e406d072d8727ad0f66f62a5acb34604ae9
3
  size 516567560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:270eea5abc85b30c5b391ee943200b572ee6369f4bdcf1baf2ea7257ec5ee51a
3
  size 516567560
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e3ea2143d213566e75a8dee50e22b9bad965083cb2af7968e50389d7f34152b1
3
  size 1033289547
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffad7d3de12e2fddbb9835651d0e1268205875bf3c3d51aa6abe70df83c6268f
3
  size 1033289547
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a4e2d2649c871f0f92d612c20c48748bb85a6235bdb14748f5eb6eb22a94f0f
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b87c9b5dbfbe5f2edd77d557ec08621b8fbc8489a95b736859b5639a0921c1bf
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.03936911001205679,
6
  "eval_steps": 500,
7
- "global_step": 1600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -232,6 +232,13 @@
232
  "learning_rate": 4.9891791269792073e-05,
233
  "loss": 0.0697,
234
  "step": 1600
 
 
 
 
 
 
 
235
  }
236
  ],
237
  "logging_steps": 50,
@@ -251,7 +258,7 @@
251
  "attributes": {}
252
  }
253
  },
254
- "total_flos": 769805801994240.0,
255
  "train_batch_size": 4,
256
  "trial_name": null,
257
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.040599394699933566,
6
  "eval_steps": 500,
7
+ "global_step": 1650,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
232
  "learning_rate": 4.9891791269792073e-05,
233
  "loss": 0.0697,
234
  "step": 1600
235
+ },
236
+ {
237
+ "epoch": 0.040599394699933566,
238
+ "grad_norm": 0.6289940476417542,
239
+ "learning_rate": 4.988253022856661e-05,
240
+ "loss": 0.0729,
241
+ "step": 1650
242
  }
243
  ],
244
  "logging_steps": 50,
 
258
  "attributes": {}
259
  }
260
  },
261
+ "total_flos": 792849378816000.0,
262
  "train_batch_size": 4,
263
  "trial_name": null,
264
  "trial_params": null