{"loss": 1.45158839, "grad_norm": 1.3470062, "learning_rate": 2.2e-07, "token_acc": 0.66037736, "epoch": 0.00112486, "global_step/max_steps": "1/8890", "percentage": "0.01%", "elapsed_time": "2s", "remaining_time": "6h 21m 54s", "memory(GiB)": 18.51, "train_speed(iter/s)": 0.387921} {"loss": 1.72301364, "grad_norm": 1.07448745, "learning_rate": 4.5e-07, "token_acc": 0.58580508, "epoch": 0.00224972, "global_step/max_steps": "2/8890", "percentage": "0.02%", "elapsed_time": "3s", "remaining_time": "4h 38m 59s", "memory(GiB)": 20.13, "train_speed(iter/s)": 0.530954} {"loss": 1.58599162, "grad_norm": 1.28290844, "learning_rate": 6.7e-07, "token_acc": 0.61802575, "epoch": 0.00337458, "global_step/max_steps": "3/8890", "percentage": "0.03%", "elapsed_time": "4s", "remaining_time": "4h 2m 41s", "memory(GiB)": 20.13, "train_speed(iter/s)": 0.6103} {"loss": 1.49142611, "grad_norm": 1.06370139, "learning_rate": 9e-07, "token_acc": 0.63291139, "epoch": 0.00449944, "global_step/max_steps": "4/8890", "percentage": "0.04%", "elapsed_time": "5s", "remaining_time": "3h 37m 47s", "memory(GiB)": 20.13, "train_speed(iter/s)": 0.680022} {"loss": 1.21051407, "grad_norm": 1.0405091, "learning_rate": 1.12e-06, "token_acc": 0.69294118, "epoch": 0.0056243, "global_step/max_steps": "5/8890", "percentage": "0.06%", "elapsed_time": "7s", "remaining_time": "3h 27m 58s", "memory(GiB)": 20.16, "train_speed(iter/s)": 0.712027} {"loss": 1.31009984, "grad_norm": 1.1803242, "learning_rate": 1.35e-06, "token_acc": 0.67270375, "epoch": 0.00674916, "global_step/max_steps": "6/8890", "percentage": "0.07%", "elapsed_time": "8s", "remaining_time": "3h 18m 59s", "memory(GiB)": 20.16, "train_speed(iter/s)": 0.744064} {"loss": 1.44096935, "grad_norm": 1.14562976, "learning_rate": 1.57e-06, "token_acc": 0.64665912, "epoch": 0.00787402, "global_step/max_steps": "7/8890", "percentage": "0.08%", "elapsed_time": "9s", "remaining_time": "3h 15m 24s", "memory(GiB)": 20.16, "train_speed(iter/s)": 0.757616} {"loss": 1.46833873, "grad_norm": 1.32652652, "learning_rate": 1.8e-06, "token_acc": 0.67064083, "epoch": 0.00899888, "global_step/max_steps": "8/8890", "percentage": "0.09%", "elapsed_time": "10s", "remaining_time": "3h 12m 7s", "memory(GiB)": 20.16, "train_speed(iter/s)": 0.770515} {"loss": 1.3861146, "grad_norm": 1.15362608, "learning_rate": 2.02e-06, "token_acc": 0.66004963, "epoch": 0.01012373, "global_step/max_steps": "9/8890", "percentage": "0.10%", "elapsed_time": "11s", "remaining_time": "3h 7m 57s", "memory(GiB)": 20.16, "train_speed(iter/s)": 0.787485} {"loss": 1.45334172, "grad_norm": 1.04039979, "learning_rate": 2.25e-06, "token_acc": 0.63361662, "epoch": 0.01124859, "global_step/max_steps": "10/8890", "percentage": "0.11%", "elapsed_time": "12s", "remaining_time": "3h 5m 56s", "memory(GiB)": 20.16, "train_speed(iter/s)": 0.795943} {"loss": 1.85912061, "grad_norm": 1.21472192, "learning_rate": 2.47e-06, "token_acc": 0.57192575, "epoch": 0.01237345, "global_step/max_steps": "11/8890", "percentage": "0.12%", "elapsed_time": "13s", "remaining_time": "3h 4m 29s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.802088} {"loss": 1.4078933, "grad_norm": 1.30844748, "learning_rate": 2.7e-06, "token_acc": 0.63544669, "epoch": 0.01349831, "global_step/max_steps": "12/8890", "percentage": "0.13%", "elapsed_time": "14s", "remaining_time": "3h 1m 52s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.81353} {"loss": 1.73233652, "grad_norm": 1.01392555, "learning_rate": 2.92e-06, "token_acc": 0.58123108, "epoch": 0.01462317, "global_step/max_steps": "13/8890", "percentage": "0.15%", "elapsed_time": "15s", "remaining_time": "3h 0m 57s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.817588} {"loss": 1.85604298, "grad_norm": 1.09627438, "learning_rate": 3.15e-06, "token_acc": 0.56615385, "epoch": 0.01574803, "global_step/max_steps": "14/8890", "percentage": "0.16%", "elapsed_time": "16s", "remaining_time": "2h 58m 5s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.830628} {"loss": 1.8151319, "grad_norm": 1.21413291, "learning_rate": 3.37e-06, "token_acc": 0.56620429, "epoch": 0.01687289, "global_step/max_steps": "15/8890", "percentage": "0.17%", "elapsed_time": "17s", "remaining_time": "2h 56m 30s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.838019} {"loss": 1.79180348, "grad_norm": 1.03494871, "learning_rate": 3.6e-06, "token_acc": 0.57781753, "epoch": 0.01799775, "global_step/max_steps": "16/8890", "percentage": "0.18%", "elapsed_time": "18s", "remaining_time": "2h 52m 48s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.855827} {"loss": 1.59681177, "grad_norm": 1.20576119, "learning_rate": 3.82e-06, "token_acc": 0.62705436, "epoch": 0.01912261, "global_step/max_steps": "17/8890", "percentage": "0.19%", "elapsed_time": "19s", "remaining_time": "2h 52m 36s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.856726} {"loss": 1.46578455, "grad_norm": 1.40065551, "learning_rate": 4.04e-06, "token_acc": 0.63622754, "epoch": 0.02024747, "global_step/max_steps": "18/8890", "percentage": "0.20%", "elapsed_time": "20s", "remaining_time": "2h 52m 29s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.857274} {"loss": 1.42401028, "grad_norm": 0.96123779, "learning_rate": 4.27e-06, "token_acc": 0.64540338, "epoch": 0.02137233, "global_step/max_steps": "19/8890", "percentage": "0.21%", "elapsed_time": "22s", "remaining_time": "2h 52m 37s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.856472} {"loss": 1.64584661, "grad_norm": 1.35670543, "learning_rate": 4.49e-06, "token_acc": 0.58104738, "epoch": 0.02249719, "global_step/max_steps": "20/8890", "percentage": "0.22%", "elapsed_time": "23s", "remaining_time": "2h 51m 44s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.86083} {"loss": 1.63032436, "grad_norm": 1.2446835, "learning_rate": 4.72e-06, "token_acc": 0.5908046, "epoch": 0.02362205, "global_step/max_steps": "21/8890", "percentage": "0.24%", "elapsed_time": "24s", "remaining_time": "2h 50m 46s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.865522} {"loss": 1.51997566, "grad_norm": 1.08684242, "learning_rate": 4.94e-06, "token_acc": 0.61756098, "epoch": 0.02474691, "global_step/max_steps": "22/8890", "percentage": "0.25%", "elapsed_time": "25s", "remaining_time": "2h 50m 34s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.86644} {"loss": 1.52184033, "grad_norm": 1.20151687, "learning_rate": 5.17e-06, "token_acc": 0.6300813, "epoch": 0.02587177, "global_step/max_steps": "23/8890", "percentage": "0.26%", "elapsed_time": "26s", "remaining_time": "2h 49m 45s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.870579} {"loss": 1.72524989, "grad_norm": 1.24224663, "learning_rate": 5.39e-06, "token_acc": 0.59411765, "epoch": 0.02699663, "global_step/max_steps": "24/8890", "percentage": "0.27%", "elapsed_time": "27s", "remaining_time": "2h 49m 40s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.87084} {"loss": 1.36845577, "grad_norm": 1.04788768, "learning_rate": 5.62e-06, "token_acc": 0.64930556, "epoch": 0.02812148, "global_step/max_steps": "25/8890", "percentage": "0.28%", "elapsed_time": "28s", "remaining_time": "2h 48m 31s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.876698} {"loss": 1.70304203, "grad_norm": 1.2565279, "learning_rate": 5.84e-06, "token_acc": 0.57126437, "epoch": 0.02924634, "global_step/max_steps": "26/8890", "percentage": "0.29%", "elapsed_time": "29s", "remaining_time": "2h 48m 29s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.876782} {"loss": 1.51013136, "grad_norm": 1.22672403, "learning_rate": 6.07e-06, "token_acc": 0.60768335, "epoch": 0.0303712, "global_step/max_steps": "27/8890", "percentage": "0.30%", "elapsed_time": "30s", "remaining_time": "2h 48m 24s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.877105} {"loss": 1.57358158, "grad_norm": 1.55711699, "learning_rate": 6.29e-06, "token_acc": 0.58780842, "epoch": 0.03149606, "global_step/max_steps": "28/8890", "percentage": "0.31%", "elapsed_time": "31s", "remaining_time": "2h 48m 19s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.877494} {"loss": 1.60772491, "grad_norm": 1.12017643, "learning_rate": 6.52e-06, "token_acc": 0.60978148, "epoch": 0.03262092, "global_step/max_steps": "29/8890", "percentage": "0.33%", "elapsed_time": "33s", "remaining_time": "2h 48m 16s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.877634} {"loss": 1.54295564, "grad_norm": 1.31441462, "learning_rate": 6.74e-06, "token_acc": 0.59418458, "epoch": 0.03374578, "global_step/max_steps": "30/8890", "percentage": "0.34%", "elapsed_time": "34s", "remaining_time": "2h 48m 15s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.877603} {"loss": 1.53144372, "grad_norm": 1.07292604, "learning_rate": 6.97e-06, "token_acc": 0.60412574, "epoch": 0.03487064, "global_step/max_steps": "31/8890", "percentage": "0.35%", "elapsed_time": "35s", "remaining_time": "2h 48m 44s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.874983} {"loss": 1.59217095, "grad_norm": 1.28910172, "learning_rate": 7.19e-06, "token_acc": 0.59459459, "epoch": 0.0359955, "global_step/max_steps": "32/8890", "percentage": "0.36%", "elapsed_time": "36s", "remaining_time": "2h 48m 46s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.874757} {"loss": 1.27850223, "grad_norm": 1.61507869, "learning_rate": 7.42e-06, "token_acc": 0.66377816, "epoch": 0.03712036, "global_step/max_steps": "33/8890", "percentage": "0.37%", "elapsed_time": "37s", "remaining_time": "2h 48m 40s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.875146} {"loss": 1.48177826, "grad_norm": 1.26469707, "learning_rate": 7.64e-06, "token_acc": 0.61607143, "epoch": 0.03824522, "global_step/max_steps": "34/8890", "percentage": "0.38%", "elapsed_time": "38s", "remaining_time": "2h 48m 23s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.876521} {"loss": 1.32373512, "grad_norm": 1.39010537, "learning_rate": 7.87e-06, "token_acc": 0.64156206, "epoch": 0.03937008, "global_step/max_steps": "35/8890", "percentage": "0.39%", "elapsed_time": "39s", "remaining_time": "2h 47m 39s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.880284} {"loss": 1.70803654, "grad_norm": 1.30288124, "learning_rate": 8.09e-06, "token_acc": 0.58693245, "epoch": 0.04049494, "global_step/max_steps": "36/8890", "percentage": "0.40%", "elapsed_time": "40s", "remaining_time": "2h 47m 37s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.880361} {"loss": 1.30690134, "grad_norm": 1.35376823, "learning_rate": 8.31e-06, "token_acc": 0.64261745, "epoch": 0.0416198, "global_step/max_steps": "37/8890", "percentage": "0.42%", "elapsed_time": "42s", "remaining_time": "2h 47m 45s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.879574} {"loss": 1.50811148, "grad_norm": 1.21635628, "learning_rate": 8.54e-06, "token_acc": 0.61775586, "epoch": 0.04274466, "global_step/max_steps": "38/8890", "percentage": "0.43%", "elapsed_time": "43s", "remaining_time": "2h 47m 58s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.878297} {"loss": 1.19471824, "grad_norm": 1.52855492, "learning_rate": 8.76e-06, "token_acc": 0.65696784, "epoch": 0.04386952, "global_step/max_steps": "39/8890", "percentage": "0.44%", "elapsed_time": "44s", "remaining_time": "2h 47m 37s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.880044} {"loss": 1.50608683, "grad_norm": 1.12458599, "learning_rate": 8.99e-06, "token_acc": 0.61894737, "epoch": 0.04499438, "global_step/max_steps": "40/8890", "percentage": "0.45%", "elapsed_time": "45s", "remaining_time": "2h 47m 33s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.880266} {"loss": 1.464167, "grad_norm": 1.23676121, "learning_rate": 9.21e-06, "token_acc": 0.62881563, "epoch": 0.04611924, "global_step/max_steps": "41/8890", "percentage": "0.46%", "elapsed_time": "46s", "remaining_time": "2h 47m 15s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.881748} {"loss": 1.15121412, "grad_norm": 1.09234822, "learning_rate": 9.44e-06, "token_acc": 0.69167804, "epoch": 0.04724409, "global_step/max_steps": "42/8890", "percentage": "0.47%", "elapsed_time": "47s", "remaining_time": "2h 47m 18s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.881444} {"loss": 1.42301965, "grad_norm": 1.01351643, "learning_rate": 9.66e-06, "token_acc": 0.62107905, "epoch": 0.04836895, "global_step/max_steps": "43/8890", "percentage": "0.48%", "elapsed_time": "48s", "remaining_time": "2h 47m 25s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.880704} {"loss": 1.30224299, "grad_norm": 1.01791513, "learning_rate": 9.89e-06, "token_acc": 0.62580645, "epoch": 0.04949381, "global_step/max_steps": "44/8890", "percentage": "0.49%", "elapsed_time": "49s", "remaining_time": "2h 47m 21s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.88096} {"loss": 1.34960473, "grad_norm": 1.06680465, "learning_rate": 1.011e-05, "token_acc": 0.6096423, "epoch": 0.05061867, "global_step/max_steps": "45/8890", "percentage": "0.51%", "elapsed_time": "51s", "remaining_time": "2h 47m 16s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.881247} {"loss": 1.34552455, "grad_norm": 0.92787421, "learning_rate": 1.034e-05, "token_acc": 0.64650059, "epoch": 0.05174353, "global_step/max_steps": "46/8890", "percentage": "0.52%", "elapsed_time": "52s", "remaining_time": "2h 46m 59s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.882649} {"loss": 1.17554212, "grad_norm": 0.87304157, "learning_rate": 1.056e-05, "token_acc": 0.67933492, "epoch": 0.05286839, "global_step/max_steps": "47/8890", "percentage": "0.53%", "elapsed_time": "53s", "remaining_time": "2h 47m 0s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.882451} {"loss": 1.42980075, "grad_norm": 0.87771857, "learning_rate": 1.079e-05, "token_acc": 0.62180974, "epoch": 0.05399325, "global_step/max_steps": "48/8890", "percentage": "0.54%", "elapsed_time": "54s", "remaining_time": "2h 47m 8s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.881698} {"loss": 1.48835492, "grad_norm": 0.76856655, "learning_rate": 1.101e-05, "token_acc": 0.62635771, "epoch": 0.05511811, "global_step/max_steps": "49/8890", "percentage": "0.55%", "elapsed_time": "55s", "remaining_time": "2h 47m 48s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.878085} {"loss": 1.27124202, "grad_norm": 0.83282173, "learning_rate": 1.124e-05, "token_acc": 0.67298106, "epoch": 0.05624297, "global_step/max_steps": "50/8890", "percentage": "0.56%", "elapsed_time": "57s", "remaining_time": "2h 48m 28s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.874511} {"loss": 1.35546565, "grad_norm": 0.86390299, "learning_rate": 1.146e-05, "token_acc": 0.65359477, "epoch": 0.05736783, "global_step/max_steps": "51/8890", "percentage": "0.57%", "elapsed_time": "58s", "remaining_time": "2h 49m 7s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.871057} {"loss": 1.58437657, "grad_norm": 0.95799476, "learning_rate": 1.169e-05, "token_acc": 0.60091743, "epoch": 0.05849269, "global_step/max_steps": "52/8890", "percentage": "0.58%", "elapsed_time": "59s", "remaining_time": "2h 48m 51s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.872343} {"loss": 1.05281043, "grad_norm": 0.83511746, "learning_rate": 1.191e-05, "token_acc": 0.71754636, "epoch": 0.05961755, "global_step/max_steps": "53/8890", "percentage": "0.60%", "elapsed_time": "1m 0s", "remaining_time": "2h 48m 30s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.874052} {"loss": 1.42461622, "grad_norm": 0.90725505, "learning_rate": 1.213e-05, "token_acc": 0.65384615, "epoch": 0.06074241, "global_step/max_steps": "54/8890", "percentage": "0.61%", "elapsed_time": "1m 1s", "remaining_time": "2h 48m 12s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.875495} {"loss": 1.33966577, "grad_norm": 0.97563028, "learning_rate": 1.236e-05, "token_acc": 0.67247008, "epoch": 0.06186727, "global_step/max_steps": "55/8890", "percentage": "0.62%", "elapsed_time": "1m 2s", "remaining_time": "2h 48m 10s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.875544} {"loss": 1.32358897, "grad_norm": 1.00732815, "learning_rate": 1.258e-05, "token_acc": 0.63309353, "epoch": 0.06299213, "global_step/max_steps": "56/8890", "percentage": "0.63%", "elapsed_time": "1m 3s", "remaining_time": "2h 47m 59s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.876394} {"loss": 1.34436095, "grad_norm": 1.06753314, "learning_rate": 1.281e-05, "token_acc": 0.6590604, "epoch": 0.06411699, "global_step/max_steps": "57/8890", "percentage": "0.64%", "elapsed_time": "1m 4s", "remaining_time": "2h 47m 38s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.878147} {"loss": 1.35749626, "grad_norm": 0.84260303, "learning_rate": 1.303e-05, "token_acc": 0.63838812, "epoch": 0.06524184, "global_step/max_steps": "58/8890", "percentage": "0.65%", "elapsed_time": "1m 6s", "remaining_time": "2h 47m 38s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.878037} {"loss": 1.42192543, "grad_norm": 0.93596476, "learning_rate": 1.326e-05, "token_acc": 0.65159868, "epoch": 0.0663667, "global_step/max_steps": "59/8890", "percentage": "0.66%", "elapsed_time": "1m 7s", "remaining_time": "2h 47m 25s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.87914} {"loss": 1.0539552, "grad_norm": 0.92002225, "learning_rate": 1.348e-05, "token_acc": 0.71008403, "epoch": 0.06749156, "global_step/max_steps": "60/8890", "percentage": "0.67%", "elapsed_time": "1m 8s", "remaining_time": "2h 46m 47s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.882333} {"loss": 1.32220232, "grad_norm": 0.91375375, "learning_rate": 1.371e-05, "token_acc": 0.65542169, "epoch": 0.06861642, "global_step/max_steps": "61/8890", "percentage": "0.69%", "elapsed_time": "1m 9s", "remaining_time": "2h 47m 11s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.880138} {"loss": 1.36680377, "grad_norm": 0.84640843, "learning_rate": 1.393e-05, "token_acc": 0.63757273, "epoch": 0.06974128, "global_step/max_steps": "62/8890", "percentage": "0.70%", "elapsed_time": "1m 10s", "remaining_time": "2h 46m 59s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.88104} {"loss": 1.21439409, "grad_norm": 0.78948641, "learning_rate": 1.416e-05, "token_acc": 0.69433962, "epoch": 0.07086614, "global_step/max_steps": "63/8890", "percentage": "0.71%", "elapsed_time": "1m 11s", "remaining_time": "2h 46m 58s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.881092} {"loss": 1.25094652, "grad_norm": 0.8077451, "learning_rate": 1.438e-05, "token_acc": 0.68975069, "epoch": 0.071991, "global_step/max_steps": "64/8890", "percentage": "0.72%", "elapsed_time": "1m 12s", "remaining_time": "2h 46m 59s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.88084} {"loss": 1.19091308, "grad_norm": 0.86465526, "learning_rate": 1.461e-05, "token_acc": 0.68171021, "epoch": 0.07311586, "global_step/max_steps": "65/8890", "percentage": "0.73%", "elapsed_time": "1m 13s", "remaining_time": "2h 46m 50s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.881597} {"loss": 1.24571252, "grad_norm": 0.91716617, "learning_rate": 1.483e-05, "token_acc": 0.68571429, "epoch": 0.07424072, "global_step/max_steps": "66/8890", "percentage": "0.74%", "elapsed_time": "1m 14s", "remaining_time": "2h 46m 24s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.883764} {"loss": 1.2644918, "grad_norm": 0.96272248, "learning_rate": 1.506e-05, "token_acc": 0.66323907, "epoch": 0.07536558, "global_step/max_steps": "67/8890", "percentage": "0.75%", "elapsed_time": "1m 15s", "remaining_time": "2h 46m 12s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.884703} {"loss": 1.17377162, "grad_norm": 0.89933956, "learning_rate": 1.528e-05, "token_acc": 0.68574635, "epoch": 0.07649044, "global_step/max_steps": "68/8890", "percentage": "0.76%", "elapsed_time": "1m 16s", "remaining_time": "2h 45m 56s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.886024} {"loss": 1.04874218, "grad_norm": 0.86480546, "learning_rate": 1.551e-05, "token_acc": 0.72622108, "epoch": 0.0776153, "global_step/max_steps": "69/8890", "percentage": "0.78%", "elapsed_time": "1m 17s", "remaining_time": "2h 45m 41s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.887274} {"loss": 1.11426044, "grad_norm": 0.86093879, "learning_rate": 1.573e-05, "token_acc": 0.70056497, "epoch": 0.07874016, "global_step/max_steps": "70/8890", "percentage": "0.79%", "elapsed_time": "1m 18s", "remaining_time": "2h 45m 26s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.888515} {"loss": 1.25936425, "grad_norm": 0.76018429, "learning_rate": 1.596e-05, "token_acc": 0.67535744, "epoch": 0.07986502, "global_step/max_steps": "71/8890", "percentage": "0.80%", "elapsed_time": "1m 19s", "remaining_time": "2h 45m 9s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.889946} {"loss": 1.31072021, "grad_norm": 0.93265265, "learning_rate": 1.618e-05, "token_acc": 0.65856481, "epoch": 0.08098988, "global_step/max_steps": "72/8890", "percentage": "0.81%", "elapsed_time": "1m 20s", "remaining_time": "2h 45m 0s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.890691} {"loss": 1.458076, "grad_norm": 0.93074238, "learning_rate": 1.64e-05, "token_acc": 0.6427718, "epoch": 0.08211474, "global_step/max_steps": "73/8890", "percentage": "0.82%", "elapsed_time": "1m 21s", "remaining_time": "2h 44m 59s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.890642} {"loss": 1.17240238, "grad_norm": 0.87888592, "learning_rate": 1.663e-05, "token_acc": 0.69047619, "epoch": 0.0832396, "global_step/max_steps": "74/8890", "percentage": "0.83%", "elapsed_time": "1m 23s", "remaining_time": "2h 45m 1s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.890328} {"loss": 1.29501271, "grad_norm": 0.95915657, "learning_rate": 1.685e-05, "token_acc": 0.65835141, "epoch": 0.08436445, "global_step/max_steps": "75/8890", "percentage": "0.84%", "elapsed_time": "1m 23s", "remaining_time": "2h 44m 21s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.89387} {"loss": 1.25957835, "grad_norm": 1.06986177, "learning_rate": 1.708e-05, "token_acc": 0.6550765, "epoch": 0.08548931, "global_step/max_steps": "76/8890", "percentage": "0.85%", "elapsed_time": "1m 24s", "remaining_time": "2h 44m 16s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.894269} {"loss": 1.32353055, "grad_norm": 1.04146004, "learning_rate": 1.73e-05, "token_acc": 0.64409449, "epoch": 0.08661417, "global_step/max_steps": "77/8890", "percentage": "0.87%", "elapsed_time": "1m 26s", "remaining_time": "2h 44m 15s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.894185} {"loss": 1.18903768, "grad_norm": 0.87567049, "learning_rate": 1.753e-05, "token_acc": 0.66932271, "epoch": 0.08773903, "global_step/max_steps": "78/8890", "percentage": "0.88%", "elapsed_time": "1m 27s", "remaining_time": "2h 44m 6s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.894944} {"loss": 0.99751741, "grad_norm": 0.92393231, "learning_rate": 1.775e-05, "token_acc": 0.698941, "epoch": 0.08886389, "global_step/max_steps": "79/8890", "percentage": "0.89%", "elapsed_time": "1m 28s", "remaining_time": "2h 44m 9s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.894603} {"loss": 1.35254085, "grad_norm": 0.87324339, "learning_rate": 1.798e-05, "token_acc": 0.64821429, "epoch": 0.08998875, "global_step/max_steps": "80/8890", "percentage": "0.90%", "elapsed_time": "1m 29s", "remaining_time": "2h 44m 26s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.892926} {"loss": 0.97618979, "grad_norm": 0.74124312, "learning_rate": 1.82e-05, "token_acc": 0.74369748, "epoch": 0.09111361, "global_step/max_steps": "81/8890", "percentage": "0.91%", "elapsed_time": "1m 30s", "remaining_time": "2h 44m 28s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.89261} {"loss": 1.3313086, "grad_norm": 0.86926675, "learning_rate": 1.843e-05, "token_acc": 0.65240084, "epoch": 0.09223847, "global_step/max_steps": "82/8890", "percentage": "0.92%", "elapsed_time": "1m 31s", "remaining_time": "2h 44m 31s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.892276} {"loss": 1.18488121, "grad_norm": 0.86622739, "learning_rate": 1.865e-05, "token_acc": 0.6822335, "epoch": 0.09336333, "global_step/max_steps": "83/8890", "percentage": "0.93%", "elapsed_time": "1m 33s", "remaining_time": "2h 44m 33s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.892007} {"loss": 1.31956685, "grad_norm": 0.87386024, "learning_rate": 1.888e-05, "token_acc": 0.64850615, "epoch": 0.09448819, "global_step/max_steps": "84/8890", "percentage": "0.94%", "elapsed_time": "1m 34s", "remaining_time": "2h 44m 35s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.89166} {"loss": 1.05255413, "grad_norm": 0.94159323, "learning_rate": 1.91e-05, "token_acc": 0.7005988, "epoch": 0.09561305, "global_step/max_steps": "85/8890", "percentage": "0.96%", "elapsed_time": "1m 35s", "remaining_time": "2h 44m 11s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.893809} {"loss": 1.17551398, "grad_norm": 0.86304897, "learning_rate": 1.933e-05, "token_acc": 0.68855721, "epoch": 0.09673791, "global_step/max_steps": "86/8890", "percentage": "0.97%", "elapsed_time": "1m 35s", "remaining_time": "2h 43m 46s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.895987} {"loss": 1.26633358, "grad_norm": 0.92751741, "learning_rate": 1.955e-05, "token_acc": 0.66259711, "epoch": 0.09786277, "global_step/max_steps": "87/8890", "percentage": "0.98%", "elapsed_time": "1m 36s", "remaining_time": "2h 43m 33s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.896985} {"loss": 0.95039904, "grad_norm": 0.85768121, "learning_rate": 1.978e-05, "token_acc": 0.74060606, "epoch": 0.09898763, "global_step/max_steps": "88/8890", "percentage": "0.99%", "elapsed_time": "1m 38s", "remaining_time": "2h 43m 40s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.896303} {"loss": 1.34119606, "grad_norm": 0.90072864, "learning_rate": 2e-05, "token_acc": 0.64586995, "epoch": 0.10011249, "global_step/max_steps": "89/8890", "percentage": "1.00%", "elapsed_time": "1m 39s", "remaining_time": "2h 43m 16s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.89839} {"loss": 1.19622207, "grad_norm": 0.87237185, "learning_rate": 2.022e-05, "token_acc": 0.68020833, "epoch": 0.10123735, "global_step/max_steps": "90/8890", "percentage": "1.01%", "elapsed_time": "1m 40s", "remaining_time": "2h 43m 22s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.8977} {"loss": 1.10768461, "grad_norm": 0.88597816, "learning_rate": 2.045e-05, "token_acc": 0.68771139, "epoch": 0.1023622, "global_step/max_steps": "91/8890", "percentage": "1.02%", "elapsed_time": "1m 41s", "remaining_time": "2h 42m 48s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.900738} {"loss": 1.23874331, "grad_norm": 0.84254998, "learning_rate": 2.067e-05, "token_acc": 0.67785235, "epoch": 0.10348706, "global_step/max_steps": "92/8890", "percentage": "1.03%", "elapsed_time": "1m 42s", "remaining_time": "2h 42m 49s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.900529} {"loss": 1.29492998, "grad_norm": 1.22685754, "learning_rate": 2.09e-05, "token_acc": 0.65751445, "epoch": 0.10461192, "global_step/max_steps": "93/8890", "percentage": "1.05%", "elapsed_time": "1m 43s", "remaining_time": "2h 42m 42s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.90106} {"loss": 1.05007768, "grad_norm": 1.07012665, "learning_rate": 2.112e-05, "token_acc": 0.70614693, "epoch": 0.10573678, "global_step/max_steps": "94/8890", "percentage": "1.06%", "elapsed_time": "1m 44s", "remaining_time": "2h 42m 36s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.901552} {"loss": 1.33122325, "grad_norm": 1.00660396, "learning_rate": 2.135e-05, "token_acc": 0.63866513, "epoch": 0.10686164, "global_step/max_steps": "95/8890", "percentage": "1.07%", "elapsed_time": "1m 45s", "remaining_time": "2h 42m 37s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.901382} {"loss": 1.17030203, "grad_norm": 1.03970885, "learning_rate": 2.157e-05, "token_acc": 0.67894737, "epoch": 0.1079865, "global_step/max_steps": "96/8890", "percentage": "1.08%", "elapsed_time": "1m 46s", "remaining_time": "2h 42m 7s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.90399} {"loss": 1.25753903, "grad_norm": 0.98551548, "learning_rate": 2.18e-05, "token_acc": 0.6602358, "epoch": 0.10911136, "global_step/max_steps": "97/8890", "percentage": "1.09%", "elapsed_time": "1m 47s", "remaining_time": "2h 42m 9s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.903741} {"loss": 1.20616627, "grad_norm": 1.04355502, "learning_rate": 2.202e-05, "token_acc": 0.67068758, "epoch": 0.11023622, "global_step/max_steps": "98/8890", "percentage": "1.10%", "elapsed_time": "1m 48s", "remaining_time": "2h 42m 13s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.903279} {"loss": 1.1129781, "grad_norm": 1.09564555, "learning_rate": 2.225e-05, "token_acc": 0.68530559, "epoch": 0.11136108, "global_step/max_steps": "99/8890", "percentage": "1.11%", "elapsed_time": "1m 49s", "remaining_time": "2h 42m 14s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.90304} {"loss": 1.11786938, "grad_norm": 0.96410376, "learning_rate": 2.247e-05, "token_acc": 0.69493177, "epoch": 0.11248594, "global_step/max_steps": "100/8890", "percentage": "1.12%", "elapsed_time": "1m 50s", "remaining_time": "2h 42m 10s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.903305} {"loss": 1.16376269, "grad_norm": 1.135216, "learning_rate": 2.27e-05, "token_acc": 0.67527675, "epoch": 0.1136108, "global_step/max_steps": "101/8890", "percentage": "1.14%", "elapsed_time": "1m 51s", "remaining_time": "2h 42m 12s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.903039} {"loss": 1.34537709, "grad_norm": 0.9879117, "learning_rate": 2.292e-05, "token_acc": 0.66318786, "epoch": 0.11473566, "global_step/max_steps": "102/8890", "percentage": "1.15%", "elapsed_time": "1m 52s", "remaining_time": "2h 42m 12s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.902926} {"loss": 1.1417892, "grad_norm": 1.04325807, "learning_rate": 2.315e-05, "token_acc": 0.69247788, "epoch": 0.11586052, "global_step/max_steps": "103/8890", "percentage": "1.16%", "elapsed_time": "1m 54s", "remaining_time": "2h 42m 14s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.90266} {"loss": 1.21374083, "grad_norm": 1.14657915, "learning_rate": 2.337e-05, "token_acc": 0.68970381, "epoch": 0.11698538, "global_step/max_steps": "104/8890", "percentage": "1.17%", "elapsed_time": "1m 55s", "remaining_time": "2h 42m 5s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.903392} {"loss": 1.1853112, "grad_norm": 1.16486084, "learning_rate": 2.36e-05, "token_acc": 0.69494585, "epoch": 0.11811024, "global_step/max_steps": "105/8890", "percentage": "1.18%", "elapsed_time": "1m 56s", "remaining_time": "2h 42m 7s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.903108} {"loss": 1.31376338, "grad_norm": 1.12948573, "learning_rate": 2.382e-05, "token_acc": 0.64975248, "epoch": 0.1192351, "global_step/max_steps": "106/8890", "percentage": "1.19%", "elapsed_time": "1m 57s", "remaining_time": "2h 42m 25s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.90132} {"loss": 1.15995193, "grad_norm": 1.15254354, "learning_rate": 2.404e-05, "token_acc": 0.68457143, "epoch": 0.12035996, "global_step/max_steps": "107/8890", "percentage": "1.20%", "elapsed_time": "1m 58s", "remaining_time": "2h 42m 37s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.900108} {"loss": 1.19181108, "grad_norm": 1.08095372, "learning_rate": 2.427e-05, "token_acc": 0.68756999, "epoch": 0.12148481, "global_step/max_steps": "108/8890", "percentage": "1.21%", "elapsed_time": "2m 0s", "remaining_time": "2h 42m 39s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.899814} {"loss": 1.02092731, "grad_norm": 1.22955573, "learning_rate": 2.449e-05, "token_acc": 0.71639586, "epoch": 0.12260967, "global_step/max_steps": "109/8890", "percentage": "1.23%", "elapsed_time": "2m 1s", "remaining_time": "2h 42m 40s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.89968} {"loss": 0.93570966, "grad_norm": 1.15739262, "learning_rate": 2.472e-05, "token_acc": 0.74304636, "epoch": 0.12373453, "global_step/max_steps": "110/8890", "percentage": "1.24%", "elapsed_time": "2m 2s", "remaining_time": "2h 42m 51s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.898491} {"loss": 1.15192461, "grad_norm": 1.08122563, "learning_rate": 2.494e-05, "token_acc": 0.69203223, "epoch": 0.12485939, "global_step/max_steps": "111/8890", "percentage": "1.25%", "elapsed_time": "2m 3s", "remaining_time": "2h 42m 52s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.898358} {"loss": 1.03619576, "grad_norm": 1.17627835, "learning_rate": 2.517e-05, "token_acc": 0.72752044, "epoch": 0.12598425, "global_step/max_steps": "112/8890", "percentage": "1.26%", "elapsed_time": "2m 4s", "remaining_time": "2h 42m 52s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.898229} {"loss": 0.9733876, "grad_norm": 0.93413502, "learning_rate": 2.539e-05, "token_acc": 0.70452358, "epoch": 0.12710911, "global_step/max_steps": "113/8890", "percentage": "1.27%", "elapsed_time": "2m 5s", "remaining_time": "2h 42m 43s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.898934} {"loss": 0.9553889, "grad_norm": 1.14076185, "learning_rate": 2.562e-05, "token_acc": 0.71391076, "epoch": 0.12823397, "global_step/max_steps": "114/8890", "percentage": "1.28%", "elapsed_time": "2m 6s", "remaining_time": "2h 42m 44s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.898812} {"loss": 1.11644185, "grad_norm": 1.18051064, "learning_rate": 2.584e-05, "token_acc": 0.69922879, "epoch": 0.12935883, "global_step/max_steps": "115/8890", "percentage": "1.29%", "elapsed_time": "2m 7s", "remaining_time": "2h 42m 37s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.899343} {"loss": 1.3218329, "grad_norm": 1.08414567, "learning_rate": 2.607e-05, "token_acc": 0.66145354, "epoch": 0.13048369, "global_step/max_steps": "116/8890", "percentage": "1.30%", "elapsed_time": "2m 9s", "remaining_time": "2h 42m 39s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.89898} {"loss": 1.17112529, "grad_norm": 1.20059359, "learning_rate": 2.629e-05, "token_acc": 0.68471721, "epoch": 0.13160855, "global_step/max_steps": "117/8890", "percentage": "1.32%", "elapsed_time": "2m 9s", "remaining_time": "2h 42m 22s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.900532} {"loss": 1.02839899, "grad_norm": 1.24438322, "learning_rate": 2.652e-05, "token_acc": 0.69007264, "epoch": 0.13273341, "global_step/max_steps": "118/8890", "percentage": "1.33%", "elapsed_time": "2m 11s", "remaining_time": "2h 42m 22s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.900419} {"loss": 1.09058857, "grad_norm": 1.18186498, "learning_rate": 2.674e-05, "token_acc": 0.68850267, "epoch": 0.13385827, "global_step/max_steps": "119/8890", "percentage": "1.34%", "elapsed_time": "2m 12s", "remaining_time": "2h 42m 22s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.900302} {"loss": 0.99342406, "grad_norm": 1.01011765, "learning_rate": 2.697e-05, "token_acc": 0.73146293, "epoch": 0.13498313, "global_step/max_steps": "120/8890", "percentage": "1.35%", "elapsed_time": "2m 13s", "remaining_time": "2h 42m 23s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.900067} {"loss": 1.08675027, "grad_norm": 1.01053286, "learning_rate": 2.719e-05, "token_acc": 0.7080689, "epoch": 0.13610799, "global_step/max_steps": "121/8890", "percentage": "1.36%", "elapsed_time": "2m 14s", "remaining_time": "2h 42m 24s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.89987} {"loss": 1.32221532, "grad_norm": 1.23438907, "learning_rate": 2.742e-05, "token_acc": 0.65144231, "epoch": 0.13723285, "global_step/max_steps": "122/8890", "percentage": "1.37%", "elapsed_time": "2m 15s", "remaining_time": "2h 42m 19s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.900264} {"loss": 0.90268898, "grad_norm": 0.96253836, "learning_rate": 2.764e-05, "token_acc": 0.7589545, "epoch": 0.13835771, "global_step/max_steps": "123/8890", "percentage": "1.38%", "elapsed_time": "2m 16s", "remaining_time": "2h 42m 20s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.900031} {"loss": 1.27679026, "grad_norm": 1.35558593, "learning_rate": 2.787e-05, "token_acc": 0.65429234, "epoch": 0.13948256, "global_step/max_steps": "124/8890", "percentage": "1.39%", "elapsed_time": "2m 17s", "remaining_time": "2h 42m 20s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.89992} {"loss": 1.02469945, "grad_norm": 1.09831047, "learning_rate": 2.809e-05, "token_acc": 0.72413793, "epoch": 0.14060742, "global_step/max_steps": "125/8890", "percentage": "1.41%", "elapsed_time": "2m 18s", "remaining_time": "2h 42m 19s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.899952} {"loss": 1.1399858, "grad_norm": 1.40232456, "learning_rate": 2.831e-05, "token_acc": 0.67014342, "epoch": 0.14173228, "global_step/max_steps": "126/8890", "percentage": "1.42%", "elapsed_time": "2m 19s", "remaining_time": "2h 42m 10s", "memory(GiB)": 21.88, "train_speed(iter/s)": 0.900631} {"loss": 1.19401884, "grad_norm": 1.19011962, "learning_rate": 2.854e-05, "token_acc": 0.68157034, "epoch": 0.14285714, "global_step/max_steps": "127/8890", "percentage": "1.43%", "elapsed_time": "2m 21s", "remaining_time": "2h 42m 26s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.899108} {"loss": 1.01303411, "grad_norm": 1.33333635, "learning_rate": 2.876e-05, "token_acc": 0.68187744, "epoch": 0.143982, "global_step/max_steps": "128/8890", "percentage": "1.44%", "elapsed_time": "2m 22s", "remaining_time": "2h 42m 27s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.89889} {"loss": 1.09488118, "grad_norm": 1.25329673, "learning_rate": 2.899e-05, "token_acc": 0.69230769, "epoch": 0.14510686, "global_step/max_steps": "129/8890", "percentage": "1.45%", "elapsed_time": "2m 23s", "remaining_time": "2h 42m 27s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.898754} {"loss": 0.93130863, "grad_norm": 1.17964375, "learning_rate": 2.921e-05, "token_acc": 0.73248408, "epoch": 0.14623172, "global_step/max_steps": "130/8890", "percentage": "1.46%", "elapsed_time": "2m 24s", "remaining_time": "2h 42m 28s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.89857} {"loss": 0.99492878, "grad_norm": 1.12793028, "learning_rate": 2.944e-05, "token_acc": 0.7309417, "epoch": 0.14735658, "global_step/max_steps": "131/8890", "percentage": "1.47%", "elapsed_time": "2m 25s", "remaining_time": "2h 42m 29s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.898439} {"loss": 0.95147949, "grad_norm": 1.24706244, "learning_rate": 2.966e-05, "token_acc": 0.72250639, "epoch": 0.14848144, "global_step/max_steps": "132/8890", "percentage": "1.48%", "elapsed_time": "2m 26s", "remaining_time": "2h 42m 28s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.898396} {"loss": 1.07836723, "grad_norm": 1.2036556, "learning_rate": 2.989e-05, "token_acc": 0.70642202, "epoch": 0.1496063, "global_step/max_steps": "133/8890", "percentage": "1.50%", "elapsed_time": "2m 28s", "remaining_time": "2h 42m 28s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.898262} {"loss": 1.01094222, "grad_norm": 1.17452002, "learning_rate": 3.011e-05, "token_acc": 0.71085714, "epoch": 0.15073116, "global_step/max_steps": "134/8890", "percentage": "1.51%", "elapsed_time": "2m 29s", "remaining_time": "2h 42m 30s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.897998} {"loss": 0.95735443, "grad_norm": 1.31745827, "learning_rate": 3.034e-05, "token_acc": 0.72030075, "epoch": 0.15185602, "global_step/max_steps": "135/8890", "percentage": "1.52%", "elapsed_time": "2m 30s", "remaining_time": "2h 42m 32s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.897718} {"loss": 1.09239864, "grad_norm": 1.46430349, "learning_rate": 3.056e-05, "token_acc": 0.71063257, "epoch": 0.15298088, "global_step/max_steps": "136/8890", "percentage": "1.53%", "elapsed_time": "2m 31s", "remaining_time": "2h 42m 27s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.898118} {"loss": 1.08359003, "grad_norm": 1.04505968, "learning_rate": 3.079e-05, "token_acc": 0.69991687, "epoch": 0.15410574, "global_step/max_steps": "137/8890", "percentage": "1.54%", "elapsed_time": "2m 32s", "remaining_time": "2h 42m 30s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.897733} {"loss": 1.0368793, "grad_norm": 1.24166799, "learning_rate": 3.101e-05, "token_acc": 0.71130626, "epoch": 0.1552306, "global_step/max_steps": "138/8890", "percentage": "1.55%", "elapsed_time": "2m 33s", "remaining_time": "2h 42m 32s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.897432} {"loss": 1.20785439, "grad_norm": 1.23844993, "learning_rate": 3.124e-05, "token_acc": 0.67857143, "epoch": 0.15635546, "global_step/max_steps": "139/8890", "percentage": "1.56%", "elapsed_time": "2m 34s", "remaining_time": "2h 42m 34s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.897136} {"loss": 1.2164185, "grad_norm": 1.21963823, "learning_rate": 3.146e-05, "token_acc": 0.68731269, "epoch": 0.15748031, "global_step/max_steps": "140/8890", "percentage": "1.57%", "elapsed_time": "2m 36s", "remaining_time": "2h 42m 34s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.897012} {"loss": 1.16425431, "grad_norm": 1.40956008, "learning_rate": 3.169e-05, "token_acc": 0.6783692, "epoch": 0.15860517, "global_step/max_steps": "141/8890", "percentage": "1.59%", "elapsed_time": "2m 37s", "remaining_time": "2h 42m 29s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.897404} {"loss": 0.96854329, "grad_norm": 1.51780927, "learning_rate": 3.191e-05, "token_acc": 0.71956224, "epoch": 0.15973003, "global_step/max_steps": "142/8890", "percentage": "1.60%", "elapsed_time": "2m 37s", "remaining_time": "2h 42m 8s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.89921} {"loss": 1.12963033, "grad_norm": 1.38356507, "learning_rate": 3.213e-05, "token_acc": 0.67383059, "epoch": 0.16085489, "global_step/max_steps": "143/8890", "percentage": "1.61%", "elapsed_time": "2m 39s", "remaining_time": "2h 42m 10s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.898965} {"loss": 1.16167545, "grad_norm": 1.17117643, "learning_rate": 3.236e-05, "token_acc": 0.69600726, "epoch": 0.16197975, "global_step/max_steps": "144/8890", "percentage": "1.62%", "elapsed_time": "2m 40s", "remaining_time": "2h 42m 11s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.898763} {"loss": 1.17770493, "grad_norm": 1.79663801, "learning_rate": 3.258e-05, "token_acc": 0.69753086, "epoch": 0.16310461, "global_step/max_steps": "145/8890", "percentage": "1.63%", "elapsed_time": "2m 41s", "remaining_time": "2h 42m 6s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.899118} {"loss": 1.1560607, "grad_norm": 1.33790779, "learning_rate": 3.281e-05, "token_acc": 0.67201835, "epoch": 0.16422947, "global_step/max_steps": "146/8890", "percentage": "1.64%", "elapsed_time": "2m 42s", "remaining_time": "2h 42m 7s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.898897} {"loss": 1.14795804, "grad_norm": 1.404495, "learning_rate": 3.303e-05, "token_acc": 0.68028005, "epoch": 0.16535433, "global_step/max_steps": "147/8890", "percentage": "1.65%", "elapsed_time": "2m 43s", "remaining_time": "2h 42m 3s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.899166} {"loss": 1.04232419, "grad_norm": 1.05992687, "learning_rate": 3.326e-05, "token_acc": 0.71013289, "epoch": 0.16647919, "global_step/max_steps": "148/8890", "percentage": "1.66%", "elapsed_time": "2m 44s", "remaining_time": "2h 42m 4s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.898986} {"loss": 1.2026943, "grad_norm": 1.28616452, "learning_rate": 3.348e-05, "token_acc": 0.66945996, "epoch": 0.16760405, "global_step/max_steps": "149/8890", "percentage": "1.68%", "elapsed_time": "2m 45s", "remaining_time": "2h 42m 6s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.898691} {"loss": 0.9196437, "grad_norm": 1.77501285, "learning_rate": 3.371e-05, "token_acc": 0.71428571, "epoch": 0.16872891, "global_step/max_steps": "150/8890", "percentage": "1.69%", "elapsed_time": "2m 46s", "remaining_time": "2h 42m 0s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.899087} {"loss": 1.10529113, "grad_norm": 1.4608109, "learning_rate": 3.393e-05, "token_acc": 0.6988266, "epoch": 0.16985377, "global_step/max_steps": "151/8890", "percentage": "1.70%", "elapsed_time": "2m 48s", "remaining_time": "2h 42m 3s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.898721} {"loss": 1.19629252, "grad_norm": 1.6421442, "learning_rate": 3.416e-05, "token_acc": 0.65905849, "epoch": 0.17097863, "global_step/max_steps": "152/8890", "percentage": "1.71%", "elapsed_time": "2m 49s", "remaining_time": "2h 42m 3s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.898628} {"loss": 1.04445624, "grad_norm": 1.22885597, "learning_rate": 3.438e-05, "token_acc": 0.70919881, "epoch": 0.17210349, "global_step/max_steps": "153/8890", "percentage": "1.72%", "elapsed_time": "2m 50s", "remaining_time": "2h 42m 4s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.898476} {"loss": 1.21274686, "grad_norm": 1.3263526, "learning_rate": 3.461e-05, "token_acc": 0.66413662, "epoch": 0.17322835, "global_step/max_steps": "154/8890", "percentage": "1.73%", "elapsed_time": "2m 51s", "remaining_time": "2h 42m 12s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.897618} {"loss": 1.10992432, "grad_norm": 1.29997408, "learning_rate": 3.483e-05, "token_acc": 0.69652266, "epoch": 0.17435321, "global_step/max_steps": "155/8890", "percentage": "1.74%", "elapsed_time": "2m 52s", "remaining_time": "2h 42m 19s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.896848} {"loss": 1.06035292, "grad_norm": 1.58298147, "learning_rate": 3.506e-05, "token_acc": 0.67961165, "epoch": 0.17547807, "global_step/max_steps": "156/8890", "percentage": "1.75%", "elapsed_time": "2m 53s", "remaining_time": "2h 42m 13s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.897315} {"loss": 0.73946917, "grad_norm": 1.20098162, "learning_rate": 3.528e-05, "token_acc": 0.78763127, "epoch": 0.17660292, "global_step/max_steps": "157/8890", "percentage": "1.77%", "elapsed_time": "2m 54s", "remaining_time": "2h 42m 8s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.897663} {"loss": 1.01361728, "grad_norm": 1.452492, "learning_rate": 3.551e-05, "token_acc": 0.70434783, "epoch": 0.17772778, "global_step/max_steps": "158/8890", "percentage": "1.78%", "elapsed_time": "2m 56s", "remaining_time": "2h 42m 9s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.897438} {"loss": 0.96643394, "grad_norm": 1.29191589, "learning_rate": 3.573e-05, "token_acc": 0.73586957, "epoch": 0.17885264, "global_step/max_steps": "159/8890", "percentage": "1.79%", "elapsed_time": "2m 57s", "remaining_time": "2h 42m 9s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.897396} {"loss": 1.04332733, "grad_norm": 1.56112909, "learning_rate": 3.596e-05, "token_acc": 0.69565217, "epoch": 0.1799775, "global_step/max_steps": "160/8890", "percentage": "1.80%", "elapsed_time": "2m 58s", "remaining_time": "2h 42m 10s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.897211} {"loss": 1.01126647, "grad_norm": 1.29902732, "learning_rate": 3.618e-05, "token_acc": 0.72763819, "epoch": 0.18110236, "global_step/max_steps": "161/8890", "percentage": "1.81%", "elapsed_time": "2m 59s", "remaining_time": "2h 42m 19s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.896293} {"loss": 0.99785113, "grad_norm": 1.22243655, "learning_rate": 3.64e-05, "token_acc": 0.72628726, "epoch": 0.18222722, "global_step/max_steps": "162/8890", "percentage": "1.82%", "elapsed_time": "3m 0s", "remaining_time": "2h 42m 21s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.895997} {"loss": 0.94572651, "grad_norm": 1.47525728, "learning_rate": 3.663e-05, "token_acc": 0.72624113, "epoch": 0.18335208, "global_step/max_steps": "163/8890", "percentage": "1.83%", "elapsed_time": "3m 1s", "remaining_time": "2h 42m 16s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.896326} {"loss": 0.99527872, "grad_norm": 1.50788724, "learning_rate": 3.685e-05, "token_acc": 0.69314079, "epoch": 0.18447694, "global_step/max_steps": "164/8890", "percentage": "1.84%", "elapsed_time": "3m 3s", "remaining_time": "2h 42m 17s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.896082} {"loss": 0.99756253, "grad_norm": 1.48564434, "learning_rate": 3.708e-05, "token_acc": 0.71592357, "epoch": 0.1856018, "global_step/max_steps": "165/8890", "percentage": "1.86%", "elapsed_time": "3m 4s", "remaining_time": "2h 42m 10s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.896656} {"loss": 1.01323259, "grad_norm": 1.40900707, "learning_rate": 3.73e-05, "token_acc": 0.70556161, "epoch": 0.18672666, "global_step/max_steps": "166/8890", "percentage": "1.87%", "elapsed_time": "3m 5s", "remaining_time": "2h 42m 6s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.896923} {"loss": 1.05041623, "grad_norm": 1.54798353, "learning_rate": 3.753e-05, "token_acc": 0.71798189, "epoch": 0.18785152, "global_step/max_steps": "167/8890", "percentage": "1.88%", "elapsed_time": "3m 6s", "remaining_time": "2h 42m 3s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.897127} {"loss": 0.8709265, "grad_norm": 1.58618426, "learning_rate": 3.775e-05, "token_acc": 0.72972973, "epoch": 0.18897638, "global_step/max_steps": "168/8890", "percentage": "1.89%", "elapsed_time": "3m 7s", "remaining_time": "2h 42m 3s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.89703} {"loss": 0.99843955, "grad_norm": 1.54094577, "learning_rate": 3.798e-05, "token_acc": 0.6970128, "epoch": 0.19010124, "global_step/max_steps": "169/8890", "percentage": "1.90%", "elapsed_time": "3m 8s", "remaining_time": "2h 41m 58s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.897392} {"loss": 1.00920415, "grad_norm": 1.53533459, "learning_rate": 3.82e-05, "token_acc": 0.72750317, "epoch": 0.1912261, "global_step/max_steps": "170/8890", "percentage": "1.91%", "elapsed_time": "3m 9s", "remaining_time": "2h 41m 58s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.89729} {"loss": 1.22226906, "grad_norm": 1.49639893, "learning_rate": 3.843e-05, "token_acc": 0.66836216, "epoch": 0.19235096, "global_step/max_steps": "171/8890", "percentage": "1.92%", "elapsed_time": "3m 10s", "remaining_time": "2h 41m 59s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.897041} {"loss": 0.9650889, "grad_norm": 1.47765577, "learning_rate": 3.865e-05, "token_acc": 0.72815534, "epoch": 0.19347582, "global_step/max_steps": "172/8890", "percentage": "1.93%", "elapsed_time": "3m 11s", "remaining_time": "2h 42m 1s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.896781} {"loss": 0.92723346, "grad_norm": 1.49366844, "learning_rate": 3.888e-05, "token_acc": 0.72337662, "epoch": 0.19460067, "global_step/max_steps": "173/8890", "percentage": "1.95%", "elapsed_time": "3m 12s", "remaining_time": "2h 41m 49s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.897786} {"loss": 0.97949916, "grad_norm": 1.36453891, "learning_rate": 3.91e-05, "token_acc": 0.71659919, "epoch": 0.19572553, "global_step/max_steps": "174/8890", "percentage": "1.96%", "elapsed_time": "3m 13s", "remaining_time": "2h 41m 44s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.898185} {"loss": 1.096187, "grad_norm": 1.4073174, "learning_rate": 3.933e-05, "token_acc": 0.70910781, "epoch": 0.19685039, "global_step/max_steps": "175/8890", "percentage": "1.97%", "elapsed_time": "3m 14s", "remaining_time": "2h 41m 38s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.898577} {"loss": 1.03389525, "grad_norm": 1.5859164, "learning_rate": 3.955e-05, "token_acc": 0.71392723, "epoch": 0.19797525, "global_step/max_steps": "176/8890", "percentage": "1.98%", "elapsed_time": "3m 15s", "remaining_time": "2h 41m 39s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.898393} {"loss": 1.03684521, "grad_norm": 1.63318276, "learning_rate": 3.978e-05, "token_acc": 0.680798, "epoch": 0.19910011, "global_step/max_steps": "177/8890", "percentage": "1.99%", "elapsed_time": "3m 16s", "remaining_time": "2h 41m 31s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.899032} {"loss": 0.93490505, "grad_norm": 1.19699287, "learning_rate": 4e-05, "token_acc": 0.74323063, "epoch": 0.20022497, "global_step/max_steps": "178/8890", "percentage": "2.00%", "elapsed_time": "3m 18s", "remaining_time": "2h 41m 31s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.898951} {"loss": 0.94529784, "grad_norm": 1.59906602, "learning_rate": 4.022e-05, "token_acc": 0.71879106, "epoch": 0.20134983, "global_step/max_steps": "179/8890", "percentage": "2.01%", "elapsed_time": "3m 19s", "remaining_time": "2h 41m 32s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.898741} {"loss": 0.96280277, "grad_norm": 1.46515822, "learning_rate": 4.045e-05, "token_acc": 0.74047619, "epoch": 0.20247469, "global_step/max_steps": "180/8890", "percentage": "2.02%", "elapsed_time": "3m 20s", "remaining_time": "2h 41m 32s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.898617} {"loss": 0.99609613, "grad_norm": 1.49292076, "learning_rate": 4.067e-05, "token_acc": 0.72087912, "epoch": 0.20359955, "global_step/max_steps": "181/8890", "percentage": "2.04%", "elapsed_time": "3m 21s", "remaining_time": "2h 41m 29s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.898766} {"loss": 1.10941815, "grad_norm": 1.62848783, "learning_rate": 4.09e-05, "token_acc": 0.70408163, "epoch": 0.20472441, "global_step/max_steps": "182/8890", "percentage": "2.05%", "elapsed_time": "3m 22s", "remaining_time": "2h 41m 24s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.899145} {"loss": 0.97994745, "grad_norm": 1.64035761, "learning_rate": 4.112e-05, "token_acc": 0.71052632, "epoch": 0.20584927, "global_step/max_steps": "183/8890", "percentage": "2.06%", "elapsed_time": "3m 23s", "remaining_time": "2h 41m 24s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.899056} {"loss": 1.10973418, "grad_norm": 1.54055655, "learning_rate": 4.135e-05, "token_acc": 0.69300226, "epoch": 0.20697413, "global_step/max_steps": "184/8890", "percentage": "2.07%", "elapsed_time": "3m 24s", "remaining_time": "2h 41m 26s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.898819} {"loss": 1.12679613, "grad_norm": 1.48765767, "learning_rate": 4.157e-05, "token_acc": 0.70241851, "epoch": 0.20809899, "global_step/max_steps": "185/8890", "percentage": "2.08%", "elapsed_time": "3m 25s", "remaining_time": "2h 41m 32s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.898074} {"loss": 1.06793189, "grad_norm": 1.45966589, "learning_rate": 4.18e-05, "token_acc": 0.7019937, "epoch": 0.20922385, "global_step/max_steps": "186/8890", "percentage": "2.09%", "elapsed_time": "3m 27s", "remaining_time": "2h 41m 44s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.896929} {"loss": 0.82099521, "grad_norm": 1.35852599, "learning_rate": 4.202e-05, "token_acc": 0.75854214, "epoch": 0.21034871, "global_step/max_steps": "187/8890", "percentage": "2.10%", "elapsed_time": "3m 28s", "remaining_time": "2h 41m 39s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.897256} {"loss": 1.36833715, "grad_norm": 1.82614541, "learning_rate": 4.225e-05, "token_acc": 0.6275, "epoch": 0.21147357, "global_step/max_steps": "188/8890", "percentage": "2.11%", "elapsed_time": "3m 29s", "remaining_time": "2h 41m 40s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.897051} {"loss": 0.84459072, "grad_norm": 1.56208372, "learning_rate": 4.247e-05, "token_acc": 0.76584022, "epoch": 0.21259843, "global_step/max_steps": "189/8890", "percentage": "2.13%", "elapsed_time": "3m 30s", "remaining_time": "2h 41m 41s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.896899} {"loss": 0.89201486, "grad_norm": 1.68321168, "learning_rate": 4.27e-05, "token_acc": 0.73721591, "epoch": 0.21372328, "global_step/max_steps": "190/8890", "percentage": "2.14%", "elapsed_time": "3m 31s", "remaining_time": "2h 41m 41s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.896789} {"loss": 0.87877339, "grad_norm": 1.63205707, "learning_rate": 4.292e-05, "token_acc": 0.74162011, "epoch": 0.21484814, "global_step/max_steps": "191/8890", "percentage": "2.15%", "elapsed_time": "3m 32s", "remaining_time": "2h 41m 37s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.897042} {"loss": 1.11289811, "grad_norm": 1.48824954, "learning_rate": 4.315e-05, "token_acc": 0.71347785, "epoch": 0.215973, "global_step/max_steps": "192/8890", "percentage": "2.16%", "elapsed_time": "3m 34s", "remaining_time": "2h 41m 38s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.896804} {"loss": 0.85654354, "grad_norm": 1.27782023, "learning_rate": 4.337e-05, "token_acc": 0.76155718, "epoch": 0.21709786, "global_step/max_steps": "193/8890", "percentage": "2.17%", "elapsed_time": "3m 35s", "remaining_time": "2h 41m 39s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.896642} {"loss": 0.86386371, "grad_norm": 1.31329691, "learning_rate": 4.36e-05, "token_acc": 0.73921569, "epoch": 0.21822272, "global_step/max_steps": "194/8890", "percentage": "2.18%", "elapsed_time": "3m 36s", "remaining_time": "2h 41m 40s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.896489} {"loss": 0.96070683, "grad_norm": 1.57249832, "learning_rate": 4.382e-05, "token_acc": 0.71002387, "epoch": 0.21934758, "global_step/max_steps": "195/8890", "percentage": "2.19%", "elapsed_time": "3m 37s", "remaining_time": "2h 41m 39s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.896482} {"loss": 1.10737205, "grad_norm": 1.55300915, "learning_rate": 4.404e-05, "token_acc": 0.70258621, "epoch": 0.22047244, "global_step/max_steps": "196/8890", "percentage": "2.20%", "elapsed_time": "3m 38s", "remaining_time": "2h 41m 44s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.895888} {"loss": 0.99575317, "grad_norm": 1.46818566, "learning_rate": 4.427e-05, "token_acc": 0.71480804, "epoch": 0.2215973, "global_step/max_steps": "197/8890", "percentage": "2.22%", "elapsed_time": "3m 39s", "remaining_time": "2h 41m 44s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.895727} {"loss": 1.05356705, "grad_norm": 1.79273605, "learning_rate": 4.449e-05, "token_acc": 0.68486352, "epoch": 0.22272216, "global_step/max_steps": "198/8890", "percentage": "2.23%", "elapsed_time": "3m 40s", "remaining_time": "2h 41m 39s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.896121} {"loss": 0.96685672, "grad_norm": 1.64008677, "learning_rate": 4.472e-05, "token_acc": 0.7231405, "epoch": 0.22384702, "global_step/max_steps": "199/8890", "percentage": "2.24%", "elapsed_time": "3m 42s", "remaining_time": "2h 41m 39s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.896043} {"loss": 1.09827161, "grad_norm": 1.52021515, "learning_rate": 4.494e-05, "token_acc": 0.70912738, "epoch": 0.22497188, "global_step/max_steps": "200/8890", "percentage": "2.25%", "elapsed_time": "3m 43s", "remaining_time": "2h 41m 34s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.896355} {"loss": 1.11916518, "grad_norm": 2.03558683, "learning_rate": 4.517e-05, "token_acc": 0.67301587, "epoch": 0.22609674, "global_step/max_steps": "201/8890", "percentage": "2.26%", "elapsed_time": "3m 44s", "remaining_time": "2h 41m 31s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.896548} {"loss": 0.97906876, "grad_norm": 1.65172184, "learning_rate": 4.539e-05, "token_acc": 0.71486486, "epoch": 0.2272216, "global_step/max_steps": "202/8890", "percentage": "2.27%", "elapsed_time": "3m 45s", "remaining_time": "2h 41m 31s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.896481} {"loss": 1.03930736, "grad_norm": 1.55491579, "learning_rate": 4.562e-05, "token_acc": 0.69684639, "epoch": 0.22834646, "global_step/max_steps": "203/8890", "percentage": "2.28%", "elapsed_time": "3m 46s", "remaining_time": "2h 41m 32s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.896282} {"loss": 0.89664996, "grad_norm": 1.74438119, "learning_rate": 4.584e-05, "token_acc": 0.73341523, "epoch": 0.22947132, "global_step/max_steps": "204/8890", "percentage": "2.29%", "elapsed_time": "3m 47s", "remaining_time": "2h 41m 31s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.896207} {"loss": 1.01378322, "grad_norm": 1.72862804, "learning_rate": 4.607e-05, "token_acc": 0.72171651, "epoch": 0.23059618, "global_step/max_steps": "205/8890", "percentage": "2.31%", "elapsed_time": "3m 48s", "remaining_time": "2h 41m 31s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.896132} {"loss": 0.8844738, "grad_norm": 1.51932561, "learning_rate": 4.629e-05, "token_acc": 0.74722565, "epoch": 0.23172103, "global_step/max_steps": "206/8890", "percentage": "2.32%", "elapsed_time": "3m 49s", "remaining_time": "2h 41m 31s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.896065} {"loss": 1.11518431, "grad_norm": 1.81639051, "learning_rate": 4.652e-05, "token_acc": 0.68181818, "epoch": 0.23284589, "global_step/max_steps": "207/8890", "percentage": "2.33%", "elapsed_time": "3m 50s", "remaining_time": "2h 41m 26s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.896371} {"loss": 0.85354972, "grad_norm": 1.43014491, "learning_rate": 4.674e-05, "token_acc": 0.75117925, "epoch": 0.23397075, "global_step/max_steps": "208/8890", "percentage": "2.34%", "elapsed_time": "3m 51s", "remaining_time": "2h 41m 22s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.896678} {"loss": 1.05817628, "grad_norm": 1.64165223, "learning_rate": 4.697e-05, "token_acc": 0.69322709, "epoch": 0.23509561, "global_step/max_steps": "209/8890", "percentage": "2.35%", "elapsed_time": "3m 53s", "remaining_time": "2h 41m 21s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.896642} {"loss": 1.11711597, "grad_norm": 1.55153644, "learning_rate": 4.719e-05, "token_acc": 0.69846154, "epoch": 0.23622047, "global_step/max_steps": "210/8890", "percentage": "2.36%", "elapsed_time": "3m 54s", "remaining_time": "2h 41m 22s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.896472} {"loss": 0.95883799, "grad_norm": 1.34727883, "learning_rate": 4.742e-05, "token_acc": 0.75408426, "epoch": 0.23734533, "global_step/max_steps": "211/8890", "percentage": "2.37%", "elapsed_time": "3m 55s", "remaining_time": "2h 41m 28s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.895794} {"loss": 0.86137706, "grad_norm": 1.37169182, "learning_rate": 4.764e-05, "token_acc": 0.74462114, "epoch": 0.23847019, "global_step/max_steps": "212/8890", "percentage": "2.38%", "elapsed_time": "3m 56s", "remaining_time": "2h 41m 28s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.895668} {"loss": 0.75911391, "grad_norm": 1.36780667, "learning_rate": 4.787e-05, "token_acc": 0.76758045, "epoch": 0.23959505, "global_step/max_steps": "213/8890", "percentage": "2.40%", "elapsed_time": "3m 57s", "remaining_time": "2h 41m 23s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.896019} {"loss": 1.13222694, "grad_norm": 1.70372164, "learning_rate": 4.809e-05, "token_acc": 0.68674699, "epoch": 0.24071991, "global_step/max_steps": "214/8890", "percentage": "2.41%", "elapsed_time": "3m 58s", "remaining_time": "2h 41m 15s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.896674} {"loss": 0.93422866, "grad_norm": 1.79850852, "learning_rate": 4.831e-05, "token_acc": 0.72019465, "epoch": 0.24184477, "global_step/max_steps": "215/8890", "percentage": "2.42%", "elapsed_time": "3m 59s", "remaining_time": "2h 41m 11s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.896933} {"loss": 1.05976057, "grad_norm": 1.5648309, "learning_rate": 4.854e-05, "token_acc": 0.68586387, "epoch": 0.24296963, "global_step/max_steps": "216/8890", "percentage": "2.43%", "elapsed_time": "4m 0s", "remaining_time": "2h 41m 12s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.896779} {"loss": 0.86477351, "grad_norm": 1.65223563, "learning_rate": 4.876e-05, "token_acc": 0.75568862, "epoch": 0.24409449, "global_step/max_steps": "217/8890", "percentage": "2.44%", "elapsed_time": "4m 2s", "remaining_time": "2h 41m 18s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.896077} {"loss": 0.98607439, "grad_norm": 1.65073228, "learning_rate": 4.899e-05, "token_acc": 0.73751452, "epoch": 0.24521935, "global_step/max_steps": "218/8890", "percentage": "2.45%", "elapsed_time": "4m 3s", "remaining_time": "2h 41m 25s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.89537} {"loss": 1.02075219, "grad_norm": 1.50771511, "learning_rate": 4.921e-05, "token_acc": 0.73069106, "epoch": 0.24634421, "global_step/max_steps": "219/8890", "percentage": "2.46%", "elapsed_time": "4m 4s", "remaining_time": "2h 41m 29s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.894854} {"loss": 1.01186013, "grad_norm": 1.55593204, "learning_rate": 4.944e-05, "token_acc": 0.69530355, "epoch": 0.24746907, "global_step/max_steps": "220/8890", "percentage": "2.47%", "elapsed_time": "4m 5s", "remaining_time": "2h 41m 27s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.895006} {"loss": 0.88729072, "grad_norm": 1.57866693, "learning_rate": 4.966e-05, "token_acc": 0.75, "epoch": 0.24859393, "global_step/max_steps": "221/8890", "percentage": "2.49%", "elapsed_time": "4m 7s", "remaining_time": "2h 41m 32s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.894419} {"loss": 1.06291306, "grad_norm": 1.7283839, "learning_rate": 4.989e-05, "token_acc": 0.68763797, "epoch": 0.24971879, "global_step/max_steps": "222/8890", "percentage": "2.50%", "elapsed_time": "4m 8s", "remaining_time": "2h 41m 32s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.89427} {"loss": 1.02882731, "grad_norm": 1.92041409, "learning_rate": 5.011e-05, "token_acc": 0.71676301, "epoch": 0.25084364, "global_step/max_steps": "223/8890", "percentage": "2.51%", "elapsed_time": "4m 9s", "remaining_time": "2h 41m 28s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.8946} {"loss": 0.85543096, "grad_norm": 1.75737524, "learning_rate": 5.034e-05, "token_acc": 0.73430657, "epoch": 0.2519685, "global_step/max_steps": "224/8890", "percentage": "2.52%", "elapsed_time": "4m 10s", "remaining_time": "2h 41m 28s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.894469} {"loss": 1.09116781, "grad_norm": 2.00094032, "learning_rate": 5.056e-05, "token_acc": 0.68539326, "epoch": 0.25309336, "global_step/max_steps": "225/8890", "percentage": "2.53%", "elapsed_time": "4m 11s", "remaining_time": "2h 41m 27s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.894444} {"loss": 1.14014435, "grad_norm": 1.8616972, "learning_rate": 5.079e-05, "token_acc": 0.71347678, "epoch": 0.25421822, "global_step/max_steps": "226/8890", "percentage": "2.54%", "elapsed_time": "4m 12s", "remaining_time": "2h 41m 27s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.894367} {"loss": 0.95118141, "grad_norm": 1.88601887, "learning_rate": 5.101e-05, "token_acc": 0.73579109, "epoch": 0.25534308, "global_step/max_steps": "227/8890", "percentage": "2.55%", "elapsed_time": "4m 13s", "remaining_time": "2h 41m 22s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.894714} {"loss": 1.06613016, "grad_norm": 1.84157288, "learning_rate": 5.124e-05, "token_acc": 0.71497006, "epoch": 0.25646794, "global_step/max_steps": "228/8890", "percentage": "2.56%", "elapsed_time": "4m 14s", "remaining_time": "2h 41m 18s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.894934} {"loss": 1.23136687, "grad_norm": 1.55273926, "learning_rate": 5.146e-05, "token_acc": 0.67930029, "epoch": 0.2575928, "global_step/max_steps": "229/8890", "percentage": "2.58%", "elapsed_time": "4m 15s", "remaining_time": "2h 41m 17s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.894938} {"loss": 0.97565639, "grad_norm": 2.0452404, "learning_rate": 5.169e-05, "token_acc": 0.72082718, "epoch": 0.25871766, "global_step/max_steps": "230/8890", "percentage": "2.59%", "elapsed_time": "4m 16s", "remaining_time": "2h 41m 15s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.895086} {"loss": 1.13038254, "grad_norm": 1.6925112, "learning_rate": 5.191e-05, "token_acc": 0.68008256, "epoch": 0.25984252, "global_step/max_steps": "231/8890", "percentage": "2.60%", "elapsed_time": "4m 18s", "remaining_time": "2h 41m 21s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.89443} {"loss": 1.0046674, "grad_norm": 1.8258121, "learning_rate": 5.213e-05, "token_acc": 0.71068427, "epoch": 0.26096738, "global_step/max_steps": "232/8890", "percentage": "2.61%", "elapsed_time": "4m 19s", "remaining_time": "2h 41m 20s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.894383} {"loss": 0.96442735, "grad_norm": 1.8084693, "learning_rate": 5.236e-05, "token_acc": 0.72156863, "epoch": 0.26209224, "global_step/max_steps": "233/8890", "percentage": "2.62%", "elapsed_time": "4m 20s", "remaining_time": "2h 41m 25s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.893821} {"loss": 1.00173032, "grad_norm": 1.91664267, "learning_rate": 5.258e-05, "token_acc": 0.72775564, "epoch": 0.2632171, "global_step/max_steps": "234/8890", "percentage": "2.63%", "elapsed_time": "4m 21s", "remaining_time": "2h 41m 24s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.893787} {"loss": 1.05998087, "grad_norm": 1.66636336, "learning_rate": 5.281e-05, "token_acc": 0.71196755, "epoch": 0.26434196, "global_step/max_steps": "235/8890", "percentage": "2.64%", "elapsed_time": "4m 22s", "remaining_time": "2h 41m 23s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.893766} {"loss": 0.98814046, "grad_norm": 1.68647945, "learning_rate": 5.303e-05, "token_acc": 0.71041215, "epoch": 0.26546682, "global_step/max_steps": "236/8890", "percentage": "2.65%", "elapsed_time": "4m 24s", "remaining_time": "2h 41m 23s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.893668} {"loss": 0.87668145, "grad_norm": 1.70186591, "learning_rate": 5.326e-05, "token_acc": 0.73633441, "epoch": 0.26659168, "global_step/max_steps": "237/8890", "percentage": "2.67%", "elapsed_time": "4m 25s", "remaining_time": "2h 41m 17s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.894122} {"loss": 1.02079153, "grad_norm": 1.79445004, "learning_rate": 5.348e-05, "token_acc": 0.71741935, "epoch": 0.26771654, "global_step/max_steps": "238/8890", "percentage": "2.68%", "elapsed_time": "4m 26s", "remaining_time": "2h 41m 17s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.894071} {"loss": 1.03209019, "grad_norm": 1.7520678, "learning_rate": 5.371e-05, "token_acc": 0.72129187, "epoch": 0.26884139, "global_step/max_steps": "239/8890", "percentage": "2.69%", "elapsed_time": "4m 27s", "remaining_time": "2h 41m 17s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.893925} {"loss": 1.15604019, "grad_norm": 1.46830797, "learning_rate": 5.393e-05, "token_acc": 0.68956522, "epoch": 0.26996625, "global_step/max_steps": "240/8890", "percentage": "2.70%", "elapsed_time": "4m 28s", "remaining_time": "2h 41m 23s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.893255} {"loss": 0.89060473, "grad_norm": 1.62281895, "learning_rate": 5.416e-05, "token_acc": 0.74253731, "epoch": 0.27109111, "global_step/max_steps": "241/8890", "percentage": "2.71%", "elapsed_time": "4m 29s", "remaining_time": "2h 41m 23s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.893141} {"loss": 0.84687388, "grad_norm": 1.58819699, "learning_rate": 5.438e-05, "token_acc": 0.7654321, "epoch": 0.27221597, "global_step/max_steps": "242/8890", "percentage": "2.72%", "elapsed_time": "4m 30s", "remaining_time": "2h 41m 23s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.893061} {"loss": 0.72665381, "grad_norm": 1.53787231, "learning_rate": 5.461e-05, "token_acc": 0.78051643, "epoch": 0.27334083, "global_step/max_steps": "243/8890", "percentage": "2.73%", "elapsed_time": "4m 32s", "remaining_time": "2h 41m 22s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.89304} {"loss": 0.79552215, "grad_norm": 1.63555908, "learning_rate": 5.483e-05, "token_acc": 0.76167076, "epoch": 0.27446569, "global_step/max_steps": "244/8890", "percentage": "2.74%", "elapsed_time": "4m 33s", "remaining_time": "2h 41m 23s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.892853} {"loss": 0.91750598, "grad_norm": 1.77634811, "learning_rate": 5.506e-05, "token_acc": 0.7369697, "epoch": 0.27559055, "global_step/max_steps": "245/8890", "percentage": "2.76%", "elapsed_time": "4m 34s", "remaining_time": "2h 41m 22s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.892845} {"loss": 0.91946244, "grad_norm": 1.76146507, "learning_rate": 5.528e-05, "token_acc": 0.76205128, "epoch": 0.27671541, "global_step/max_steps": "246/8890", "percentage": "2.77%", "elapsed_time": "4m 35s", "remaining_time": "2h 41m 22s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.892785} {"loss": 1.00178671, "grad_norm": 1.77467191, "learning_rate": 5.551e-05, "token_acc": 0.74902471, "epoch": 0.27784027, "global_step/max_steps": "247/8890", "percentage": "2.78%", "elapsed_time": "4m 36s", "remaining_time": "2h 41m 17s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.893115} {"loss": 1.03638041, "grad_norm": 1.83395612, "learning_rate": 5.573e-05, "token_acc": 0.69518072, "epoch": 0.27896513, "global_step/max_steps": "248/8890", "percentage": "2.79%", "elapsed_time": "4m 37s", "remaining_time": "2h 41m 17s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.89303} {"loss": 1.06268394, "grad_norm": 1.68134367, "learning_rate": 5.596e-05, "token_acc": 0.70247046, "epoch": 0.28008999, "global_step/max_steps": "249/8890", "percentage": "2.80%", "elapsed_time": "4m 38s", "remaining_time": "2h 41m 16s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.892968} {"loss": 0.93722266, "grad_norm": 1.63598788, "learning_rate": 5.618e-05, "token_acc": 0.7089372, "epoch": 0.28121485, "global_step/max_steps": "250/8890", "percentage": "2.81%", "elapsed_time": "4m 39s", "remaining_time": "2h 41m 15s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.892955} {"loss": 1.01767492, "grad_norm": 1.77486217, "learning_rate": 5.64e-05, "token_acc": 0.71496437, "epoch": 0.28233971, "global_step/max_steps": "251/8890", "percentage": "2.82%", "elapsed_time": "4m 41s", "remaining_time": "2h 41m 13s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.893061} {"loss": 0.89156008, "grad_norm": 1.97583687, "learning_rate": 5.663e-05, "token_acc": 0.74704142, "epoch": 0.28346457, "global_step/max_steps": "252/8890", "percentage": "2.83%", "elapsed_time": "4m 42s", "remaining_time": "2h 41m 10s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.893275} {"loss": 1.0423491, "grad_norm": 1.48021972, "learning_rate": 5.685e-05, "token_acc": 0.70914127, "epoch": 0.28458943, "global_step/max_steps": "253/8890", "percentage": "2.85%", "elapsed_time": "4m 43s", "remaining_time": "2h 41m 10s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.893151} {"loss": 1.04850459, "grad_norm": 1.71012521, "learning_rate": 5.708e-05, "token_acc": 0.69381443, "epoch": 0.28571429, "global_step/max_steps": "254/8890", "percentage": "2.86%", "elapsed_time": "4m 44s", "remaining_time": "2h 41m 10s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.893037} {"loss": 1.05106676, "grad_norm": 1.84495389, "learning_rate": 5.73e-05, "token_acc": 0.70743405, "epoch": 0.28683915, "global_step/max_steps": "255/8890", "percentage": "2.87%", "elapsed_time": "4m 45s", "remaining_time": "2h 41m 3s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.893591} {"loss": 1.01633811, "grad_norm": 1.67702746, "learning_rate": 5.753e-05, "token_acc": 0.71717172, "epoch": 0.287964, "global_step/max_steps": "256/8890", "percentage": "2.88%", "elapsed_time": "4m 46s", "remaining_time": "2h 41m 3s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.893506} {"loss": 1.12074161, "grad_norm": 1.5033226, "learning_rate": 5.775e-05, "token_acc": 0.71280603, "epoch": 0.28908886, "global_step/max_steps": "257/8890", "percentage": "2.89%", "elapsed_time": "4m 47s", "remaining_time": "2h 41m 3s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.893335} {"loss": 0.89283574, "grad_norm": 1.65105152, "learning_rate": 5.798e-05, "token_acc": 0.7439759, "epoch": 0.29021372, "global_step/max_steps": "258/8890", "percentage": "2.90%", "elapsed_time": "4m 48s", "remaining_time": "2h 41m 3s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.893303} {"loss": 1.00849307, "grad_norm": 1.88232434, "learning_rate": 5.82e-05, "token_acc": 0.7076326, "epoch": 0.29133858, "global_step/max_steps": "259/8890", "percentage": "2.91%", "elapsed_time": "4m 49s", "remaining_time": "2h 41m 3s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.893201} {"loss": 0.93775183, "grad_norm": 1.5926373, "learning_rate": 5.843e-05, "token_acc": 0.74033149, "epoch": 0.29246344, "global_step/max_steps": "260/8890", "percentage": "2.92%", "elapsed_time": "4m 51s", "remaining_time": "2h 41m 2s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.893146} {"loss": 1.13691449, "grad_norm": 1.97136509, "learning_rate": 5.865e-05, "token_acc": 0.68935236, "epoch": 0.2935883, "global_step/max_steps": "261/8890", "percentage": "2.94%", "elapsed_time": "4m 52s", "remaining_time": "2h 40m 54s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.893819} {"loss": 1.09307241, "grad_norm": 1.78016412, "learning_rate": 5.888e-05, "token_acc": 0.70526316, "epoch": 0.29471316, "global_step/max_steps": "262/8890", "percentage": "2.95%", "elapsed_time": "4m 53s", "remaining_time": "2h 40m 53s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.893749} {"loss": 1.04744792, "grad_norm": 2.00931525, "learning_rate": 5.91e-05, "token_acc": 0.70381232, "epoch": 0.29583802, "global_step/max_steps": "263/8890", "percentage": "2.96%", "elapsed_time": "4m 54s", "remaining_time": "2h 40m 49s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.894059} {"loss": 1.16375446, "grad_norm": 1.77058077, "learning_rate": 5.933e-05, "token_acc": 0.67592593, "epoch": 0.29696288, "global_step/max_steps": "264/8890", "percentage": "2.97%", "elapsed_time": "4m 55s", "remaining_time": "2h 40m 54s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.893508} {"loss": 0.9982096, "grad_norm": 1.78732288, "learning_rate": 5.955e-05, "token_acc": 0.71551724, "epoch": 0.29808774, "global_step/max_steps": "265/8890", "percentage": "2.98%", "elapsed_time": "4m 56s", "remaining_time": "2h 40m 54s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.893389} {"loss": 1.00606012, "grad_norm": 2.20267892, "learning_rate": 5.978e-05, "token_acc": 0.71798561, "epoch": 0.2992126, "global_step/max_steps": "266/8890", "percentage": "2.99%", "elapsed_time": "4m 57s", "remaining_time": "2h 40m 51s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.893579} {"loss": 1.07221961, "grad_norm": 1.96596277, "learning_rate": 6e-05, "token_acc": 0.70557717, "epoch": 0.30033746, "global_step/max_steps": "267/8890", "percentage": "3.00%", "elapsed_time": "4m 58s", "remaining_time": "2h 40m 50s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.893546} {"loss": 1.09054995, "grad_norm": 2.01285934, "learning_rate": 6.022e-05, "token_acc": 0.68842365, "epoch": 0.30146232, "global_step/max_steps": "268/8890", "percentage": "3.01%", "elapsed_time": "4m 59s", "remaining_time": "2h 40m 50s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.89345} {"loss": 1.00180972, "grad_norm": 1.80151677, "learning_rate": 6.045e-05, "token_acc": 0.72311213, "epoch": 0.30258718, "global_step/max_steps": "269/8890", "percentage": "3.03%", "elapsed_time": "5m 1s", "remaining_time": "2h 40m 49s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.893374} {"loss": 0.86053205, "grad_norm": 1.65420353, "learning_rate": 6.067e-05, "token_acc": 0.74172185, "epoch": 0.30371204, "global_step/max_steps": "270/8890", "percentage": "3.04%", "elapsed_time": "5m 2s", "remaining_time": "2h 40m 49s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.893336} {"loss": 1.03862882, "grad_norm": 1.88167655, "learning_rate": 6.09e-05, "token_acc": 0.69977427, "epoch": 0.3048369, "global_step/max_steps": "271/8890", "percentage": "3.05%", "elapsed_time": "5m 3s", "remaining_time": "2h 40m 48s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.893307} {"loss": 1.03604412, "grad_norm": 1.59245718, "learning_rate": 6.112e-05, "token_acc": 0.71992976, "epoch": 0.30596175, "global_step/max_steps": "272/8890", "percentage": "3.06%", "elapsed_time": "5m 4s", "remaining_time": "2h 40m 48s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.893185} {"loss": 0.95124698, "grad_norm": 1.67217267, "learning_rate": 6.135e-05, "token_acc": 0.73318386, "epoch": 0.30708661, "global_step/max_steps": "273/8890", "percentage": "3.07%", "elapsed_time": "5m 5s", "remaining_time": "2h 40m 45s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.893371} {"loss": 0.91406262, "grad_norm": 1.80854809, "learning_rate": 6.157e-05, "token_acc": 0.73375, "epoch": 0.30821147, "global_step/max_steps": "274/8890", "percentage": "3.08%", "elapsed_time": "5m 6s", "remaining_time": "2h 40m 44s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.893332} {"loss": 0.96320564, "grad_norm": 2.12781501, "learning_rate": 6.18e-05, "token_acc": 0.73451327, "epoch": 0.30933633, "global_step/max_steps": "275/8890", "percentage": "3.09%", "elapsed_time": "5m 7s", "remaining_time": "2h 40m 41s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.893556} {"loss": 0.98564935, "grad_norm": 2.04999518, "learning_rate": 6.202e-05, "token_acc": 0.7092511, "epoch": 0.31046119, "global_step/max_steps": "276/8890", "percentage": "3.10%", "elapsed_time": "5m 8s", "remaining_time": "2h 40m 38s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.893737} {"loss": 1.10922432, "grad_norm": 1.62357354, "learning_rate": 6.225e-05, "token_acc": 0.67769376, "epoch": 0.31158605, "global_step/max_steps": "277/8890", "percentage": "3.12%", "elapsed_time": "5m 10s", "remaining_time": "2h 40m 42s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.893272} {"loss": 0.93211567, "grad_norm": 1.54869378, "learning_rate": 6.247e-05, "token_acc": 0.74087933, "epoch": 0.31271091, "global_step/max_steps": "278/8890", "percentage": "3.13%", "elapsed_time": "5m 11s", "remaining_time": "2h 40m 35s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.89378} {"loss": 0.99520749, "grad_norm": 2.31955695, "learning_rate": 6.27e-05, "token_acc": 0.69681742, "epoch": 0.31383577, "global_step/max_steps": "279/8890", "percentage": "3.14%", "elapsed_time": "5m 11s", "remaining_time": "2h 40m 23s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.894771} {"loss": 1.08103049, "grad_norm": 1.95568073, "learning_rate": 6.292e-05, "token_acc": 0.68948035, "epoch": 0.31496063, "global_step/max_steps": "280/8890", "percentage": "3.15%", "elapsed_time": "5m 12s", "remaining_time": "2h 40m 20s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.894964} {"loss": 1.01375997, "grad_norm": 1.87797594, "learning_rate": 6.315e-05, "token_acc": 0.71029083, "epoch": 0.31608549, "global_step/max_steps": "281/8890", "percentage": "3.16%", "elapsed_time": "5m 13s", "remaining_time": "2h 40m 19s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.894932} {"loss": 0.95424938, "grad_norm": 1.65769899, "learning_rate": 6.337e-05, "token_acc": 0.72249752, "epoch": 0.31721035, "global_step/max_steps": "282/8890", "percentage": "3.17%", "elapsed_time": "5m 15s", "remaining_time": "2h 40m 19s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.894881} {"loss": 1.02263021, "grad_norm": 1.69913816, "learning_rate": 6.36e-05, "token_acc": 0.71763393, "epoch": 0.31833521, "global_step/max_steps": "283/8890", "percentage": "3.18%", "elapsed_time": "5m 16s", "remaining_time": "2h 40m 25s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.894209} {"loss": 1.04578042, "grad_norm": 2.09796381, "learning_rate": 6.382e-05, "token_acc": 0.69354839, "epoch": 0.31946007, "global_step/max_steps": "284/8890", "percentage": "3.19%", "elapsed_time": "5m 17s", "remaining_time": "2h 40m 24s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.894168} {"loss": 1.03832626, "grad_norm": 1.76308453, "learning_rate": 6.404e-05, "token_acc": 0.70666667, "epoch": 0.32058493, "global_step/max_steps": "285/8890", "percentage": "3.21%", "elapsed_time": "5m 18s", "remaining_time": "2h 40m 24s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.894054} {"loss": 1.14544916, "grad_norm": 1.46289313, "learning_rate": 6.427e-05, "token_acc": 0.67952128, "epoch": 0.32170979, "global_step/max_steps": "286/8890", "percentage": "3.22%", "elapsed_time": "5m 20s", "remaining_time": "2h 40m 31s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.893301} {"loss": 1.07789671, "grad_norm": 1.82121253, "learning_rate": 6.449e-05, "token_acc": 0.70662461, "epoch": 0.32283465, "global_step/max_steps": "287/8890", "percentage": "3.23%", "elapsed_time": "5m 21s", "remaining_time": "2h 40m 37s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.892647} {"loss": 0.82444775, "grad_norm": 1.55123103, "learning_rate": 6.472e-05, "token_acc": 0.75726141, "epoch": 0.32395951, "global_step/max_steps": "288/8890", "percentage": "3.24%", "elapsed_time": "5m 22s", "remaining_time": "2h 40m 37s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.892584} {"loss": 1.12013662, "grad_norm": 1.84743428, "learning_rate": 6.494e-05, "token_acc": 0.6835443, "epoch": 0.32508436, "global_step/max_steps": "289/8890", "percentage": "3.25%", "elapsed_time": "5m 23s", "remaining_time": "2h 40m 31s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.892996} {"loss": 0.92770284, "grad_norm": 1.70694137, "learning_rate": 6.517e-05, "token_acc": 0.74731824, "epoch": 0.32620922, "global_step/max_steps": "290/8890", "percentage": "3.26%", "elapsed_time": "5m 24s", "remaining_time": "2h 40m 27s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.893279} {"loss": 1.13274598, "grad_norm": 1.88545239, "learning_rate": 6.539e-05, "token_acc": 0.70155902, "epoch": 0.32733408, "global_step/max_steps": "291/8890", "percentage": "3.27%", "elapsed_time": "5m 25s", "remaining_time": "2h 40m 31s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.892816} {"loss": 0.96120375, "grad_norm": 2.0194459, "learning_rate": 6.562e-05, "token_acc": 0.71033479, "epoch": 0.32845894, "global_step/max_steps": "292/8890", "percentage": "3.28%", "elapsed_time": "5m 27s", "remaining_time": "2h 40m 31s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.892691} {"loss": 1.06700432, "grad_norm": 1.5913291, "learning_rate": 6.584e-05, "token_acc": 0.71521336, "epoch": 0.3295838, "global_step/max_steps": "293/8890", "percentage": "3.30%", "elapsed_time": "5m 28s", "remaining_time": "2h 40m 31s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.892556} {"loss": 1.14460421, "grad_norm": 1.97313643, "learning_rate": 6.607e-05, "token_acc": 0.68726823, "epoch": 0.33070866, "global_step/max_steps": "294/8890", "percentage": "3.31%", "elapsed_time": "5m 29s", "remaining_time": "2h 40m 30s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.892541} {"loss": 0.79391176, "grad_norm": 1.72744775, "learning_rate": 6.629e-05, "token_acc": 0.75923567, "epoch": 0.33183352, "global_step/max_steps": "295/8890", "percentage": "3.32%", "elapsed_time": "5m 30s", "remaining_time": "2h 40m 30s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.892518} {"loss": 0.94619989, "grad_norm": 1.27148187, "learning_rate": 6.652e-05, "token_acc": 0.73031359, "epoch": 0.33295838, "global_step/max_steps": "296/8890", "percentage": "3.33%", "elapsed_time": "5m 31s", "remaining_time": "2h 40m 34s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.892037} {"loss": 1.08124447, "grad_norm": 2.0224123, "learning_rate": 6.674e-05, "token_acc": 0.70711297, "epoch": 0.33408324, "global_step/max_steps": "297/8890", "percentage": "3.34%", "elapsed_time": "5m 32s", "remaining_time": "2h 40m 30s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.89231} {"loss": 0.91233659, "grad_norm": 1.71750677, "learning_rate": 6.697e-05, "token_acc": 0.72222222, "epoch": 0.3352081, "global_step/max_steps": "298/8890", "percentage": "3.35%", "elapsed_time": "5m 34s", "remaining_time": "2h 40m 30s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.892206} {"loss": 1.01974702, "grad_norm": 1.86967123, "learning_rate": 6.719e-05, "token_acc": 0.7309185, "epoch": 0.33633296, "global_step/max_steps": "299/8890", "percentage": "3.36%", "elapsed_time": "5m 35s", "remaining_time": "2h 40m 29s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.89213} {"loss": 1.1613307, "grad_norm": 1.81685638, "learning_rate": 6.742e-05, "token_acc": 0.69414317, "epoch": 0.33745782, "global_step/max_steps": "300/8890", "percentage": "3.37%", "elapsed_time": "5m 36s", "remaining_time": "2h 40m 29s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.892011} {"eval_loss": 0.96907377, "eval_runtime": 32.013, "eval_samples_per_second": 25.084, "eval_steps_per_second": 3.155, "eval_token_acc": 0.71992801, "epoch": 0.33745782, "global_step/max_steps": "300/8890", "percentage": "3.37%", "elapsed_time": "6m 8s", "remaining_time": "2h 55m 46s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.814464} {"loss": 0.91158652, "grad_norm": 1.69965184, "learning_rate": 6.764e-05, "token_acc": 0.73726852, "epoch": 0.33858268, "global_step/max_steps": "301/8890", "percentage": "3.39%", "elapsed_time": "6m 23s", "remaining_time": "3h 2m 35s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.784012} {"loss": 0.86291647, "grad_norm": 1.65644503, "learning_rate": 6.787e-05, "token_acc": 0.74623116, "epoch": 0.33970754, "global_step/max_steps": "302/8890", "percentage": "3.40%", "elapsed_time": "6m 25s", "remaining_time": "3h 2m 29s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.784295} {"loss": 1.1120404, "grad_norm": 1.51749706, "learning_rate": 6.809e-05, "token_acc": 0.67076923, "epoch": 0.3408324, "global_step/max_steps": "303/8890", "percentage": "3.41%", "elapsed_time": "6m 26s", "remaining_time": "3h 2m 28s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.784281} {"loss": 1.15173018, "grad_norm": 1.85916543, "learning_rate": 6.831e-05, "token_acc": 0.68323587, "epoch": 0.34195726, "global_step/max_steps": "304/8890", "percentage": "3.42%", "elapsed_time": "6m 27s", "remaining_time": "3h 2m 23s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.784567} {"loss": 1.00364351, "grad_norm": 1.83838058, "learning_rate": 6.854e-05, "token_acc": 0.71068548, "epoch": 0.34308211, "global_step/max_steps": "305/8890", "percentage": "3.43%", "elapsed_time": "6m 28s", "remaining_time": "3h 2m 19s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.784775} {"loss": 0.99507171, "grad_norm": 1.827829, "learning_rate": 6.876e-05, "token_acc": 0.7236404, "epoch": 0.34420697, "global_step/max_steps": "306/8890", "percentage": "3.44%", "elapsed_time": "6m 29s", "remaining_time": "3h 2m 14s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.785048} {"loss": 1.01696932, "grad_norm": 1.69316185, "learning_rate": 6.899e-05, "token_acc": 0.71472393, "epoch": 0.34533183, "global_step/max_steps": "307/8890", "percentage": "3.45%", "elapsed_time": "6m 30s", "remaining_time": "3h 2m 10s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.785254} {"loss": 1.06186855, "grad_norm": 1.71786892, "learning_rate": 6.921e-05, "token_acc": 0.69579646, "epoch": 0.34645669, "global_step/max_steps": "308/8890", "percentage": "3.46%", "elapsed_time": "6m 32s", "remaining_time": "3h 2m 5s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.785493} {"loss": 1.07772994, "grad_norm": 1.79668713, "learning_rate": 6.944e-05, "token_acc": 0.6878147, "epoch": 0.34758155, "global_step/max_steps": "309/8890", "percentage": "3.48%", "elapsed_time": "6m 33s", "remaining_time": "3h 2m 5s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.78541} {"loss": 0.9578858, "grad_norm": 1.91420615, "learning_rate": 6.966e-05, "token_acc": 0.72966781, "epoch": 0.34870641, "global_step/max_steps": "310/8890", "percentage": "3.49%", "elapsed_time": "6m 34s", "remaining_time": "3h 1m 58s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.785801} {"loss": 0.92804873, "grad_norm": 2.13187671, "learning_rate": 6.989e-05, "token_acc": 0.71888726, "epoch": 0.34983127, "global_step/max_steps": "311/8890", "percentage": "3.50%", "elapsed_time": "6m 35s", "remaining_time": "3h 1m 54s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.786031} {"loss": 1.1335876, "grad_norm": 1.90993237, "learning_rate": 7.011e-05, "token_acc": 0.69604863, "epoch": 0.35095613, "global_step/max_steps": "312/8890", "percentage": "3.51%", "elapsed_time": "6m 36s", "remaining_time": "3h 1m 50s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.786228} {"loss": 1.04328096, "grad_norm": 1.59891081, "learning_rate": 7.034e-05, "token_acc": 0.72139303, "epoch": 0.35208099, "global_step/max_steps": "313/8890", "percentage": "3.52%", "elapsed_time": "6m 37s", "remaining_time": "3h 1m 40s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.786852} {"loss": 1.20658863, "grad_norm": 1.87849891, "learning_rate": 7.056e-05, "token_acc": 0.68862275, "epoch": 0.35320585, "global_step/max_steps": "314/8890", "percentage": "3.53%", "elapsed_time": "6m 38s", "remaining_time": "3h 1m 34s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.787176} {"loss": 0.9105646, "grad_norm": 1.68480194, "learning_rate": 7.079e-05, "token_acc": 0.75408163, "epoch": 0.35433071, "global_step/max_steps": "315/8890", "percentage": "3.54%", "elapsed_time": "6m 40s", "remaining_time": "3h 1m 29s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.787464} {"loss": 1.14193821, "grad_norm": 1.62084472, "learning_rate": 7.101e-05, "token_acc": 0.71217712, "epoch": 0.35545557, "global_step/max_steps": "316/8890", "percentage": "3.55%", "elapsed_time": "6m 41s", "remaining_time": "3h 1m 25s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.78768} {"loss": 0.97138041, "grad_norm": 1.75173688, "learning_rate": 7.124e-05, "token_acc": 0.7206235, "epoch": 0.35658043, "global_step/max_steps": "317/8890", "percentage": "3.57%", "elapsed_time": "6m 42s", "remaining_time": "3h 1m 20s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.787911} {"loss": 1.10007071, "grad_norm": 2.20463228, "learning_rate": 7.146e-05, "token_acc": 0.68587106, "epoch": 0.35770529, "global_step/max_steps": "318/8890", "percentage": "3.58%", "elapsed_time": "6m 43s", "remaining_time": "3h 1m 13s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.788332} {"loss": 0.83222723, "grad_norm": 1.82401979, "learning_rate": 7.169e-05, "token_acc": 0.76052632, "epoch": 0.35883015, "global_step/max_steps": "319/8890", "percentage": "3.59%", "elapsed_time": "6m 44s", "remaining_time": "3h 1m 8s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.788588} {"loss": 1.03077722, "grad_norm": 1.88663137, "learning_rate": 7.191e-05, "token_acc": 0.7053021, "epoch": 0.35995501, "global_step/max_steps": "320/8890", "percentage": "3.60%", "elapsed_time": "6m 45s", "remaining_time": "3h 1m 4s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.788788} {"loss": 1.11828232, "grad_norm": 1.74631965, "learning_rate": 7.213e-05, "token_acc": 0.68719611, "epoch": 0.36107987, "global_step/max_steps": "321/8890", "percentage": "3.61%", "elapsed_time": "6m 46s", "remaining_time": "3h 1m 0s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.788992} {"loss": 0.93227082, "grad_norm": 1.97921181, "learning_rate": 7.236e-05, "token_acc": 0.74165457, "epoch": 0.36220472, "global_step/max_steps": "322/8890", "percentage": "3.62%", "elapsed_time": "6m 47s", "remaining_time": "3h 0m 53s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.789404} {"loss": 0.89963067, "grad_norm": 1.97894549, "learning_rate": 7.258e-05, "token_acc": 0.73292868, "epoch": 0.36332958, "global_step/max_steps": "323/8890", "percentage": "3.63%", "elapsed_time": "6m 49s", "remaining_time": "3h 0m 49s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.789648} {"loss": 1.22239017, "grad_norm": 1.90407383, "learning_rate": 7.281e-05, "token_acc": 0.65698925, "epoch": 0.36445444, "global_step/max_steps": "324/8890", "percentage": "3.64%", "elapsed_time": "6m 50s", "remaining_time": "3h 0m 46s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.789758} {"loss": 0.94652772, "grad_norm": 1.86995196, "learning_rate": 7.303e-05, "token_acc": 0.70310559, "epoch": 0.3655793, "global_step/max_steps": "325/8890", "percentage": "3.66%", "elapsed_time": "6m 51s", "remaining_time": "3h 0m 41s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.790034} {"loss": 0.75732237, "grad_norm": 1.75253141, "learning_rate": 7.326e-05, "token_acc": 0.7699005, "epoch": 0.36670416, "global_step/max_steps": "326/8890", "percentage": "3.67%", "elapsed_time": "6m 52s", "remaining_time": "3h 0m 36s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.79028} {"loss": 0.85820198, "grad_norm": 1.96407425, "learning_rate": 7.348e-05, "token_acc": 0.72470978, "epoch": 0.36782902, "global_step/max_steps": "327/8890", "percentage": "3.68%", "elapsed_time": "6m 53s", "remaining_time": "3h 0m 32s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.790498} {"loss": 1.01585102, "grad_norm": 1.60530484, "learning_rate": 7.371e-05, "token_acc": 0.71630435, "epoch": 0.36895388, "global_step/max_steps": "328/8890", "percentage": "3.69%", "elapsed_time": "6m 54s", "remaining_time": "3h 0m 28s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.79072} {"loss": 1.07460129, "grad_norm": 1.98194873, "learning_rate": 7.393e-05, "token_acc": 0.69735183, "epoch": 0.37007874, "global_step/max_steps": "329/8890", "percentage": "3.70%", "elapsed_time": "6m 55s", "remaining_time": "3h 0m 24s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.790916} {"loss": 0.93298233, "grad_norm": 1.92009068, "learning_rate": 7.416e-05, "token_acc": 0.74932615, "epoch": 0.3712036, "global_step/max_steps": "330/8890", "percentage": "3.71%", "elapsed_time": "6m 57s", "remaining_time": "3h 0m 19s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.791133} {"loss": 0.91582584, "grad_norm": 1.85506284, "learning_rate": 7.438e-05, "token_acc": 0.73543689, "epoch": 0.37232846, "global_step/max_steps": "331/8890", "percentage": "3.72%", "elapsed_time": "6m 58s", "remaining_time": "3h 0m 13s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.791529} {"loss": 1.05165458, "grad_norm": 1.77586389, "learning_rate": 7.461e-05, "token_acc": 0.71694215, "epoch": 0.37345332, "global_step/max_steps": "332/8890", "percentage": "3.73%", "elapsed_time": "6m 59s", "remaining_time": "3h 0m 10s", "memory(GiB)": 22.5, "train_speed(iter/s)": 0.791674} {"loss": 0.92727828, "grad_norm": 1.55809939, "learning_rate": 7.483e-05, "token_acc": 0.73394495, "epoch": 0.37457818, "global_step/max_steps": "333/8890", "percentage": "3.75%", "elapsed_time": "7m 0s", "remaining_time": "3h 0m 9s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.791594} {"loss": 0.9680391, "grad_norm": 1.9125303, "learning_rate": 7.506e-05, "token_acc": 0.73684211, "epoch": 0.37570304, "global_step/max_steps": "334/8890", "percentage": "3.76%", "elapsed_time": "7m 1s", "remaining_time": "3h 0m 5s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.79179} {"loss": 1.05613852, "grad_norm": 1.67259789, "learning_rate": 7.528e-05, "token_acc": 0.72124352, "epoch": 0.3768279, "global_step/max_steps": "335/8890", "percentage": "3.77%", "elapsed_time": "7m 3s", "remaining_time": "3h 0m 2s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.791952} {"loss": 0.9434557, "grad_norm": 1.92253947, "learning_rate": 7.551e-05, "token_acc": 0.71705426, "epoch": 0.37795276, "global_step/max_steps": "336/8890", "percentage": "3.78%", "elapsed_time": "7m 4s", "remaining_time": "2h 59m 57s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.792206} {"loss": 0.9698993, "grad_norm": 1.95607316, "learning_rate": 7.573e-05, "token_acc": 0.71100917, "epoch": 0.37907762, "global_step/max_steps": "337/8890", "percentage": "3.79%", "elapsed_time": "7m 5s", "remaining_time": "2h 59m 52s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.792463} {"loss": 1.10203207, "grad_norm": 1.87304807, "learning_rate": 7.596e-05, "token_acc": 0.69639066, "epoch": 0.38020247, "global_step/max_steps": "338/8890", "percentage": "3.80%", "elapsed_time": "7m 6s", "remaining_time": "2h 59m 48s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.792688} {"loss": 1.01789331, "grad_norm": 1.95869267, "learning_rate": 7.618e-05, "token_acc": 0.72079772, "epoch": 0.38132733, "global_step/max_steps": "339/8890", "percentage": "3.81%", "elapsed_time": "7m 7s", "remaining_time": "2h 59m 42s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.79307} {"loss": 0.79843205, "grad_norm": 1.56152141, "learning_rate": 7.64e-05, "token_acc": 0.77238403, "epoch": 0.38245219, "global_step/max_steps": "340/8890", "percentage": "3.82%", "elapsed_time": "7m 8s", "remaining_time": "2h 59m 38s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.793258} {"loss": 0.958435, "grad_norm": 2.08478642, "learning_rate": 7.663e-05, "token_acc": 0.73538012, "epoch": 0.38357705, "global_step/max_steps": "341/8890", "percentage": "3.84%", "elapsed_time": "7m 10s", "remaining_time": "2h 59m 42s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.792853} {"loss": 1.0064764, "grad_norm": 1.81250119, "learning_rate": 7.685e-05, "token_acc": 0.72269807, "epoch": 0.38470191, "global_step/max_steps": "342/8890", "percentage": "3.85%", "elapsed_time": "7m 11s", "remaining_time": "2h 59m 38s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.793063} {"loss": 0.93153858, "grad_norm": 1.64321339, "learning_rate": 7.708e-05, "token_acc": 0.7305586, "epoch": 0.38582677, "global_step/max_steps": "343/8890", "percentage": "3.86%", "elapsed_time": "7m 12s", "remaining_time": "2h 59m 34s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.793285} {"loss": 0.92164731, "grad_norm": 1.54672623, "learning_rate": 7.73e-05, "token_acc": 0.72791878, "epoch": 0.38695163, "global_step/max_steps": "344/8890", "percentage": "3.87%", "elapsed_time": "7m 13s", "remaining_time": "2h 59m 30s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.793479} {"loss": 0.9662903, "grad_norm": 1.71765637, "learning_rate": 7.753e-05, "token_acc": 0.71268238, "epoch": 0.38807649, "global_step/max_steps": "345/8890", "percentage": "3.88%", "elapsed_time": "7m 14s", "remaining_time": "2h 59m 26s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.793641} {"loss": 0.96274292, "grad_norm": 1.47142625, "learning_rate": 7.775e-05, "token_acc": 0.73357336, "epoch": 0.38920135, "global_step/max_steps": "346/8890", "percentage": "3.89%", "elapsed_time": "7m 15s", "remaining_time": "2h 59m 23s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.793818} {"loss": 1.03696406, "grad_norm": 1.55588984, "learning_rate": 7.798e-05, "token_acc": 0.71375808, "epoch": 0.39032621, "global_step/max_steps": "347/8890", "percentage": "3.90%", "elapsed_time": "7m 17s", "remaining_time": "2h 59m 19s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.794024} {"loss": 1.02533293, "grad_norm": 1.61494219, "learning_rate": 7.82e-05, "token_acc": 0.71398305, "epoch": 0.39145107, "global_step/max_steps": "348/8890", "percentage": "3.91%", "elapsed_time": "7m 18s", "remaining_time": "2h 59m 15s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.794205} {"loss": 1.07795668, "grad_norm": 1.72146523, "learning_rate": 7.843e-05, "token_acc": 0.71845673, "epoch": 0.39257593, "global_step/max_steps": "349/8890", "percentage": "3.93%", "elapsed_time": "7m 19s", "remaining_time": "2h 59m 12s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.794364} {"loss": 1.04063582, "grad_norm": 1.95470941, "learning_rate": 7.865e-05, "token_acc": 0.72969697, "epoch": 0.39370079, "global_step/max_steps": "350/8890", "percentage": "3.94%", "elapsed_time": "7m 20s", "remaining_time": "2h 59m 8s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.794522} {"loss": 0.86781722, "grad_norm": 1.8684833, "learning_rate": 7.888e-05, "token_acc": 0.73286052, "epoch": 0.39482565, "global_step/max_steps": "351/8890", "percentage": "3.95%", "elapsed_time": "7m 21s", "remaining_time": "2h 59m 4s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.794707} {"loss": 0.67698801, "grad_norm": 1.77700984, "learning_rate": 7.91e-05, "token_acc": 0.79756469, "epoch": 0.39595051, "global_step/max_steps": "352/8890", "percentage": "3.96%", "elapsed_time": "7m 22s", "remaining_time": "2h 58m 58s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.795096} {"loss": 1.01605225, "grad_norm": 1.90918708, "learning_rate": 7.933e-05, "token_acc": 0.72815534, "epoch": 0.39707537, "global_step/max_steps": "353/8890", "percentage": "3.97%", "elapsed_time": "7m 23s", "remaining_time": "2h 58m 49s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.795661} {"loss": 0.95965445, "grad_norm": 2.05032706, "learning_rate": 7.955e-05, "token_acc": 0.75227964, "epoch": 0.39820022, "global_step/max_steps": "354/8890", "percentage": "3.98%", "elapsed_time": "7m 24s", "remaining_time": "2h 58m 44s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.795907} {"loss": 1.17659187, "grad_norm": 2.10553432, "learning_rate": 7.978e-05, "token_acc": 0.68552632, "epoch": 0.39932508, "global_step/max_steps": "355/8890", "percentage": "3.99%", "elapsed_time": "7m 25s", "remaining_time": "2h 58m 40s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.796132} {"loss": 1.12415874, "grad_norm": 1.92825949, "learning_rate": 8e-05, "token_acc": 0.68681319, "epoch": 0.40044994, "global_step/max_steps": "356/8890", "percentage": "4.00%", "elapsed_time": "7m 27s", "remaining_time": "2h 58m 36s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.79635} {"loss": 1.09966195, "grad_norm": 1.78515577, "learning_rate": 8.022e-05, "token_acc": 0.71969697, "epoch": 0.4015748, "global_step/max_steps": "357/8890", "percentage": "4.02%", "elapsed_time": "7m 28s", "remaining_time": "2h 58m 32s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.796546} {"loss": 1.10762453, "grad_norm": 1.59590673, "learning_rate": 8.045e-05, "token_acc": 0.71237458, "epoch": 0.40269966, "global_step/max_steps": "358/8890", "percentage": "4.03%", "elapsed_time": "7m 29s", "remaining_time": "2h 58m 32s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.796458} {"loss": 1.04981518, "grad_norm": 1.88765705, "learning_rate": 8.067e-05, "token_acc": 0.70511535, "epoch": 0.40382452, "global_step/max_steps": "359/8890", "percentage": "4.04%", "elapsed_time": "7m 30s", "remaining_time": "2h 58m 28s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.796683} {"loss": 1.03596401, "grad_norm": 1.81816554, "learning_rate": 8.09e-05, "token_acc": 0.71004566, "epoch": 0.40494938, "global_step/max_steps": "360/8890", "percentage": "4.05%", "elapsed_time": "7m 31s", "remaining_time": "2h 58m 24s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.796841} {"loss": 1.01540756, "grad_norm": 2.20068526, "learning_rate": 8.112e-05, "token_acc": 0.69061584, "epoch": 0.40607424, "global_step/max_steps": "361/8890", "percentage": "4.06%", "elapsed_time": "7m 32s", "remaining_time": "2h 58m 20s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.797073} {"loss": 1.00567484, "grad_norm": 1.73842442, "learning_rate": 8.135e-05, "token_acc": 0.72698073, "epoch": 0.4071991, "global_step/max_steps": "362/8890", "percentage": "4.07%", "elapsed_time": "7m 33s", "remaining_time": "2h 58m 15s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.797379} {"loss": 0.98364007, "grad_norm": 1.82908881, "learning_rate": 8.157e-05, "token_acc": 0.73218391, "epoch": 0.40832396, "global_step/max_steps": "363/8890", "percentage": "4.08%", "elapsed_time": "7m 35s", "remaining_time": "2h 58m 11s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.797562} {"loss": 1.06825244, "grad_norm": 1.64304209, "learning_rate": 8.18e-05, "token_acc": 0.68893528, "epoch": 0.40944882, "global_step/max_steps": "364/8890", "percentage": "4.09%", "elapsed_time": "7m 36s", "remaining_time": "2h 58m 6s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.797794} {"loss": 1.01489019, "grad_norm": 2.01932454, "learning_rate": 8.202e-05, "token_acc": 0.71444322, "epoch": 0.41057368, "global_step/max_steps": "365/8890", "percentage": "4.11%", "elapsed_time": "7m 37s", "remaining_time": "2h 58m 1s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.798135} {"loss": 0.98341131, "grad_norm": 1.73254597, "learning_rate": 8.225e-05, "token_acc": 0.73657289, "epoch": 0.41169854, "global_step/max_steps": "366/8890", "percentage": "4.12%", "elapsed_time": "7m 38s", "remaining_time": "2h 57m 51s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.79878} {"loss": 0.8023439, "grad_norm": 1.45589626, "learning_rate": 8.247e-05, "token_acc": 0.77371542, "epoch": 0.4128234, "global_step/max_steps": "367/8890", "percentage": "4.13%", "elapsed_time": "7m 39s", "remaining_time": "2h 57m 50s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.798721} {"loss": 0.88687515, "grad_norm": 1.9293468, "learning_rate": 8.27e-05, "token_acc": 0.7318117, "epoch": 0.41394826, "global_step/max_steps": "368/8890", "percentage": "4.14%", "elapsed_time": "7m 40s", "remaining_time": "2h 57m 47s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.798906} {"loss": 1.08510029, "grad_norm": 1.81869614, "learning_rate": 8.292e-05, "token_acc": 0.6848249, "epoch": 0.41507312, "global_step/max_steps": "369/8890", "percentage": "4.15%", "elapsed_time": "7m 41s", "remaining_time": "2h 57m 43s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.799096} {"loss": 1.01071143, "grad_norm": 1.91201675, "learning_rate": 8.315e-05, "token_acc": 0.71265678, "epoch": 0.41619798, "global_step/max_steps": "370/8890", "percentage": "4.16%", "elapsed_time": "7m 42s", "remaining_time": "2h 57m 40s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.799242} {"loss": 0.86101937, "grad_norm": 1.93873632, "learning_rate": 8.337e-05, "token_acc": 0.73705722, "epoch": 0.41732283, "global_step/max_steps": "371/8890", "percentage": "4.17%", "elapsed_time": "7m 44s", "remaining_time": "2h 57m 36s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.799395} {"loss": 1.03512216, "grad_norm": 2.03495669, "learning_rate": 8.36e-05, "token_acc": 0.69821826, "epoch": 0.41844769, "global_step/max_steps": "372/8890", "percentage": "4.18%", "elapsed_time": "7m 44s", "remaining_time": "2h 57m 22s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.800351} {"loss": 0.97148448, "grad_norm": 2.14065194, "learning_rate": 8.382e-05, "token_acc": 0.72642762, "epoch": 0.41957255, "global_step/max_steps": "373/8890", "percentage": "4.20%", "elapsed_time": "7m 45s", "remaining_time": "2h 57m 16s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.800715} {"loss": 1.02961683, "grad_norm": 1.86846793, "learning_rate": 8.404e-05, "token_acc": 0.69550931, "epoch": 0.42069741, "global_step/max_steps": "374/8890", "percentage": "4.21%", "elapsed_time": "7m 46s", "remaining_time": "2h 57m 13s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.800894} {"loss": 1.03249717, "grad_norm": 1.7831248, "learning_rate": 8.427e-05, "token_acc": 0.72097561, "epoch": 0.42182227, "global_step/max_steps": "375/8890", "percentage": "4.22%", "elapsed_time": "7m 48s", "remaining_time": "2h 57m 8s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.801125} {"loss": 0.92132068, "grad_norm": 2.29271173, "learning_rate": 8.449e-05, "token_acc": 0.74319728, "epoch": 0.42294713, "global_step/max_steps": "376/8890", "percentage": "4.23%", "elapsed_time": "7m 49s", "remaining_time": "2h 57m 3s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.80146} {"loss": 0.82541472, "grad_norm": 1.79758072, "learning_rate": 8.472e-05, "token_acc": 0.7637028, "epoch": 0.42407199, "global_step/max_steps": "377/8890", "percentage": "4.24%", "elapsed_time": "7m 50s", "remaining_time": "2h 56m 59s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.801666} {"loss": 0.94169986, "grad_norm": 1.68559897, "learning_rate": 8.494e-05, "token_acc": 0.7358871, "epoch": 0.42519685, "global_step/max_steps": "378/8890", "percentage": "4.25%", "elapsed_time": "7m 51s", "remaining_time": "2h 56m 56s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.801799} {"loss": 1.08881855, "grad_norm": 1.60006845, "learning_rate": 8.517e-05, "token_acc": 0.71772253, "epoch": 0.42632171, "global_step/max_steps": "379/8890", "percentage": "4.26%", "elapsed_time": "7m 52s", "remaining_time": "2h 56m 52s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.801984} {"loss": 1.04843152, "grad_norm": 2.02270246, "learning_rate": 8.539e-05, "token_acc": 0.70838253, "epoch": 0.42744657, "global_step/max_steps": "380/8890", "percentage": "4.27%", "elapsed_time": "7m 53s", "remaining_time": "2h 56m 48s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.802182} {"loss": 0.82090819, "grad_norm": 1.84290111, "learning_rate": 8.562e-05, "token_acc": 0.77303071, "epoch": 0.42857143, "global_step/max_steps": "381/8890", "percentage": "4.29%", "elapsed_time": "7m 54s", "remaining_time": "2h 56m 39s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.802755} {"loss": 0.92262381, "grad_norm": 1.9690057, "learning_rate": 8.584e-05, "token_acc": 0.73464052, "epoch": 0.42969629, "global_step/max_steps": "382/8890", "percentage": "4.30%", "elapsed_time": "7m 55s", "remaining_time": "2h 56m 35s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.802974} {"loss": 0.97804606, "grad_norm": 1.79529941, "learning_rate": 8.607e-05, "token_acc": 0.72210526, "epoch": 0.43082115, "global_step/max_steps": "383/8890", "percentage": "4.31%", "elapsed_time": "7m 57s", "remaining_time": "2h 56m 35s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.802861} {"loss": 0.97296, "grad_norm": 1.98852384, "learning_rate": 8.629e-05, "token_acc": 0.71253406, "epoch": 0.43194601, "global_step/max_steps": "384/8890", "percentage": "4.32%", "elapsed_time": "7m 58s", "remaining_time": "2h 56m 32s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.803019} {"loss": 1.03308535, "grad_norm": 1.93881154, "learning_rate": 8.652e-05, "token_acc": 0.7021764, "epoch": 0.43307087, "global_step/max_steps": "385/8890", "percentage": "4.33%", "elapsed_time": "7m 59s", "remaining_time": "2h 56m 26s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.803379} {"loss": 0.89868641, "grad_norm": 1.60722053, "learning_rate": 8.674e-05, "token_acc": 0.74545455, "epoch": 0.43419573, "global_step/max_steps": "386/8890", "percentage": "4.34%", "elapsed_time": "8m 0s", "remaining_time": "2h 56m 23s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.803537} {"loss": 1.02370572, "grad_norm": 1.93063045, "learning_rate": 8.697e-05, "token_acc": 0.71636771, "epoch": 0.43532058, "global_step/max_steps": "387/8890", "percentage": "4.35%", "elapsed_time": "8m 1s", "remaining_time": "2h 56m 20s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.803679} {"loss": 0.93383861, "grad_norm": 1.71278083, "learning_rate": 8.719e-05, "token_acc": 0.72181243, "epoch": 0.43644544, "global_step/max_steps": "388/8890", "percentage": "4.36%", "elapsed_time": "8m 2s", "remaining_time": "2h 56m 13s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.804077} {"loss": 0.922584, "grad_norm": 2.2748692, "learning_rate": 8.742e-05, "token_acc": 0.72727273, "epoch": 0.4375703, "global_step/max_steps": "389/8890", "percentage": "4.38%", "elapsed_time": "8m 3s", "remaining_time": "2h 56m 7s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.80446} {"loss": 0.81450266, "grad_norm": 1.79306424, "learning_rate": 8.764e-05, "token_acc": 0.77277599, "epoch": 0.43869516, "global_step/max_steps": "390/8890", "percentage": "4.39%", "elapsed_time": "8m 4s", "remaining_time": "2h 56m 3s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.804648} {"loss": 0.98208064, "grad_norm": 2.25196242, "learning_rate": 8.787e-05, "token_acc": 0.72168285, "epoch": 0.43982002, "global_step/max_steps": "391/8890", "percentage": "4.40%", "elapsed_time": "8m 5s", "remaining_time": "2h 55m 52s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.805408} {"loss": 0.79017252, "grad_norm": 1.81585062, "learning_rate": 8.809e-05, "token_acc": 0.7571965, "epoch": 0.44094488, "global_step/max_steps": "392/8890", "percentage": "4.41%", "elapsed_time": "8m 6s", "remaining_time": "2h 55m 49s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.805531} {"loss": 0.85278815, "grad_norm": 1.955423, "learning_rate": 8.831e-05, "token_acc": 0.7568741, "epoch": 0.44206974, "global_step/max_steps": "393/8890", "percentage": "4.42%", "elapsed_time": "8m 7s", "remaining_time": "2h 55m 44s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.80586} {"loss": 0.85958791, "grad_norm": 1.97796214, "learning_rate": 8.854e-05, "token_acc": 0.73984772, "epoch": 0.4431946, "global_step/max_steps": "394/8890", "percentage": "4.43%", "elapsed_time": "8m 8s", "remaining_time": "2h 55m 40s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.806007} {"loss": 0.75725698, "grad_norm": 1.61315358, "learning_rate": 8.876e-05, "token_acc": 0.776, "epoch": 0.44431946, "global_step/max_steps": "395/8890", "percentage": "4.44%", "elapsed_time": "8m 10s", "remaining_time": "2h 55m 42s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.805824} {"loss": 0.86149192, "grad_norm": 1.72982526, "learning_rate": 8.899e-05, "token_acc": 0.73789474, "epoch": 0.44544432, "global_step/max_steps": "396/8890", "percentage": "4.45%", "elapsed_time": "8m 11s", "remaining_time": "2h 55m 37s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.806042} {"loss": 0.93605924, "grad_norm": 1.91815543, "learning_rate": 8.921e-05, "token_acc": 0.74576271, "epoch": 0.44656918, "global_step/max_steps": "397/8890", "percentage": "4.47%", "elapsed_time": "8m 12s", "remaining_time": "2h 55m 33s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.806304} {"loss": 1.02340829, "grad_norm": 1.61049712, "learning_rate": 8.944e-05, "token_acc": 0.71050228, "epoch": 0.44769404, "global_step/max_steps": "398/8890", "percentage": "4.48%", "elapsed_time": "8m 13s", "remaining_time": "2h 55m 28s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.806554} {"loss": 0.77764773, "grad_norm": 1.84102893, "learning_rate": 8.966e-05, "token_acc": 0.75526316, "epoch": 0.4488189, "global_step/max_steps": "399/8890", "percentage": "4.49%", "elapsed_time": "8m 14s", "remaining_time": "2h 55m 23s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.806867} {"loss": 1.06175745, "grad_norm": 2.05927491, "learning_rate": 8.989e-05, "token_acc": 0.69949495, "epoch": 0.44994376, "global_step/max_steps": "400/8890", "percentage": "4.50%", "elapsed_time": "8m 15s", "remaining_time": "2h 55m 20s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.807} {"loss": 1.08062863, "grad_norm": 1.79418921, "learning_rate": 9.011e-05, "token_acc": 0.6902834, "epoch": 0.45106862, "global_step/max_steps": "401/8890", "percentage": "4.51%", "elapsed_time": "8m 16s", "remaining_time": "2h 55m 16s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.80718} {"loss": 0.95579696, "grad_norm": 1.76763988, "learning_rate": 9.034e-05, "token_acc": 0.73347779, "epoch": 0.45219348, "global_step/max_steps": "402/8890", "percentage": "4.52%", "elapsed_time": "8m 17s", "remaining_time": "2h 55m 11s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.807478} {"loss": 1.16984558, "grad_norm": 2.02071857, "learning_rate": 9.056e-05, "token_acc": 0.67381974, "epoch": 0.45331834, "global_step/max_steps": "403/8890", "percentage": "4.53%", "elapsed_time": "8m 18s", "remaining_time": "2h 55m 8s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.807652} {"loss": 0.90713394, "grad_norm": 1.78210878, "learning_rate": 9.079e-05, "token_acc": 0.74204947, "epoch": 0.45444319, "global_step/max_steps": "404/8890", "percentage": "4.54%", "elapsed_time": "8m 20s", "remaining_time": "2h 55m 4s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.807845} {"loss": 1.01241875, "grad_norm": 1.63686311, "learning_rate": 9.101e-05, "token_acc": 0.70679278, "epoch": 0.45556805, "global_step/max_steps": "405/8890", "percentage": "4.56%", "elapsed_time": "8m 21s", "remaining_time": "2h 55m 4s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.807744} {"loss": 0.75910544, "grad_norm": 1.91542065, "learning_rate": 9.124e-05, "token_acc": 0.75103164, "epoch": 0.45669291, "global_step/max_steps": "406/8890", "percentage": "4.57%", "elapsed_time": "8m 22s", "remaining_time": "2h 55m 2s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.807783} {"loss": 0.86699939, "grad_norm": 1.98668981, "learning_rate": 9.146e-05, "token_acc": 0.752, "epoch": 0.45781777, "global_step/max_steps": "407/8890", "percentage": "4.58%", "elapsed_time": "8m 23s", "remaining_time": "2h 55m 0s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.807894} {"loss": 0.93638694, "grad_norm": 1.74888217, "learning_rate": 9.169e-05, "token_acc": 0.74002157, "epoch": 0.45894263, "global_step/max_steps": "408/8890", "percentage": "4.59%", "elapsed_time": "8m 24s", "remaining_time": "2h 54m 57s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.808007} {"loss": 0.86562753, "grad_norm": 1.7887603, "learning_rate": 9.191e-05, "token_acc": 0.74518686, "epoch": 0.46006749, "global_step/max_steps": "409/8890", "percentage": "4.60%", "elapsed_time": "8m 25s", "remaining_time": "2h 54m 52s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.808328} {"loss": 0.8968094, "grad_norm": 2.16172647, "learning_rate": 9.213e-05, "token_acc": 0.75462963, "epoch": 0.46119235, "global_step/max_steps": "410/8890", "percentage": "4.61%", "elapsed_time": "8m 27s", "remaining_time": "2h 54m 48s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.808497} {"loss": 0.75229192, "grad_norm": 1.7886883, "learning_rate": 9.236e-05, "token_acc": 0.74384236, "epoch": 0.46231721, "global_step/max_steps": "411/8890", "percentage": "4.62%", "elapsed_time": "8m 28s", "remaining_time": "2h 54m 44s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.808745} {"loss": 0.94485444, "grad_norm": 1.57460487, "learning_rate": 9.258e-05, "token_acc": 0.72681282, "epoch": 0.46344207, "global_step/max_steps": "412/8890", "percentage": "4.63%", "elapsed_time": "8m 29s", "remaining_time": "2h 54m 40s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.808911} {"loss": 1.12272143, "grad_norm": 2.06698203, "learning_rate": 9.281e-05, "token_acc": 0.68041237, "epoch": 0.46456693, "global_step/max_steps": "413/8890", "percentage": "4.65%", "elapsed_time": "8m 31s", "remaining_time": "2h 54m 53s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.807865} {"loss": 1.06860948, "grad_norm": 1.86942935, "learning_rate": 9.303e-05, "token_acc": 0.73098126, "epoch": 0.46569179, "global_step/max_steps": "414/8890", "percentage": "4.66%", "elapsed_time": "8m 32s", "remaining_time": "2h 54m 50s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.807956} {"loss": 0.71580482, "grad_norm": 2.01301074, "learning_rate": 9.326e-05, "token_acc": 0.77480315, "epoch": 0.46681665, "global_step/max_steps": "415/8890", "percentage": "4.67%", "elapsed_time": "8m 33s", "remaining_time": "2h 54m 47s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.808124} {"loss": 1.04995441, "grad_norm": 1.86507952, "learning_rate": 9.348e-05, "token_acc": 0.71881838, "epoch": 0.46794151, "global_step/max_steps": "416/8890", "percentage": "4.68%", "elapsed_time": "8m 34s", "remaining_time": "2h 54m 44s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.808246} {"loss": 0.82834023, "grad_norm": 2.00347495, "learning_rate": 9.371e-05, "token_acc": 0.73444613, "epoch": 0.46906637, "global_step/max_steps": "417/8890", "percentage": "4.69%", "elapsed_time": "8m 35s", "remaining_time": "2h 54m 41s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.808357} {"loss": 0.90450394, "grad_norm": 1.70738888, "learning_rate": 9.393e-05, "token_acc": 0.73522727, "epoch": 0.47019123, "global_step/max_steps": "418/8890", "percentage": "4.70%", "elapsed_time": "8m 36s", "remaining_time": "2h 54m 36s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.808694} {"loss": 0.92378807, "grad_norm": 1.78197527, "learning_rate": 9.416e-05, "token_acc": 0.71888889, "epoch": 0.47131609, "global_step/max_steps": "419/8890", "percentage": "4.71%", "elapsed_time": "8m 37s", "remaining_time": "2h 54m 29s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.809107} {"loss": 1.27981496, "grad_norm": 1.79180288, "learning_rate": 9.438e-05, "token_acc": 0.65425024, "epoch": 0.47244094, "global_step/max_steps": "420/8890", "percentage": "4.72%", "elapsed_time": "8m 39s", "remaining_time": "2h 54m 29s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.80905} {"loss": 0.98147357, "grad_norm": 1.47221315, "learning_rate": 9.461e-05, "token_acc": 0.73545384, "epoch": 0.4735658, "global_step/max_steps": "421/8890", "percentage": "4.74%", "elapsed_time": "8m 40s", "remaining_time": "2h 54m 26s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.809143} {"loss": 0.97818887, "grad_norm": 1.83972645, "learning_rate": 9.483e-05, "token_acc": 0.71411043, "epoch": 0.47469066, "global_step/max_steps": "422/8890", "percentage": "4.75%", "elapsed_time": "8m 41s", "remaining_time": "2h 54m 23s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.809302} {"loss": 1.08247566, "grad_norm": 1.65384424, "learning_rate": 9.506e-05, "token_acc": 0.69883041, "epoch": 0.47581552, "global_step/max_steps": "423/8890", "percentage": "4.76%", "elapsed_time": "8m 42s", "remaining_time": "2h 54m 20s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.809456} {"loss": 0.95296395, "grad_norm": 1.75897241, "learning_rate": 9.528e-05, "token_acc": 0.74364896, "epoch": 0.47694038, "global_step/max_steps": "424/8890", "percentage": "4.77%", "elapsed_time": "8m 43s", "remaining_time": "2h 54m 17s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.809579} {"loss": 0.8819443, "grad_norm": 1.73166549, "learning_rate": 9.551e-05, "token_acc": 0.75116822, "epoch": 0.47806524, "global_step/max_steps": "425/8890", "percentage": "4.78%", "elapsed_time": "8m 44s", "remaining_time": "2h 54m 13s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.809758} {"loss": 0.91988397, "grad_norm": 1.83311141, "learning_rate": 9.573e-05, "token_acc": 0.74144038, "epoch": 0.4791901, "global_step/max_steps": "426/8890", "percentage": "4.79%", "elapsed_time": "8m 45s", "remaining_time": "2h 54m 10s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.809916} {"loss": 1.05802798, "grad_norm": 1.53141081, "learning_rate": 9.596e-05, "token_acc": 0.70967742, "epoch": 0.48031496, "global_step/max_steps": "427/8890", "percentage": "4.80%", "elapsed_time": "8m 47s", "remaining_time": "2h 54m 6s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.810105} {"loss": 1.02602601, "grad_norm": 1.73919976, "learning_rate": 9.618e-05, "token_acc": 0.73307791, "epoch": 0.48143982, "global_step/max_steps": "428/8890", "percentage": "4.81%", "elapsed_time": "8m 48s", "remaining_time": "2h 54m 4s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.810224} {"loss": 0.95972842, "grad_norm": 1.65516293, "learning_rate": 9.64e-05, "token_acc": 0.72587719, "epoch": 0.48256468, "global_step/max_steps": "429/8890", "percentage": "4.83%", "elapsed_time": "8m 49s", "remaining_time": "2h 54m 1s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.810345} {"loss": 1.09590864, "grad_norm": 1.76337564, "learning_rate": 9.663e-05, "token_acc": 0.68874869, "epoch": 0.48368954, "global_step/max_steps": "430/8890", "percentage": "4.84%", "elapsed_time": "8m 50s", "remaining_time": "2h 53m 58s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.810457} {"loss": 0.91296977, "grad_norm": 1.64246488, "learning_rate": 9.685e-05, "token_acc": 0.73689727, "epoch": 0.4848144, "global_step/max_steps": "431/8890", "percentage": "4.85%", "elapsed_time": "8m 51s", "remaining_time": "2h 53m 55s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.810596} {"loss": 1.11682391, "grad_norm": 1.70104301, "learning_rate": 9.708e-05, "token_acc": 0.67553191, "epoch": 0.48593926, "global_step/max_steps": "432/8890", "percentage": "4.86%", "elapsed_time": "8m 52s", "remaining_time": "2h 53m 51s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.810839} {"loss": 0.91412652, "grad_norm": 1.88871264, "learning_rate": 9.73e-05, "token_acc": 0.74242424, "epoch": 0.48706412, "global_step/max_steps": "433/8890", "percentage": "4.87%", "elapsed_time": "8m 53s", "remaining_time": "2h 53m 45s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.811168} {"loss": 1.05454779, "grad_norm": 1.6572324, "learning_rate": 9.753e-05, "token_acc": 0.70805044, "epoch": 0.48818898, "global_step/max_steps": "434/8890", "percentage": "4.88%", "elapsed_time": "8m 55s", "remaining_time": "2h 53m 46s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.811044} {"loss": 1.11036205, "grad_norm": 1.91921818, "learning_rate": 9.775e-05, "token_acc": 0.69754464, "epoch": 0.48931384, "global_step/max_steps": "435/8890", "percentage": "4.89%", "elapsed_time": "8m 56s", "remaining_time": "2h 53m 43s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.81114} {"loss": 1.01852107, "grad_norm": 2.08995748, "learning_rate": 9.798e-05, "token_acc": 0.70200573, "epoch": 0.4904387, "global_step/max_steps": "436/8890", "percentage": "4.90%", "elapsed_time": "8m 57s", "remaining_time": "2h 53m 39s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.811369} {"loss": 1.05333817, "grad_norm": 1.97887647, "learning_rate": 9.82e-05, "token_acc": 0.71175727, "epoch": 0.49156355, "global_step/max_steps": "437/8890", "percentage": "4.92%", "elapsed_time": "8m 58s", "remaining_time": "2h 53m 34s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.811664} {"loss": 0.849621, "grad_norm": 1.68903065, "learning_rate": 9.843e-05, "token_acc": 0.74971815, "epoch": 0.49268841, "global_step/max_steps": "438/8890", "percentage": "4.93%", "elapsed_time": "8m 59s", "remaining_time": "2h 53m 31s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.811784} {"loss": 0.83354223, "grad_norm": 1.7298907, "learning_rate": 9.865e-05, "token_acc": 0.75257732, "epoch": 0.49381327, "global_step/max_steps": "439/8890", "percentage": "4.94%", "elapsed_time": "9m 0s", "remaining_time": "2h 53m 26s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.812076} {"loss": 1.10201526, "grad_norm": 1.80181599, "learning_rate": 9.888e-05, "token_acc": 0.69305019, "epoch": 0.49493813, "global_step/max_steps": "440/8890", "percentage": "4.95%", "elapsed_time": "9m 1s", "remaining_time": "2h 53m 24s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.812175} {"loss": 0.9385736, "grad_norm": 1.54404867, "learning_rate": 9.91e-05, "token_acc": 0.71604938, "epoch": 0.49606299, "global_step/max_steps": "441/8890", "percentage": "4.96%", "elapsed_time": "9m 2s", "remaining_time": "2h 53m 21s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.812281} {"loss": 0.92021763, "grad_norm": 1.53642297, "learning_rate": 9.933e-05, "token_acc": 0.75675676, "epoch": 0.49718785, "global_step/max_steps": "442/8890", "percentage": "4.97%", "elapsed_time": "9m 3s", "remaining_time": "2h 53m 17s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.81254} {"loss": 0.94392252, "grad_norm": 1.97629011, "learning_rate": 9.955e-05, "token_acc": 0.74726776, "epoch": 0.49831271, "global_step/max_steps": "443/8890", "percentage": "4.98%", "elapsed_time": "9m 5s", "remaining_time": "2h 53m 14s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.812643} {"loss": 0.97757113, "grad_norm": 1.9044646, "learning_rate": 9.978e-05, "token_acc": 0.71664374, "epoch": 0.49943757, "global_step/max_steps": "444/8890", "percentage": "4.99%", "elapsed_time": "9m 6s", "remaining_time": "2h 53m 11s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.812796} {"loss": 0.96307123, "grad_norm": 1.58959949, "learning_rate": 0.0001, "token_acc": 0.72931655, "epoch": 0.50056243, "global_step/max_steps": "445/8890", "percentage": "5.01%", "elapsed_time": "9m 7s", "remaining_time": "2h 53m 8s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.812936} {"loss": 0.81491619, "grad_norm": 1.47017229, "learning_rate": 0.0001, "token_acc": 0.75090909, "epoch": 0.50168729, "global_step/max_steps": "446/8890", "percentage": "5.02%", "elapsed_time": "9m 8s", "remaining_time": "2h 53m 5s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.813066} {"loss": 1.12681115, "grad_norm": 1.69262266, "learning_rate": 0.0001, "token_acc": 0.68731563, "epoch": 0.50281215, "global_step/max_steps": "447/8890", "percentage": "5.03%", "elapsed_time": "9m 9s", "remaining_time": "2h 53m 3s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.813147} {"loss": 0.89255655, "grad_norm": 1.87721372, "learning_rate": 0.0001, "token_acc": 0.72119342, "epoch": 0.50393701, "global_step/max_steps": "448/8890", "percentage": "5.04%", "elapsed_time": "9m 10s", "remaining_time": "2h 53m 0s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.813284} {"loss": 0.82189631, "grad_norm": 1.68658996, "learning_rate": 0.0001, "token_acc": 0.74432497, "epoch": 0.50506187, "global_step/max_steps": "449/8890", "percentage": "5.05%", "elapsed_time": "9m 12s", "remaining_time": "2h 52m 57s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.813389} {"loss": 1.00559795, "grad_norm": 1.7684983, "learning_rate": 0.0001, "token_acc": 0.7297593, "epoch": 0.50618673, "global_step/max_steps": "450/8890", "percentage": "5.06%", "elapsed_time": "9m 13s", "remaining_time": "2h 52m 54s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.813535} {"loss": 0.82664633, "grad_norm": 1.90215743, "learning_rate": 0.0001, "token_acc": 0.74394904, "epoch": 0.50731159, "global_step/max_steps": "451/8890", "percentage": "5.07%", "elapsed_time": "9m 14s", "remaining_time": "2h 52m 49s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.813835} {"loss": 0.95054883, "grad_norm": 1.6723665, "learning_rate": 0.0001, "token_acc": 0.7516269, "epoch": 0.50843645, "global_step/max_steps": "452/8890", "percentage": "5.08%", "elapsed_time": "9m 15s", "remaining_time": "2h 52m 46s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.813984} {"loss": 0.88912249, "grad_norm": 1.67656565, "learning_rate": 0.0001, "token_acc": 0.74081847, "epoch": 0.5095613, "global_step/max_steps": "453/8890", "percentage": "5.10%", "elapsed_time": "9m 16s", "remaining_time": "2h 52m 43s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.814111} {"loss": 0.9195739, "grad_norm": 1.79635954, "learning_rate": 0.0001, "token_acc": 0.74093264, "epoch": 0.51068616, "global_step/max_steps": "454/8890", "percentage": "5.11%", "elapsed_time": "9m 17s", "remaining_time": "2h 52m 39s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.814363} {"loss": 1.10558677, "grad_norm": 1.79318523, "learning_rate": 0.0001, "token_acc": 0.70023419, "epoch": 0.51181102, "global_step/max_steps": "455/8890", "percentage": "5.12%", "elapsed_time": "9m 18s", "remaining_time": "2h 52m 35s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.814507} {"loss": 1.08086908, "grad_norm": 1.67723227, "learning_rate": 0.0001, "token_acc": 0.69514563, "epoch": 0.51293588, "global_step/max_steps": "456/8890", "percentage": "5.13%", "elapsed_time": "9m 20s", "remaining_time": "2h 52m 37s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.814257} {"loss": 0.86895716, "grad_norm": 1.75824344, "learning_rate": 0.0001, "token_acc": 0.75738126, "epoch": 0.51406074, "global_step/max_steps": "457/8890", "percentage": "5.14%", "elapsed_time": "9m 20s", "remaining_time": "2h 52m 31s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.814648} {"loss": 0.92071497, "grad_norm": 1.8511914, "learning_rate": 0.0001, "token_acc": 0.74305556, "epoch": 0.5151856, "global_step/max_steps": "458/8890", "percentage": "5.15%", "elapsed_time": "9m 22s", "remaining_time": "2h 52m 29s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.81475} {"loss": 0.83772445, "grad_norm": 1.79806674, "learning_rate": 0.0001, "token_acc": 0.76815287, "epoch": 0.51631046, "global_step/max_steps": "459/8890", "percentage": "5.16%", "elapsed_time": "9m 23s", "remaining_time": "2h 52m 24s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.815017} {"loss": 0.71854901, "grad_norm": 2.11249471, "learning_rate": 0.0001, "token_acc": 0.796, "epoch": 0.51743532, "global_step/max_steps": "460/8890", "percentage": "5.17%", "elapsed_time": "9m 24s", "remaining_time": "2h 52m 20s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.815228} {"loss": 1.08918858, "grad_norm": 1.63231671, "learning_rate": 0.0001, "token_acc": 0.69202899, "epoch": 0.51856018, "global_step/max_steps": "461/8890", "percentage": "5.19%", "elapsed_time": "9m 25s", "remaining_time": "2h 52m 18s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.815293} {"loss": 0.81067896, "grad_norm": 1.87741745, "learning_rate": 0.0001, "token_acc": 0.76454668, "epoch": 0.51968504, "global_step/max_steps": "462/8890", "percentage": "5.20%", "elapsed_time": "9m 26s", "remaining_time": "2h 52m 15s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.815437} {"loss": 1.13633192, "grad_norm": 2.19774604, "learning_rate": 0.0001, "token_acc": 0.69309463, "epoch": 0.5208099, "global_step/max_steps": "463/8890", "percentage": "5.21%", "elapsed_time": "9m 27s", "remaining_time": "2h 52m 11s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.815646} {"loss": 1.032125, "grad_norm": 1.78856206, "learning_rate": 0.0001, "token_acc": 0.70238095, "epoch": 0.52193476, "global_step/max_steps": "464/8890", "percentage": "5.22%", "elapsed_time": "9m 28s", "remaining_time": "2h 52m 8s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.815773} {"loss": 1.08914614, "grad_norm": 1.79473472, "learning_rate": 0.0001, "token_acc": 0.70563231, "epoch": 0.52305962, "global_step/max_steps": "465/8890", "percentage": "5.23%", "elapsed_time": "9m 30s", "remaining_time": "2h 52m 8s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.815692} {"loss": 1.00381446, "grad_norm": 2.02130985, "learning_rate": 0.0001, "token_acc": 0.71512482, "epoch": 0.52418448, "global_step/max_steps": "466/8890", "percentage": "5.24%", "elapsed_time": "9m 31s", "remaining_time": "2h 52m 4s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.815908} {"loss": 0.97177035, "grad_norm": 1.86544454, "learning_rate": 0.0001, "token_acc": 0.71719457, "epoch": 0.52530934, "global_step/max_steps": "467/8890", "percentage": "5.25%", "elapsed_time": "9m 32s", "remaining_time": "2h 52m 0s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.81616} {"loss": 0.8342433, "grad_norm": 1.67775559, "learning_rate": 0.0001, "token_acc": 0.75197294, "epoch": 0.5264342, "global_step/max_steps": "468/8890", "percentage": "5.26%", "elapsed_time": "9m 33s", "remaining_time": "2h 51m 57s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.816278} {"loss": 0.85620534, "grad_norm": 1.75535142, "learning_rate": 0.0001, "token_acc": 0.74609781, "epoch": 0.52755906, "global_step/max_steps": "469/8890", "percentage": "5.28%", "elapsed_time": "9m 34s", "remaining_time": "2h 51m 53s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.816526} {"loss": 0.88993919, "grad_norm": 2.04821157, "learning_rate": 0.0001, "token_acc": 0.73731343, "epoch": 0.52868391, "global_step/max_steps": "470/8890", "percentage": "5.29%", "elapsed_time": "9m 35s", "remaining_time": "2h 51m 48s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.81683} {"loss": 0.92330647, "grad_norm": 1.95224595, "learning_rate": 0.0001, "token_acc": 0.74489796, "epoch": 0.52980877, "global_step/max_steps": "471/8890", "percentage": "5.30%", "elapsed_time": "9m 36s", "remaining_time": "2h 51m 41s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.817256} {"loss": 0.92451501, "grad_norm": 1.83828723, "learning_rate": 0.0001, "token_acc": 0.72959805, "epoch": 0.53093363, "global_step/max_steps": "472/8890", "percentage": "5.31%", "elapsed_time": "9m 37s", "remaining_time": "2h 51m 38s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.817399} {"loss": 0.93640077, "grad_norm": 1.56980228, "learning_rate": 0.0001, "token_acc": 0.7370892, "epoch": 0.53205849, "global_step/max_steps": "473/8890", "percentage": "5.32%", "elapsed_time": "9m 38s", "remaining_time": "2h 51m 34s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.817637} {"loss": 0.94125366, "grad_norm": 1.79623461, "learning_rate": 0.0001, "token_acc": 0.7089372, "epoch": 0.53318335, "global_step/max_steps": "474/8890", "percentage": "5.33%", "elapsed_time": "9m 39s", "remaining_time": "2h 51m 34s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.817555} {"loss": 0.82693744, "grad_norm": 1.57411528, "learning_rate": 0.0001, "token_acc": 0.7657082, "epoch": 0.53430821, "global_step/max_steps": "475/8890", "percentage": "5.34%", "elapsed_time": "9m 40s", "remaining_time": "2h 51m 31s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.817665} {"loss": 0.94065481, "grad_norm": 1.85774255, "learning_rate": 0.0001, "token_acc": 0.73430493, "epoch": 0.53543307, "global_step/max_steps": "476/8890", "percentage": "5.35%", "elapsed_time": "9m 42s", "remaining_time": "2h 51m 28s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.817811} {"loss": 0.90933442, "grad_norm": 1.74392092, "learning_rate": 0.0001, "token_acc": 0.72868217, "epoch": 0.53655793, "global_step/max_steps": "477/8890", "percentage": "5.37%", "elapsed_time": "9m 43s", "remaining_time": "2h 51m 25s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.817928} {"loss": 0.95060056, "grad_norm": 1.8215903, "learning_rate": 0.0001, "token_acc": 0.7281768, "epoch": 0.53768279, "global_step/max_steps": "478/8890", "percentage": "5.38%", "elapsed_time": "9m 44s", "remaining_time": "2h 51m 21s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.818132} {"loss": 0.79672652, "grad_norm": 1.66728675, "learning_rate": 0.0001, "token_acc": 0.75802752, "epoch": 0.53880765, "global_step/max_steps": "479/8890", "percentage": "5.39%", "elapsed_time": "9m 45s", "remaining_time": "2h 51m 19s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.818211} {"loss": 0.99145651, "grad_norm": 1.83755159, "learning_rate": 0.0001, "token_acc": 0.72831633, "epoch": 0.53993251, "global_step/max_steps": "480/8890", "percentage": "5.40%", "elapsed_time": "9m 46s", "remaining_time": "2h 51m 19s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.818097} {"loss": 0.86316967, "grad_norm": 1.84528661, "learning_rate": 0.0001, "token_acc": 0.73435655, "epoch": 0.54105737, "global_step/max_steps": "481/8890", "percentage": "5.41%", "elapsed_time": "9m 47s", "remaining_time": "2h 51m 18s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.818152} {"loss": 0.96949488, "grad_norm": 2.05641508, "learning_rate": 0.0001, "token_acc": 0.70767004, "epoch": 0.54218223, "global_step/max_steps": "482/8890", "percentage": "5.42%", "elapsed_time": "9m 49s", "remaining_time": "2h 51m 15s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.818261} {"loss": 0.9015702, "grad_norm": 1.87461627, "learning_rate": 0.0001, "token_acc": 0.7372449, "epoch": 0.54330709, "global_step/max_steps": "483/8890", "percentage": "5.43%", "elapsed_time": "9m 50s", "remaining_time": "2h 51m 15s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.818184} {"loss": 0.93305403, "grad_norm": 1.694013, "learning_rate": 9.999e-05, "token_acc": 0.73138833, "epoch": 0.54443195, "global_step/max_steps": "484/8890", "percentage": "5.44%", "elapsed_time": "9m 51s", "remaining_time": "2h 51m 12s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.818297} {"loss": 0.98481315, "grad_norm": 1.97180259, "learning_rate": 9.999e-05, "token_acc": 0.697733, "epoch": 0.54555681, "global_step/max_steps": "485/8890", "percentage": "5.46%", "elapsed_time": "9m 52s", "remaining_time": "2h 51m 9s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.818416} {"loss": 0.61575484, "grad_norm": 1.54722428, "learning_rate": 9.999e-05, "token_acc": 0.82485207, "epoch": 0.54668166, "global_step/max_steps": "486/8890", "percentage": "5.47%", "elapsed_time": "9m 53s", "remaining_time": "2h 51m 7s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.818535} {"loss": 0.91478813, "grad_norm": 1.74507689, "learning_rate": 9.999e-05, "token_acc": 0.73519914, "epoch": 0.54780652, "global_step/max_steps": "487/8890", "percentage": "5.48%", "elapsed_time": "9m 54s", "remaining_time": "2h 51m 4s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.818622} {"loss": 0.77675152, "grad_norm": 1.95376837, "learning_rate": 9.999e-05, "token_acc": 0.78032345, "epoch": 0.54893138, "global_step/max_steps": "488/8890", "percentage": "5.49%", "elapsed_time": "9m 55s", "remaining_time": "2h 51m 0s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.81885} {"loss": 0.93537402, "grad_norm": 1.65193999, "learning_rate": 9.999e-05, "token_acc": 0.74973931, "epoch": 0.55005624, "global_step/max_steps": "489/8890", "percentage": "5.50%", "elapsed_time": "9m 57s", "remaining_time": "2h 50m 58s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.818916} {"loss": 0.89798075, "grad_norm": 1.64205897, "learning_rate": 9.999e-05, "token_acc": 0.74514039, "epoch": 0.5511811, "global_step/max_steps": "490/8890", "percentage": "5.51%", "elapsed_time": "9m 58s", "remaining_time": "2h 50m 56s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.819028} {"loss": 0.97742003, "grad_norm": 1.96956706, "learning_rate": 9.999e-05, "token_acc": 0.70344828, "epoch": 0.55230596, "global_step/max_steps": "491/8890", "percentage": "5.52%", "elapsed_time": "9m 59s", "remaining_time": "2h 50m 47s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.819605} {"loss": 0.96909487, "grad_norm": 1.59170032, "learning_rate": 9.999e-05, "token_acc": 0.72884811, "epoch": 0.55343082, "global_step/max_steps": "492/8890", "percentage": "5.53%", "elapsed_time": "10m 0s", "remaining_time": "2h 50m 45s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.819658} {"loss": 0.92383122, "grad_norm": 1.91276944, "learning_rate": 9.999e-05, "token_acc": 0.74362606, "epoch": 0.55455568, "global_step/max_steps": "493/8890", "percentage": "5.55%", "elapsed_time": "10m 1s", "remaining_time": "2h 50m 41s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.819926} {"loss": 0.83979654, "grad_norm": 1.59657621, "learning_rate": 9.999e-05, "token_acc": 0.75739645, "epoch": 0.55568054, "global_step/max_steps": "494/8890", "percentage": "5.56%", "elapsed_time": "10m 2s", "remaining_time": "2h 50m 39s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.819994} {"loss": 0.87144554, "grad_norm": 1.46151209, "learning_rate": 9.999e-05, "token_acc": 0.75557538, "epoch": 0.5568054, "global_step/max_steps": "495/8890", "percentage": "5.57%", "elapsed_time": "10m 3s", "remaining_time": "2h 50m 37s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.820051} {"loss": 0.95813048, "grad_norm": 1.99494314, "learning_rate": 9.999e-05, "token_acc": 0.72307692, "epoch": 0.55793026, "global_step/max_steps": "496/8890", "percentage": "5.58%", "elapsed_time": "10m 4s", "remaining_time": "2h 50m 34s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.820174} {"loss": 0.79870355, "grad_norm": 1.70249748, "learning_rate": 9.999e-05, "token_acc": 0.77541371, "epoch": 0.55905512, "global_step/max_steps": "497/8890", "percentage": "5.59%", "elapsed_time": "10m 5s", "remaining_time": "2h 50m 31s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.820297} {"loss": 0.97023332, "grad_norm": 1.70983338, "learning_rate": 9.999e-05, "token_acc": 0.69725738, "epoch": 0.56017998, "global_step/max_steps": "498/8890", "percentage": "5.60%", "elapsed_time": "10m 7s", "remaining_time": "2h 50m 29s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.82041} {"loss": 0.83120817, "grad_norm": 1.55193603, "learning_rate": 9.999e-05, "token_acc": 0.74918212, "epoch": 0.56130484, "global_step/max_steps": "499/8890", "percentage": "5.61%", "elapsed_time": "10m 8s", "remaining_time": "2h 50m 26s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.820501} {"loss": 0.96958113, "grad_norm": 1.61576819, "learning_rate": 9.999e-05, "token_acc": 0.71300448, "epoch": 0.5624297, "global_step/max_steps": "500/8890", "percentage": "5.62%", "elapsed_time": "10m 9s", "remaining_time": "2h 50m 24s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.820588} {"loss": 1.01639485, "grad_norm": 1.76655221, "learning_rate": 9.999e-05, "token_acc": 0.72177879, "epoch": 0.56355456, "global_step/max_steps": "501/8890", "percentage": "5.64%", "elapsed_time": "10m 10s", "remaining_time": "2h 50m 24s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.820508} {"loss": 0.91227394, "grad_norm": 1.74989438, "learning_rate": 9.999e-05, "token_acc": 0.74137931, "epoch": 0.56467942, "global_step/max_steps": "502/8890", "percentage": "5.65%", "elapsed_time": "10m 11s", "remaining_time": "2h 50m 21s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.820611} {"loss": 0.90047675, "grad_norm": 1.68811178, "learning_rate": 9.999e-05, "token_acc": 0.74666667, "epoch": 0.56580427, "global_step/max_steps": "503/8890", "percentage": "5.66%", "elapsed_time": "10m 12s", "remaining_time": "2h 50m 15s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.820985} {"loss": 0.84321034, "grad_norm": 1.67098963, "learning_rate": 9.999e-05, "token_acc": 0.74858757, "epoch": 0.56692913, "global_step/max_steps": "504/8890", "percentage": "5.67%", "elapsed_time": "10m 13s", "remaining_time": "2h 50m 13s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.821084} {"loss": 0.91305858, "grad_norm": 1.62974548, "learning_rate": 9.999e-05, "token_acc": 0.74424899, "epoch": 0.56805399, "global_step/max_steps": "505/8890", "percentage": "5.68%", "elapsed_time": "10m 14s", "remaining_time": "2h 50m 10s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.821194} {"loss": 1.07901883, "grad_norm": 1.80604887, "learning_rate": 9.999e-05, "token_acc": 0.69493278, "epoch": 0.56917885, "global_step/max_steps": "506/8890", "percentage": "5.69%", "elapsed_time": "10m 16s", "remaining_time": "2h 50m 8s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.821314} {"loss": 1.04390943, "grad_norm": 1.67573988, "learning_rate": 9.999e-05, "token_acc": 0.70071502, "epoch": 0.57030371, "global_step/max_steps": "507/8890", "percentage": "5.70%", "elapsed_time": "10m 17s", "remaining_time": "2h 50m 16s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.820558} {"loss": 0.92858338, "grad_norm": 1.74821746, "learning_rate": 9.999e-05, "token_acc": 0.73950796, "epoch": 0.57142857, "global_step/max_steps": "508/8890", "percentage": "5.71%", "elapsed_time": "10m 19s", "remaining_time": "2h 50m 13s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.820666} {"loss": 0.93205476, "grad_norm": 1.62776446, "learning_rate": 9.999e-05, "token_acc": 0.73684211, "epoch": 0.57255343, "global_step/max_steps": "509/8890", "percentage": "5.73%", "elapsed_time": "10m 20s", "remaining_time": "2h 50m 11s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.820746} {"loss": 0.86712497, "grad_norm": 1.75894356, "learning_rate": 9.999e-05, "token_acc": 0.73959572, "epoch": 0.57367829, "global_step/max_steps": "510/8890", "percentage": "5.74%", "elapsed_time": "10m 21s", "remaining_time": "2h 50m 6s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.821011} {"loss": 1.01168835, "grad_norm": 1.61678183, "learning_rate": 9.998e-05, "token_acc": 0.72349727, "epoch": 0.57480315, "global_step/max_steps": "511/8890", "percentage": "5.75%", "elapsed_time": "10m 22s", "remaining_time": "2h 50m 2s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.821277} {"loss": 1.12840271, "grad_norm": 1.72067857, "learning_rate": 9.998e-05, "token_acc": 0.69230769, "epoch": 0.57592801, "global_step/max_steps": "512/8890", "percentage": "5.76%", "elapsed_time": "10m 23s", "remaining_time": "2h 49m 59s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.82141} {"loss": 0.8479954, "grad_norm": 1.53672826, "learning_rate": 9.998e-05, "token_acc": 0.73228346, "epoch": 0.57705287, "global_step/max_steps": "513/8890", "percentage": "5.77%", "elapsed_time": "10m 24s", "remaining_time": "2h 49m 57s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.821486} {"loss": 1.11167049, "grad_norm": 1.56707907, "learning_rate": 9.998e-05, "token_acc": 0.68630705, "epoch": 0.57817773, "global_step/max_steps": "514/8890", "percentage": "5.78%", "elapsed_time": "10m 25s", "remaining_time": "2h 49m 54s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.821616} {"loss": 0.75461626, "grad_norm": 1.5198797, "learning_rate": 9.998e-05, "token_acc": 0.76421053, "epoch": 0.57930259, "global_step/max_steps": "515/8890", "percentage": "5.79%", "elapsed_time": "10m 26s", "remaining_time": "2h 49m 52s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.821662} {"loss": 0.92050606, "grad_norm": 1.5688237, "learning_rate": 9.998e-05, "token_acc": 0.74232558, "epoch": 0.58042745, "global_step/max_steps": "516/8890", "percentage": "5.80%", "elapsed_time": "10m 27s", "remaining_time": "2h 49m 47s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.821948} {"loss": 0.93094307, "grad_norm": 1.69309306, "learning_rate": 9.998e-05, "token_acc": 0.73736264, "epoch": 0.58155231, "global_step/max_steps": "517/8890", "percentage": "5.82%", "elapsed_time": "10m 28s", "remaining_time": "2h 49m 45s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.822066} {"loss": 0.79524577, "grad_norm": 1.82065439, "learning_rate": 9.998e-05, "token_acc": 0.76010101, "epoch": 0.58267717, "global_step/max_steps": "518/8890", "percentage": "5.83%", "elapsed_time": "10m 29s", "remaining_time": "2h 49m 40s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.822318} {"loss": 0.87792736, "grad_norm": 1.66315603, "learning_rate": 9.998e-05, "token_acc": 0.75582686, "epoch": 0.58380202, "global_step/max_steps": "519/8890", "percentage": "5.84%", "elapsed_time": "10m 30s", "remaining_time": "2h 49m 36s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.822558} {"loss": 0.82592165, "grad_norm": 1.61441588, "learning_rate": 9.998e-05, "token_acc": 0.75690608, "epoch": 0.58492688, "global_step/max_steps": "520/8890", "percentage": "5.85%", "elapsed_time": "10m 32s", "remaining_time": "2h 49m 34s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.822676} {"loss": 1.00669265, "grad_norm": 1.61827469, "learning_rate": 9.998e-05, "token_acc": 0.71139706, "epoch": 0.58605174, "global_step/max_steps": "521/8890", "percentage": "5.86%", "elapsed_time": "10m 33s", "remaining_time": "2h 49m 31s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.822788} {"loss": 0.84421915, "grad_norm": 2.07877493, "learning_rate": 9.998e-05, "token_acc": 0.72051696, "epoch": 0.5871766, "global_step/max_steps": "522/8890", "percentage": "5.87%", "elapsed_time": "10m 34s", "remaining_time": "2h 49m 27s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.822986} {"loss": 1.13607168, "grad_norm": 1.73637009, "learning_rate": 9.998e-05, "token_acc": 0.69952153, "epoch": 0.58830146, "global_step/max_steps": "523/8890", "percentage": "5.88%", "elapsed_time": "10m 35s", "remaining_time": "2h 49m 25s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.823077} {"loss": 0.81186473, "grad_norm": 1.62884939, "learning_rate": 9.998e-05, "token_acc": 0.76476907, "epoch": 0.58942632, "global_step/max_steps": "524/8890", "percentage": "5.89%", "elapsed_time": "10m 36s", "remaining_time": "2h 49m 23s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.823177} {"loss": 0.99578732, "grad_norm": 1.86398709, "learning_rate": 9.998e-05, "token_acc": 0.69615385, "epoch": 0.59055118, "global_step/max_steps": "525/8890", "percentage": "5.91%", "elapsed_time": "10m 37s", "remaining_time": "2h 49m 19s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.823375} {"loss": 0.82779968, "grad_norm": 1.8878994, "learning_rate": 9.998e-05, "token_acc": 0.76268861, "epoch": 0.59167604, "global_step/max_steps": "526/8890", "percentage": "5.92%", "elapsed_time": "10m 38s", "remaining_time": "2h 49m 16s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.823496} {"loss": 0.97067958, "grad_norm": 1.67131257, "learning_rate": 9.998e-05, "token_acc": 0.72154472, "epoch": 0.5928009, "global_step/max_steps": "527/8890", "percentage": "5.93%", "elapsed_time": "10m 40s", "remaining_time": "2h 49m 17s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.823323} {"loss": 0.68545061, "grad_norm": 1.36134911, "learning_rate": 9.998e-05, "token_acc": 0.80751174, "epoch": 0.59392576, "global_step/max_steps": "528/8890", "percentage": "5.94%", "elapsed_time": "10m 41s", "remaining_time": "2h 49m 23s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.822774} {"loss": 1.05814111, "grad_norm": 1.5325563, "learning_rate": 9.998e-05, "token_acc": 0.70871985, "epoch": 0.59505062, "global_step/max_steps": "529/8890", "percentage": "5.95%", "elapsed_time": "10m 42s", "remaining_time": "2h 49m 20s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.822856} {"loss": 0.98603195, "grad_norm": 1.96564078, "learning_rate": 9.998e-05, "token_acc": 0.72623574, "epoch": 0.59617548, "global_step/max_steps": "530/8890", "percentage": "5.96%", "elapsed_time": "10m 43s", "remaining_time": "2h 49m 16s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.823113} {"loss": 0.89250404, "grad_norm": 1.73641694, "learning_rate": 9.997e-05, "token_acc": 0.76048714, "epoch": 0.59730034, "global_step/max_steps": "531/8890", "percentage": "5.97%", "elapsed_time": "10m 44s", "remaining_time": "2h 49m 11s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.823425} {"loss": 1.00421631, "grad_norm": 1.58987701, "learning_rate": 9.997e-05, "token_acc": 0.72163966, "epoch": 0.5984252, "global_step/max_steps": "532/8890", "percentage": "5.98%", "elapsed_time": "10m 45s", "remaining_time": "2h 49m 8s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.823543} {"loss": 0.88940662, "grad_norm": 1.70387888, "learning_rate": 9.997e-05, "token_acc": 0.7316129, "epoch": 0.59955006, "global_step/max_steps": "533/8890", "percentage": "6.00%", "elapsed_time": "10m 47s", "remaining_time": "2h 49m 8s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.823468} {"loss": 1.07596099, "grad_norm": 1.62554884, "learning_rate": 9.997e-05, "token_acc": 0.7061657, "epoch": 0.60067492, "global_step/max_steps": "534/8890", "percentage": "6.01%", "elapsed_time": "10m 48s", "remaining_time": "2h 49m 8s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.823398} {"loss": 0.96204549, "grad_norm": 1.78017247, "learning_rate": 9.997e-05, "token_acc": 0.72727273, "epoch": 0.60179978, "global_step/max_steps": "535/8890", "percentage": "6.02%", "elapsed_time": "10m 49s", "remaining_time": "2h 49m 4s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.823609} {"loss": 0.93518996, "grad_norm": 1.63219249, "learning_rate": 9.997e-05, "token_acc": 0.72844828, "epoch": 0.60292463, "global_step/max_steps": "536/8890", "percentage": "6.03%", "elapsed_time": "10m 50s", "remaining_time": "2h 49m 2s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.823646} {"loss": 0.7403425, "grad_norm": 1.32370543, "learning_rate": 9.997e-05, "token_acc": 0.79269497, "epoch": 0.60404949, "global_step/max_steps": "537/8890", "percentage": "6.04%", "elapsed_time": "10m 52s", "remaining_time": "2h 49m 2s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.823565} {"loss": 0.90717524, "grad_norm": 1.74299717, "learning_rate": 9.997e-05, "token_acc": 0.74318508, "epoch": 0.60517435, "global_step/max_steps": "538/8890", "percentage": "6.05%", "elapsed_time": "10m 53s", "remaining_time": "2h 48m 59s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.823685} {"loss": 0.90239191, "grad_norm": 1.5787462, "learning_rate": 9.997e-05, "token_acc": 0.7396789, "epoch": 0.60629921, "global_step/max_steps": "539/8890", "percentage": "6.06%", "elapsed_time": "10m 54s", "remaining_time": "2h 48m 56s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.823856} {"loss": 0.84031105, "grad_norm": 1.6242975, "learning_rate": 9.997e-05, "token_acc": 0.75497512, "epoch": 0.60742407, "global_step/max_steps": "540/8890", "percentage": "6.07%", "elapsed_time": "10m 55s", "remaining_time": "2h 48m 52s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.824084} {"loss": 0.92295778, "grad_norm": 1.67715144, "learning_rate": 9.997e-05, "token_acc": 0.72785623, "epoch": 0.60854893, "global_step/max_steps": "541/8890", "percentage": "6.09%", "elapsed_time": "10m 56s", "remaining_time": "2h 48m 48s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.824268} {"loss": 1.01402032, "grad_norm": 1.85395515, "learning_rate": 9.997e-05, "token_acc": 0.73563218, "epoch": 0.60967379, "global_step/max_steps": "542/8890", "percentage": "6.10%", "elapsed_time": "10m 57s", "remaining_time": "2h 48m 45s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.824485} {"loss": 0.94523835, "grad_norm": 1.72650576, "learning_rate": 9.997e-05, "token_acc": 0.70906949, "epoch": 0.61079865, "global_step/max_steps": "543/8890", "percentage": "6.11%", "elapsed_time": "10m 58s", "remaining_time": "2h 48m 43s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.824545} {"loss": 0.87746626, "grad_norm": 1.69107044, "learning_rate": 9.997e-05, "token_acc": 0.75445816, "epoch": 0.61192351, "global_step/max_steps": "544/8890", "percentage": "6.12%", "elapsed_time": "10m 59s", "remaining_time": "2h 48m 43s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.824425} {"loss": 1.08996296, "grad_norm": 1.61208689, "learning_rate": 9.997e-05, "token_acc": 0.70175439, "epoch": 0.61304837, "global_step/max_steps": "545/8890", "percentage": "6.13%", "elapsed_time": "11m 0s", "remaining_time": "2h 48m 39s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.824623} {"loss": 1.00959122, "grad_norm": 1.80124474, "learning_rate": 9.996e-05, "token_acc": 0.70116429, "epoch": 0.61417323, "global_step/max_steps": "546/8890", "percentage": "6.14%", "elapsed_time": "11m 1s", "remaining_time": "2h 48m 34s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.82494} {"loss": 0.82580739, "grad_norm": 1.51034999, "learning_rate": 9.996e-05, "token_acc": 0.76369495, "epoch": 0.61529809, "global_step/max_steps": "547/8890", "percentage": "6.15%", "elapsed_time": "11m 3s", "remaining_time": "2h 48m 43s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.824162} {"loss": 1.03731561, "grad_norm": 1.46269584, "learning_rate": 9.996e-05, "token_acc": 0.72916667, "epoch": 0.61642295, "global_step/max_steps": "548/8890", "percentage": "6.16%", "elapsed_time": "11m 4s", "remaining_time": "2h 48m 40s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.824282} {"loss": 0.96920723, "grad_norm": 1.93151331, "learning_rate": 9.996e-05, "token_acc": 0.71134021, "epoch": 0.61754781, "global_step/max_steps": "549/8890", "percentage": "6.18%", "elapsed_time": "11m 5s", "remaining_time": "2h 48m 36s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.824514} {"loss": 1.13263059, "grad_norm": 1.6201508, "learning_rate": 9.996e-05, "token_acc": 0.70178739, "epoch": 0.61867267, "global_step/max_steps": "550/8890", "percentage": "6.19%", "elapsed_time": "11m 7s", "remaining_time": "2h 48m 36s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.824431} {"loss": 1.10133648, "grad_norm": 1.73615623, "learning_rate": 9.996e-05, "token_acc": 0.69365854, "epoch": 0.61979753, "global_step/max_steps": "551/8890", "percentage": "6.20%", "elapsed_time": "11m 8s", "remaining_time": "2h 48m 34s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.824486} {"loss": 0.90699673, "grad_norm": 1.89648926, "learning_rate": 9.996e-05, "token_acc": 0.73989899, "epoch": 0.62092238, "global_step/max_steps": "552/8890", "percentage": "6.21%", "elapsed_time": "11m 9s", "remaining_time": "2h 48m 30s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.82465} {"loss": 1.08798075, "grad_norm": 1.63816786, "learning_rate": 9.996e-05, "token_acc": 0.696913, "epoch": 0.62204724, "global_step/max_steps": "553/8890", "percentage": "6.22%", "elapsed_time": "11m 10s", "remaining_time": "2h 48m 28s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.824719} {"loss": 0.93410492, "grad_norm": 1.78181684, "learning_rate": 9.996e-05, "token_acc": 0.73192771, "epoch": 0.6231721, "global_step/max_steps": "554/8890", "percentage": "6.23%", "elapsed_time": "11m 11s", "remaining_time": "2h 48m 26s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.824836} {"loss": 0.87685859, "grad_norm": 1.55280709, "learning_rate": 9.996e-05, "token_acc": 0.74758454, "epoch": 0.62429696, "global_step/max_steps": "555/8890", "percentage": "6.24%", "elapsed_time": "11m 12s", "remaining_time": "2h 48m 22s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.825023} {"loss": 0.87331748, "grad_norm": 1.59027457, "learning_rate": 9.996e-05, "token_acc": 0.75555556, "epoch": 0.62542182, "global_step/max_steps": "556/8890", "percentage": "6.25%", "elapsed_time": "11m 13s", "remaining_time": "2h 48m 20s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.825117} {"loss": 1.07009542, "grad_norm": 1.64594448, "learning_rate": 9.996e-05, "token_acc": 0.68534907, "epoch": 0.62654668, "global_step/max_steps": "557/8890", "percentage": "6.27%", "elapsed_time": "11m 14s", "remaining_time": "2h 48m 18s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.825192} {"loss": 1.07065237, "grad_norm": 1.84542847, "learning_rate": 9.996e-05, "token_acc": 0.71062271, "epoch": 0.62767154, "global_step/max_steps": "558/8890", "percentage": "6.28%", "elapsed_time": "11m 15s", "remaining_time": "2h 48m 10s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.82573} {"loss": 1.22197175, "grad_norm": 1.56993318, "learning_rate": 9.996e-05, "token_acc": 0.68032129, "epoch": 0.6287964, "global_step/max_steps": "559/8890", "percentage": "6.29%", "elapsed_time": "11m 16s", "remaining_time": "2h 48m 8s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.825797} {"loss": 0.90565896, "grad_norm": 1.56083894, "learning_rate": 9.995e-05, "token_acc": 0.74099099, "epoch": 0.62992126, "global_step/max_steps": "560/8890", "percentage": "6.30%", "elapsed_time": "11m 18s", "remaining_time": "2h 48m 6s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.825882} {"loss": 0.71321708, "grad_norm": 1.37144089, "learning_rate": 9.995e-05, "token_acc": 0.78564405, "epoch": 0.63104612, "global_step/max_steps": "561/8890", "percentage": "6.31%", "elapsed_time": "11m 19s", "remaining_time": "2h 48m 4s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.825943} {"loss": 0.99655056, "grad_norm": 1.70962417, "learning_rate": 9.995e-05, "token_acc": 0.70562771, "epoch": 0.63217098, "global_step/max_steps": "562/8890", "percentage": "6.32%", "elapsed_time": "11m 20s", "remaining_time": "2h 48m 2s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.826019} {"loss": 0.94633996, "grad_norm": 1.74426115, "learning_rate": 9.995e-05, "token_acc": 0.72620447, "epoch": 0.63329584, "global_step/max_steps": "563/8890", "percentage": "6.33%", "elapsed_time": "11m 21s", "remaining_time": "2h 47m 57s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.826323} {"loss": 0.91361928, "grad_norm": 1.70506203, "learning_rate": 9.995e-05, "token_acc": 0.73142857, "epoch": 0.6344207, "global_step/max_steps": "564/8890", "percentage": "6.34%", "elapsed_time": "11m 22s", "remaining_time": "2h 47m 55s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.826384} {"loss": 0.88091528, "grad_norm": 1.61220193, "learning_rate": 9.995e-05, "token_acc": 0.74465558, "epoch": 0.63554556, "global_step/max_steps": "565/8890", "percentage": "6.36%", "elapsed_time": "11m 23s", "remaining_time": "2h 47m 54s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.826307} {"loss": 0.92763031, "grad_norm": 1.7859869, "learning_rate": 9.995e-05, "token_acc": 0.7297619, "epoch": 0.63667042, "global_step/max_steps": "566/8890", "percentage": "6.37%", "elapsed_time": "11m 24s", "remaining_time": "2h 47m 52s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.826382} {"loss": 0.99958694, "grad_norm": 1.58258045, "learning_rate": 9.995e-05, "token_acc": 0.73425926, "epoch": 0.63779528, "global_step/max_steps": "567/8890", "percentage": "6.38%", "elapsed_time": "11m 26s", "remaining_time": "2h 47m 51s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.826424} {"loss": 1.02185917, "grad_norm": 1.65569305, "learning_rate": 9.995e-05, "token_acc": 0.72497366, "epoch": 0.63892013, "global_step/max_steps": "568/8890", "percentage": "6.39%", "elapsed_time": "11m 26s", "remaining_time": "2h 47m 43s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.826973} {"loss": 0.93871891, "grad_norm": 1.77208149, "learning_rate": 9.995e-05, "token_acc": 0.7264631, "epoch": 0.64004499, "global_step/max_steps": "569/8890", "percentage": "6.40%", "elapsed_time": "11m 27s", "remaining_time": "2h 47m 37s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.827354} {"loss": 0.83201671, "grad_norm": 1.41634309, "learning_rate": 9.995e-05, "token_acc": 0.76431298, "epoch": 0.64116985, "global_step/max_steps": "570/8890", "percentage": "6.41%", "elapsed_time": "11m 29s", "remaining_time": "2h 47m 42s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.826837} {"loss": 0.97383618, "grad_norm": 1.50149262, "learning_rate": 9.995e-05, "token_acc": 0.73563218, "epoch": 0.64229471, "global_step/max_steps": "571/8890", "percentage": "6.42%", "elapsed_time": "11m 30s", "remaining_time": "2h 47m 40s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.826935} {"loss": 0.74712569, "grad_norm": 1.42677033, "learning_rate": 9.994e-05, "token_acc": 0.78873239, "epoch": 0.64341957, "global_step/max_steps": "572/8890", "percentage": "6.43%", "elapsed_time": "11m 31s", "remaining_time": "2h 47m 40s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.826817} {"loss": 0.97803521, "grad_norm": 1.59614336, "learning_rate": 9.994e-05, "token_acc": 0.73193047, "epoch": 0.64454443, "global_step/max_steps": "573/8890", "percentage": "6.45%", "elapsed_time": "11m 32s", "remaining_time": "2h 47m 38s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.826885} {"loss": 0.95983577, "grad_norm": 2.05024958, "learning_rate": 9.994e-05, "token_acc": 0.72543353, "epoch": 0.64566929, "global_step/max_steps": "574/8890", "percentage": "6.46%", "elapsed_time": "11m 34s", "remaining_time": "2h 47m 35s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.82698} {"loss": 0.94143283, "grad_norm": 1.7178117, "learning_rate": 9.994e-05, "token_acc": 0.73522459, "epoch": 0.64679415, "global_step/max_steps": "575/8890", "percentage": "6.47%", "elapsed_time": "11m 35s", "remaining_time": "2h 47m 33s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.827057} {"loss": 0.96503013, "grad_norm": 1.76311827, "learning_rate": 9.994e-05, "token_acc": 0.71878646, "epoch": 0.64791901, "global_step/max_steps": "576/8890", "percentage": "6.48%", "elapsed_time": "11m 36s", "remaining_time": "2h 47m 31s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.827149} {"loss": 0.71374094, "grad_norm": 1.72849345, "learning_rate": 9.994e-05, "token_acc": 0.80569514, "epoch": 0.64904387, "global_step/max_steps": "577/8890", "percentage": "6.49%", "elapsed_time": "11m 37s", "remaining_time": "2h 47m 27s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.827383} {"loss": 0.87314689, "grad_norm": 1.89591551, "learning_rate": 9.994e-05, "token_acc": 0.73676248, "epoch": 0.65016873, "global_step/max_steps": "578/8890", "percentage": "6.50%", "elapsed_time": "11m 38s", "remaining_time": "2h 47m 24s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.8275} {"loss": 0.9127748, "grad_norm": 1.85335827, "learning_rate": 9.994e-05, "token_acc": 0.74418605, "epoch": 0.65129359, "global_step/max_steps": "579/8890", "percentage": "6.51%", "elapsed_time": "11m 39s", "remaining_time": "2h 47m 22s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.827571} {"loss": 0.83369219, "grad_norm": 1.47085679, "learning_rate": 9.994e-05, "token_acc": 0.7660021, "epoch": 0.65241845, "global_step/max_steps": "580/8890", "percentage": "6.52%", "elapsed_time": "11m 40s", "remaining_time": "2h 47m 21s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.827604} {"loss": 0.78336793, "grad_norm": 1.37551856, "learning_rate": 9.994e-05, "token_acc": 0.77617675, "epoch": 0.65354331, "global_step/max_steps": "581/8890", "percentage": "6.54%", "elapsed_time": "11m 41s", "remaining_time": "2h 47m 18s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.827675} {"loss": 1.05019796, "grad_norm": 1.7509197, "learning_rate": 9.994e-05, "token_acc": 0.69267139, "epoch": 0.65466817, "global_step/max_steps": "582/8890", "percentage": "6.55%", "elapsed_time": "11m 43s", "remaining_time": "2h 47m 16s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.827761} {"loss": 0.86267185, "grad_norm": 1.89507222, "learning_rate": 9.993e-05, "token_acc": 0.73712737, "epoch": 0.65579303, "global_step/max_steps": "583/8890", "percentage": "6.56%", "elapsed_time": "11m 44s", "remaining_time": "2h 47m 14s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.827858} {"loss": 0.82415378, "grad_norm": 1.56555605, "learning_rate": 9.993e-05, "token_acc": 0.75498008, "epoch": 0.65691789, "global_step/max_steps": "584/8890", "percentage": "6.57%", "elapsed_time": "11m 45s", "remaining_time": "2h 47m 12s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.827928} {"loss": 1.11707592, "grad_norm": 1.62488937, "learning_rate": 9.993e-05, "token_acc": 0.68576389, "epoch": 0.65804274, "global_step/max_steps": "585/8890", "percentage": "6.58%", "elapsed_time": "11m 46s", "remaining_time": "2h 47m 12s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.827833} {"loss": 0.88787854, "grad_norm": 1.70444643, "learning_rate": 9.993e-05, "token_acc": 0.72348485, "epoch": 0.6591676, "global_step/max_steps": "586/8890", "percentage": "6.59%", "elapsed_time": "11m 47s", "remaining_time": "2h 47m 10s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.827912} {"loss": 1.01745701, "grad_norm": 1.75618589, "learning_rate": 9.993e-05, "token_acc": 0.72897196, "epoch": 0.66029246, "global_step/max_steps": "587/8890", "percentage": "6.60%", "elapsed_time": "11m 48s", "remaining_time": "2h 47m 8s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.827961} {"loss": 1.0948478, "grad_norm": 1.77215922, "learning_rate": 9.993e-05, "token_acc": 0.69581281, "epoch": 0.66141732, "global_step/max_steps": "588/8890", "percentage": "6.61%", "elapsed_time": "11m 50s", "remaining_time": "2h 47m 6s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.828034} {"loss": 0.99417257, "grad_norm": 1.75968707, "learning_rate": 9.993e-05, "token_acc": 0.69525959, "epoch": 0.66254218, "global_step/max_steps": "589/8890", "percentage": "6.63%", "elapsed_time": "11m 51s", "remaining_time": "2h 47m 2s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.828262} {"loss": 0.94293237, "grad_norm": 1.94777226, "learning_rate": 9.993e-05, "token_acc": 0.7318236, "epoch": 0.66366704, "global_step/max_steps": "590/8890", "percentage": "6.64%", "elapsed_time": "11m 52s", "remaining_time": "2h 47m 0s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.828287} {"loss": 0.9824059, "grad_norm": 1.6137141, "learning_rate": 9.993e-05, "token_acc": 0.7124632, "epoch": 0.6647919, "global_step/max_steps": "591/8890", "percentage": "6.65%", "elapsed_time": "11m 53s", "remaining_time": "2h 46m 58s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.828365} {"loss": 0.95581031, "grad_norm": 1.63187408, "learning_rate": 9.993e-05, "token_acc": 0.71647902, "epoch": 0.66591676, "global_step/max_steps": "592/8890", "percentage": "6.66%", "elapsed_time": "11m 54s", "remaining_time": "2h 46m 54s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.828593} {"loss": 1.01856697, "grad_norm": 2.08914161, "learning_rate": 9.992e-05, "token_acc": 0.7029703, "epoch": 0.66704162, "global_step/max_steps": "593/8890", "percentage": "6.67%", "elapsed_time": "11m 55s", "remaining_time": "2h 46m 47s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.829085} {"loss": 1.04304206, "grad_norm": 1.74601066, "learning_rate": 9.992e-05, "token_acc": 0.70511425, "epoch": 0.66816648, "global_step/max_steps": "594/8890", "percentage": "6.68%", "elapsed_time": "11m 56s", "remaining_time": "2h 46m 42s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.829357} {"loss": 0.78242081, "grad_norm": 1.80806947, "learning_rate": 9.992e-05, "token_acc": 0.76912181, "epoch": 0.66929134, "global_step/max_steps": "595/8890", "percentage": "6.69%", "elapsed_time": "11m 57s", "remaining_time": "2h 46m 40s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.82946} {"loss": 1.02335894, "grad_norm": 1.62384617, "learning_rate": 9.992e-05, "token_acc": 0.72228202, "epoch": 0.6704162, "global_step/max_steps": "596/8890", "percentage": "6.70%", "elapsed_time": "11m 58s", "remaining_time": "2h 46m 40s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.82934} {"loss": 1.16134262, "grad_norm": 1.54816663, "learning_rate": 9.992e-05, "token_acc": 0.67285945, "epoch": 0.67154106, "global_step/max_steps": "597/8890", "percentage": "6.72%", "elapsed_time": "11m 59s", "remaining_time": "2h 46m 38s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.829408} {"loss": 1.00230849, "grad_norm": 1.709005, "learning_rate": 9.992e-05, "token_acc": 0.71039604, "epoch": 0.67266592, "global_step/max_steps": "598/8890", "percentage": "6.73%", "elapsed_time": "12m 0s", "remaining_time": "2h 46m 36s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.829455} {"loss": 0.86613154, "grad_norm": 1.45336819, "learning_rate": 9.992e-05, "token_acc": 0.75443511, "epoch": 0.67379078, "global_step/max_steps": "599/8890", "percentage": "6.74%", "elapsed_time": "12m 1s", "remaining_time": "2h 46m 32s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.8297} {"loss": 0.82803291, "grad_norm": 1.77577341, "learning_rate": 9.992e-05, "token_acc": 0.75414013, "epoch": 0.67491564, "global_step/max_steps": "600/8890", "percentage": "6.75%", "elapsed_time": "12m 3s", "remaining_time": "2h 46m 30s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.829802} {"eval_loss": 0.92032945, "eval_runtime": 31.6118, "eval_samples_per_second": 25.402, "eval_steps_per_second": 3.195, "eval_token_acc": 0.73327739, "epoch": 0.67491564, "global_step/max_steps": "600/8890", "percentage": "6.75%", "elapsed_time": "12m 34s", "remaining_time": "2h 53m 47s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.795035} {"loss": 1.12023664, "grad_norm": 1.68712127, "learning_rate": 9.992e-05, "token_acc": 0.69119171, "epoch": 0.67604049, "global_step/max_steps": "601/8890", "percentage": "6.76%", "elapsed_time": "12m 49s", "remaining_time": "2h 56m 56s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.780769} {"loss": 1.20039678, "grad_norm": 1.7148068, "learning_rate": 9.991e-05, "token_acc": 0.68355641, "epoch": 0.67716535, "global_step/max_steps": "602/8890", "percentage": "6.77%", "elapsed_time": "12m 50s", "remaining_time": "2h 56m 53s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.780875} {"loss": 0.82560575, "grad_norm": 1.72951853, "learning_rate": 9.991e-05, "token_acc": 0.78848921, "epoch": 0.67829021, "global_step/max_steps": "603/8890", "percentage": "6.78%", "elapsed_time": "12m 52s", "remaining_time": "2h 56m 50s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.781047} {"loss": 0.84114206, "grad_norm": 1.69767511, "learning_rate": 9.991e-05, "token_acc": 0.75638051, "epoch": 0.67941507, "global_step/max_steps": "604/8890", "percentage": "6.79%", "elapsed_time": "12m 52s", "remaining_time": "2h 56m 44s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.781377} {"loss": 1.1071229, "grad_norm": 1.74249923, "learning_rate": 9.991e-05, "token_acc": 0.70431211, "epoch": 0.68053993, "global_step/max_steps": "605/8890", "percentage": "6.81%", "elapsed_time": "12m 54s", "remaining_time": "2h 56m 40s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.781534} {"loss": 1.02554607, "grad_norm": 1.7653929, "learning_rate": 9.991e-05, "token_acc": 0.71582734, "epoch": 0.68166479, "global_step/max_steps": "606/8890", "percentage": "6.82%", "elapsed_time": "12m 55s", "remaining_time": "2h 56m 37s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.781684} {"loss": 0.87186396, "grad_norm": 1.99003446, "learning_rate": 9.991e-05, "token_acc": 0.73484848, "epoch": 0.68278965, "global_step/max_steps": "607/8890", "percentage": "6.83%", "elapsed_time": "12m 56s", "remaining_time": "2h 56m 33s", "memory(GiB)": 22.74, "train_speed(iter/s)": 0.781912} {"loss": 0.93669415, "grad_norm": 2.04538774, "learning_rate": 9.991e-05, "token_acc": 0.73984962, "epoch": 0.68391451, "global_step/max_steps": "608/8890", "percentage": "6.84%", "elapsed_time": "12m 57s", "remaining_time": "2h 56m 29s", "memory(GiB)": 22.81, "train_speed(iter/s)": 0.782131} {"loss": 0.95883328, "grad_norm": 1.80669272, "learning_rate": 9.991e-05, "token_acc": 0.7342398, "epoch": 0.68503937, "global_step/max_steps": "609/8890", "percentage": "6.85%", "elapsed_time": "12m 58s", "remaining_time": "2h 56m 26s", "memory(GiB)": 22.81, "train_speed(iter/s)": 0.782243} {"loss": 1.03054404, "grad_norm": 1.7486856, "learning_rate": 9.991e-05, "token_acc": 0.70034443, "epoch": 0.68616423, "global_step/max_steps": "610/8890", "percentage": "6.86%", "elapsed_time": "12m 59s", "remaining_time": "2h 56m 21s", "memory(GiB)": 22.81, "train_speed(iter/s)": 0.782506} {"loss": 0.90576291, "grad_norm": 1.80235672, "learning_rate": 9.99e-05, "token_acc": 0.74388674, "epoch": 0.68728909, "global_step/max_steps": "611/8890", "percentage": "6.87%", "elapsed_time": "13m 0s", "remaining_time": "2h 56m 18s", "memory(GiB)": 22.81, "train_speed(iter/s)": 0.78265} {"loss": 0.98217207, "grad_norm": 1.53366935, "learning_rate": 9.99e-05, "token_acc": 0.72615923, "epoch": 0.68841395, "global_step/max_steps": "612/8890", "percentage": "6.88%", "elapsed_time": "13m 1s", "remaining_time": "2h 56m 15s", "memory(GiB)": 22.81, "train_speed(iter/s)": 0.782741} {"loss": 0.8007915, "grad_norm": 1.43519843, "learning_rate": 9.99e-05, "token_acc": 0.781158, "epoch": 0.68953881, "global_step/max_steps": "613/8890", "percentage": "6.90%", "elapsed_time": "13m 3s", "remaining_time": "2h 56m 13s", "memory(GiB)": 22.81, "train_speed(iter/s)": 0.782788} {"loss": 0.87304127, "grad_norm": 1.82522035, "learning_rate": 9.99e-05, "token_acc": 0.758967, "epoch": 0.69066367, "global_step/max_steps": "614/8890", "percentage": "6.91%", "elapsed_time": "13m 4s", "remaining_time": "2h 56m 10s", "memory(GiB)": 22.81, "train_speed(iter/s)": 0.782911} {"loss": 0.92694724, "grad_norm": 1.79921794, "learning_rate": 9.99e-05, "token_acc": 0.73299748, "epoch": 0.69178853, "global_step/max_steps": "615/8890", "percentage": "6.92%", "elapsed_time": "13m 5s", "remaining_time": "2h 56m 7s", "memory(GiB)": 22.81, "train_speed(iter/s)": 0.783033} {"loss": 0.86845028, "grad_norm": 1.56765461, "learning_rate": 9.99e-05, "token_acc": 0.74716553, "epoch": 0.69291339, "global_step/max_steps": "616/8890", "percentage": "6.93%", "elapsed_time": "13m 6s", "remaining_time": "2h 56m 2s", "memory(GiB)": 22.81, "train_speed(iter/s)": 0.78337} {"loss": 0.92209148, "grad_norm": 1.7704494, "learning_rate": 9.99e-05, "token_acc": 0.72313527, "epoch": 0.69403825, "global_step/max_steps": "617/8890", "percentage": "6.94%", "elapsed_time": "13m 7s", "remaining_time": "2h 55m 59s", "memory(GiB)": 22.81, "train_speed(iter/s)": 0.783484} {"loss": 0.85318005, "grad_norm": 1.73433161, "learning_rate": 9.99e-05, "token_acc": 0.75552666, "epoch": 0.6951631, "global_step/max_steps": "618/8890", "percentage": "6.95%", "elapsed_time": "13m 8s", "remaining_time": "2h 55m 56s", "memory(GiB)": 22.81, "train_speed(iter/s)": 0.783612} {"loss": 0.82848382, "grad_norm": 1.67821646, "learning_rate": 9.99e-05, "token_acc": 0.74444444, "epoch": 0.69628796, "global_step/max_steps": "619/8890", "percentage": "6.96%", "elapsed_time": "13m 9s", "remaining_time": "2h 55m 53s", "memory(GiB)": 22.81, "train_speed(iter/s)": 0.783733} {"loss": 0.90659875, "grad_norm": 1.55076528, "learning_rate": 9.989e-05, "token_acc": 0.7443787, "epoch": 0.69741282, "global_step/max_steps": "620/8890", "percentage": "6.97%", "elapsed_time": "13m 10s", "remaining_time": "2h 55m 50s", "memory(GiB)": 22.81, "train_speed(iter/s)": 0.783847} {"loss": 0.91542435, "grad_norm": 1.71058989, "learning_rate": 9.989e-05, "token_acc": 0.74578059, "epoch": 0.69853768, "global_step/max_steps": "621/8890", "percentage": "6.99%", "elapsed_time": "13m 12s", "remaining_time": "2h 55m 47s", "memory(GiB)": 22.81, "train_speed(iter/s)": 0.783953} {"loss": 1.11241794, "grad_norm": 1.75510061, "learning_rate": 9.989e-05, "token_acc": 0.69925611, "epoch": 0.69966254, "global_step/max_steps": "622/8890", "percentage": "7.00%", "elapsed_time": "13m 13s", "remaining_time": "2h 55m 43s", "memory(GiB)": 22.81, "train_speed(iter/s)": 0.784166} {"loss": 1.04307067, "grad_norm": 1.5816679, "learning_rate": 9.989e-05, "token_acc": 0.72642487, "epoch": 0.7007874, "global_step/max_steps": "623/8890", "percentage": "7.01%", "elapsed_time": "13m 14s", "remaining_time": "2h 55m 41s", "memory(GiB)": 22.81, "train_speed(iter/s)": 0.784261} {"loss": 0.85957247, "grad_norm": 1.56504881, "learning_rate": 9.989e-05, "token_acc": 0.74911243, "epoch": 0.70191226, "global_step/max_steps": "624/8890", "percentage": "7.02%", "elapsed_time": "13m 15s", "remaining_time": "2h 55m 38s", "memory(GiB)": 22.81, "train_speed(iter/s)": 0.784374} {"loss": 1.09297729, "grad_norm": 1.83146477, "learning_rate": 9.989e-05, "token_acc": 0.70644719, "epoch": 0.70303712, "global_step/max_steps": "625/8890", "percentage": "7.03%", "elapsed_time": "13m 16s", "remaining_time": "2h 55m 35s", "memory(GiB)": 22.81, "train_speed(iter/s)": 0.784501} {"loss": 0.98704797, "grad_norm": 1.47438812, "learning_rate": 9.989e-05, "token_acc": 0.71923744, "epoch": 0.70416198, "global_step/max_steps": "626/8890", "percentage": "7.04%", "elapsed_time": "13m 17s", "remaining_time": "2h 55m 32s", "memory(GiB)": 22.81, "train_speed(iter/s)": 0.784617} {"loss": 0.81555128, "grad_norm": 1.47107029, "learning_rate": 9.989e-05, "token_acc": 0.75865922, "epoch": 0.70528684, "global_step/max_steps": "627/8890", "percentage": "7.05%", "elapsed_time": "13m 19s", "remaining_time": "2h 55m 29s", "memory(GiB)": 22.81, "train_speed(iter/s)": 0.784729} {"loss": 0.85633004, "grad_norm": 1.72367775, "learning_rate": 9.988e-05, "token_acc": 0.76478318, "epoch": 0.7064117, "global_step/max_steps": "628/8890", "percentage": "7.06%", "elapsed_time": "13m 20s", "remaining_time": "2h 55m 27s", "memory(GiB)": 22.81, "train_speed(iter/s)": 0.784839} {"loss": 0.87770951, "grad_norm": 1.66039121, "learning_rate": 9.988e-05, "token_acc": 0.74973147, "epoch": 0.70753656, "global_step/max_steps": "629/8890", "percentage": "7.08%", "elapsed_time": "13m 21s", "remaining_time": "2h 55m 22s", "memory(GiB)": 22.81, "train_speed(iter/s)": 0.785058} {"loss": 0.8464756, "grad_norm": 1.64609706, "learning_rate": 9.988e-05, "token_acc": 0.74401914, "epoch": 0.70866142, "global_step/max_steps": "630/8890", "percentage": "7.09%", "elapsed_time": "13m 22s", "remaining_time": "2h 55m 20s", "memory(GiB)": 22.81, "train_speed(iter/s)": 0.785131} {"loss": 0.79623061, "grad_norm": 1.43605065, "learning_rate": 9.988e-05, "token_acc": 0.76826484, "epoch": 0.70978628, "global_step/max_steps": "631/8890", "percentage": "7.10%", "elapsed_time": "13m 23s", "remaining_time": "2h 55m 17s", "memory(GiB)": 22.81, "train_speed(iter/s)": 0.785254} {"loss": 0.83094418, "grad_norm": 1.56734121, "learning_rate": 9.988e-05, "token_acc": 0.75771971, "epoch": 0.71091114, "global_step/max_steps": "632/8890", "percentage": "7.11%", "elapsed_time": "13m 24s", "remaining_time": "2h 55m 14s", "memory(GiB)": 22.81, "train_speed(iter/s)": 0.785381} {"loss": 0.82474273, "grad_norm": 1.69519055, "learning_rate": 9.988e-05, "token_acc": 0.77015644, "epoch": 0.712036, "global_step/max_steps": "633/8890", "percentage": "7.12%", "elapsed_time": "13m 25s", "remaining_time": "2h 55m 11s", "memory(GiB)": 22.81, "train_speed(iter/s)": 0.78549} {"loss": 0.94947171, "grad_norm": 1.61446261, "learning_rate": 9.988e-05, "token_acc": 0.73568282, "epoch": 0.71316085, "global_step/max_steps": "634/8890", "percentage": "7.13%", "elapsed_time": "13m 27s", "remaining_time": "2h 55m 8s", "memory(GiB)": 22.81, "train_speed(iter/s)": 0.785621} {"loss": 0.76891363, "grad_norm": 1.77774644, "learning_rate": 9.988e-05, "token_acc": 0.76946565, "epoch": 0.71428571, "global_step/max_steps": "635/8890", "percentage": "7.14%", "elapsed_time": "13m 28s", "remaining_time": "2h 55m 4s", "memory(GiB)": 22.81, "train_speed(iter/s)": 0.785884} {"loss": 0.88681656, "grad_norm": 1.64757991, "learning_rate": 9.987e-05, "token_acc": 0.75286624, "epoch": 0.71541057, "global_step/max_steps": "636/8890", "percentage": "7.15%", "elapsed_time": "13m 29s", "remaining_time": "2h 55m 1s", "memory(GiB)": 22.81, "train_speed(iter/s)": 0.785989} {"loss": 0.78760189, "grad_norm": 1.47509205, "learning_rate": 9.987e-05, "token_acc": 0.76701269, "epoch": 0.71653543, "global_step/max_steps": "637/8890", "percentage": "7.17%", "elapsed_time": "13m 30s", "remaining_time": "2h 54m 57s", "memory(GiB)": 22.81, "train_speed(iter/s)": 0.786203} {"loss": 0.83394384, "grad_norm": 1.44773567, "learning_rate": 9.987e-05, "token_acc": 0.75766017, "epoch": 0.71766029, "global_step/max_steps": "638/8890", "percentage": "7.18%", "elapsed_time": "13m 31s", "remaining_time": "2h 54m 54s", "memory(GiB)": 22.81, "train_speed(iter/s)": 0.786284} {"loss": 0.82217109, "grad_norm": 1.56221044, "learning_rate": 9.987e-05, "token_acc": 0.7741573, "epoch": 0.71878515, "global_step/max_steps": "639/8890", "percentage": "7.19%", "elapsed_time": "13m 32s", "remaining_time": "2h 54m 51s", "memory(GiB)": 22.81, "train_speed(iter/s)": 0.786431} {"loss": 0.93578875, "grad_norm": 1.74205339, "learning_rate": 9.987e-05, "token_acc": 0.7162891, "epoch": 0.71991001, "global_step/max_steps": "640/8890", "percentage": "7.20%", "elapsed_time": "13m 33s", "remaining_time": "2h 54m 50s", "memory(GiB)": 22.81, "train_speed(iter/s)": 0.786398} {"loss": 0.88231277, "grad_norm": 1.88179851, "learning_rate": 9.987e-05, "token_acc": 0.75471698, "epoch": 0.72103487, "global_step/max_steps": "641/8890", "percentage": "7.21%", "elapsed_time": "13m 34s", "remaining_time": "2h 54m 47s", "memory(GiB)": 22.81, "train_speed(iter/s)": 0.786538} {"loss": 1.00721502, "grad_norm": 1.78888583, "learning_rate": 9.987e-05, "token_acc": 0.71070013, "epoch": 0.72215973, "global_step/max_steps": "642/8890", "percentage": "7.22%", "elapsed_time": "13m 36s", "remaining_time": "2h 54m 45s", "memory(GiB)": 22.81, "train_speed(iter/s)": 0.786638} {"loss": 0.90543127, "grad_norm": 1.65617883, "learning_rate": 9.986e-05, "token_acc": 0.76056338, "epoch": 0.72328459, "global_step/max_steps": "643/8890", "percentage": "7.23%", "elapsed_time": "13m 37s", "remaining_time": "2h 54m 39s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.786991} {"loss": 0.77335685, "grad_norm": 1.64366567, "learning_rate": 9.986e-05, "token_acc": 0.76737968, "epoch": 0.72440945, "global_step/max_steps": "644/8890", "percentage": "7.24%", "elapsed_time": "13m 38s", "remaining_time": "2h 54m 34s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.787231} {"loss": 1.15849686, "grad_norm": 1.6398052, "learning_rate": 9.986e-05, "token_acc": 0.6782364, "epoch": 0.72553431, "global_step/max_steps": "645/8890", "percentage": "7.26%", "elapsed_time": "13m 39s", "remaining_time": "2h 54m 32s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.787317} {"loss": 1.01284885, "grad_norm": 1.55828559, "learning_rate": 9.986e-05, "token_acc": 0.70466321, "epoch": 0.72665917, "global_step/max_steps": "646/8890", "percentage": "7.27%", "elapsed_time": "13m 40s", "remaining_time": "2h 54m 29s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.787421} {"loss": 0.77641231, "grad_norm": 1.68841982, "learning_rate": 9.986e-05, "token_acc": 0.78791615, "epoch": 0.72778403, "global_step/max_steps": "647/8890", "percentage": "7.28%", "elapsed_time": "13m 41s", "remaining_time": "2h 54m 26s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.787569} {"loss": 0.91346395, "grad_norm": 1.58811498, "learning_rate": 9.986e-05, "token_acc": 0.75223499, "epoch": 0.72890889, "global_step/max_steps": "648/8890", "percentage": "7.29%", "elapsed_time": "13m 42s", "remaining_time": "2h 54m 23s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.787717} {"loss": 0.94507718, "grad_norm": 1.52635753, "learning_rate": 9.986e-05, "token_acc": 0.73454914, "epoch": 0.73003375, "global_step/max_steps": "649/8890", "percentage": "7.30%", "elapsed_time": "13m 44s", "remaining_time": "2h 54m 23s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.787579} {"loss": 0.94605082, "grad_norm": 1.30004263, "learning_rate": 9.985e-05, "token_acc": 0.73631387, "epoch": 0.73115861, "global_step/max_steps": "650/8890", "percentage": "7.31%", "elapsed_time": "13m 45s", "remaining_time": "2h 54m 21s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.787678} {"loss": 0.74061728, "grad_norm": 1.42512298, "learning_rate": 9.985e-05, "token_acc": 0.77777778, "epoch": 0.73228346, "global_step/max_steps": "651/8890", "percentage": "7.32%", "elapsed_time": "13m 46s", "remaining_time": "2h 54m 18s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.787745} {"loss": 0.9070614, "grad_norm": 1.77560318, "learning_rate": 9.985e-05, "token_acc": 0.73205128, "epoch": 0.73340832, "global_step/max_steps": "652/8890", "percentage": "7.33%", "elapsed_time": "13m 47s", "remaining_time": "2h 54m 16s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.787867} {"loss": 1.11988044, "grad_norm": 1.7369957, "learning_rate": 9.985e-05, "token_acc": 0.69222222, "epoch": 0.73453318, "global_step/max_steps": "653/8890", "percentage": "7.35%", "elapsed_time": "13m 48s", "remaining_time": "2h 54m 12s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.788006} {"loss": 0.90403432, "grad_norm": 1.50914586, "learning_rate": 9.985e-05, "token_acc": 0.74664107, "epoch": 0.73565804, "global_step/max_steps": "654/8890", "percentage": "7.36%", "elapsed_time": "13m 49s", "remaining_time": "2h 54m 10s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.78812} {"loss": 0.81094182, "grad_norm": 1.39773655, "learning_rate": 9.985e-05, "token_acc": 0.74852652, "epoch": 0.7367829, "global_step/max_steps": "655/8890", "percentage": "7.37%", "elapsed_time": "13m 50s", "remaining_time": "2h 54m 7s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.788263} {"loss": 0.92589808, "grad_norm": 1.58386457, "learning_rate": 9.985e-05, "token_acc": 0.75055928, "epoch": 0.73790776, "global_step/max_steps": "656/8890", "percentage": "7.38%", "elapsed_time": "13m 52s", "remaining_time": "2h 54m 4s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.788387} {"loss": 0.9133172, "grad_norm": 1.64889669, "learning_rate": 9.984e-05, "token_acc": 0.72523585, "epoch": 0.73903262, "global_step/max_steps": "657/8890", "percentage": "7.39%", "elapsed_time": "13m 53s", "remaining_time": "2h 54m 1s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.788492} {"loss": 0.93031001, "grad_norm": 1.78269279, "learning_rate": 9.984e-05, "token_acc": 0.71823204, "epoch": 0.74015748, "global_step/max_steps": "658/8890", "percentage": "7.40%", "elapsed_time": "13m 54s", "remaining_time": "2h 54m 0s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.788496} {"loss": 0.89741123, "grad_norm": 1.65061867, "learning_rate": 9.984e-05, "token_acc": 0.72939866, "epoch": 0.74128234, "global_step/max_steps": "659/8890", "percentage": "7.41%", "elapsed_time": "13m 55s", "remaining_time": "2h 53m 55s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.788753} {"loss": 0.86816245, "grad_norm": 1.71991491, "learning_rate": 9.984e-05, "token_acc": 0.73266078, "epoch": 0.7424072, "global_step/max_steps": "660/8890", "percentage": "7.42%", "elapsed_time": "13m 56s", "remaining_time": "2h 53m 52s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.78888} {"loss": 0.84648705, "grad_norm": 1.64355266, "learning_rate": 9.984e-05, "token_acc": 0.74465409, "epoch": 0.74353206, "global_step/max_steps": "661/8890", "percentage": "7.44%", "elapsed_time": "13m 57s", "remaining_time": "2h 53m 49s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.788989} {"loss": 0.92721164, "grad_norm": 1.37113082, "learning_rate": 9.984e-05, "token_acc": 0.73076923, "epoch": 0.74465692, "global_step/max_steps": "662/8890", "percentage": "7.45%", "elapsed_time": "13m 59s", "remaining_time": "2h 53m 49s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.788903} {"loss": 1.02825475, "grad_norm": 1.84303474, "learning_rate": 9.984e-05, "token_acc": 0.72543353, "epoch": 0.74578178, "global_step/max_steps": "663/8890", "percentage": "7.46%", "elapsed_time": "14m 0s", "remaining_time": "2h 53m 46s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.789036} {"loss": 1.07689357, "grad_norm": 1.92688358, "learning_rate": 9.983e-05, "token_acc": 0.69268897, "epoch": 0.74690664, "global_step/max_steps": "664/8890", "percentage": "7.47%", "elapsed_time": "14m 1s", "remaining_time": "2h 53m 44s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.789117} {"loss": 0.86033744, "grad_norm": 1.88816059, "learning_rate": 9.983e-05, "token_acc": 0.75073314, "epoch": 0.7480315, "global_step/max_steps": "665/8890", "percentage": "7.48%", "elapsed_time": "14m 2s", "remaining_time": "2h 53m 41s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.789242} {"loss": 0.77701426, "grad_norm": 1.77561736, "learning_rate": 9.983e-05, "token_acc": 0.77446809, "epoch": 0.74915636, "global_step/max_steps": "666/8890", "percentage": "7.49%", "elapsed_time": "14m 3s", "remaining_time": "2h 53m 37s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.789473} {"loss": 0.96242064, "grad_norm": 1.63095653, "learning_rate": 9.983e-05, "token_acc": 0.72707182, "epoch": 0.75028121, "global_step/max_steps": "667/8890", "percentage": "7.50%", "elapsed_time": "14m 4s", "remaining_time": "2h 53m 33s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.789611} {"loss": 0.67305744, "grad_norm": 1.52207732, "learning_rate": 9.983e-05, "token_acc": 0.77966102, "epoch": 0.75140607, "global_step/max_steps": "668/8890", "percentage": "7.51%", "elapsed_time": "14m 5s", "remaining_time": "2h 53m 26s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.790063} {"loss": 0.67766005, "grad_norm": 1.47674787, "learning_rate": 9.983e-05, "token_acc": 0.79350348, "epoch": 0.75253093, "global_step/max_steps": "669/8890", "percentage": "7.53%", "elapsed_time": "14m 6s", "remaining_time": "2h 53m 24s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.790175} {"loss": 1.05478609, "grad_norm": 1.80774772, "learning_rate": 9.982e-05, "token_acc": 0.71788991, "epoch": 0.75365579, "global_step/max_steps": "670/8890", "percentage": "7.54%", "elapsed_time": "14m 7s", "remaining_time": "2h 53m 21s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.790307} {"loss": 0.93953162, "grad_norm": 1.55361748, "learning_rate": 9.982e-05, "token_acc": 0.73281704, "epoch": 0.75478065, "global_step/max_steps": "671/8890", "percentage": "7.55%", "elapsed_time": "14m 8s", "remaining_time": "2h 53m 18s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.790389} {"loss": 0.88053083, "grad_norm": 1.80087864, "learning_rate": 9.982e-05, "token_acc": 0.72506083, "epoch": 0.75590551, "global_step/max_steps": "672/8890", "percentage": "7.56%", "elapsed_time": "14m 9s", "remaining_time": "2h 53m 13s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.790678} {"loss": 0.89275545, "grad_norm": 1.67117548, "learning_rate": 9.982e-05, "token_acc": 0.72928821, "epoch": 0.75703037, "global_step/max_steps": "673/8890", "percentage": "7.57%", "elapsed_time": "14m 11s", "remaining_time": "2h 53m 11s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.790775} {"loss": 0.76244271, "grad_norm": 1.47295654, "learning_rate": 9.982e-05, "token_acc": 0.77248201, "epoch": 0.75815523, "global_step/max_steps": "674/8890", "percentage": "7.58%", "elapsed_time": "14m 12s", "remaining_time": "2h 53m 8s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.790909} {"loss": 0.84771055, "grad_norm": 1.62241185, "learning_rate": 9.982e-05, "token_acc": 0.73464658, "epoch": 0.75928009, "global_step/max_steps": "675/8890", "percentage": "7.59%", "elapsed_time": "14m 13s", "remaining_time": "2h 53m 5s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.791005} {"loss": 0.81119376, "grad_norm": 1.79700291, "learning_rate": 9.982e-05, "token_acc": 0.7704698, "epoch": 0.76040495, "global_step/max_steps": "676/8890", "percentage": "7.60%", "elapsed_time": "14m 14s", "remaining_time": "2h 53m 2s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.791127} {"loss": 0.92228389, "grad_norm": 1.58162963, "learning_rate": 9.981e-05, "token_acc": 0.74494382, "epoch": 0.76152981, "global_step/max_steps": "677/8890", "percentage": "7.62%", "elapsed_time": "14m 15s", "remaining_time": "2h 53m 0s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.791207} {"loss": 1.08974814, "grad_norm": 1.71056116, "learning_rate": 9.981e-05, "token_acc": 0.69343066, "epoch": 0.76265467, "global_step/max_steps": "678/8890", "percentage": "7.63%", "elapsed_time": "14m 16s", "remaining_time": "2h 52m 56s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.791389} {"loss": 0.82023084, "grad_norm": 1.52258492, "learning_rate": 9.981e-05, "token_acc": 0.73806452, "epoch": 0.76377953, "global_step/max_steps": "679/8890", "percentage": "7.64%", "elapsed_time": "14m 17s", "remaining_time": "2h 52m 54s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.791496} {"loss": 0.90892977, "grad_norm": 1.51336348, "learning_rate": 9.981e-05, "token_acc": 0.73042616, "epoch": 0.76490439, "global_step/max_steps": "680/8890", "percentage": "7.65%", "elapsed_time": "14m 19s", "remaining_time": "2h 52m 51s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.791596} {"loss": 0.94960701, "grad_norm": 1.35939991, "learning_rate": 9.981e-05, "token_acc": 0.74042553, "epoch": 0.76602925, "global_step/max_steps": "681/8890", "percentage": "7.66%", "elapsed_time": "14m 20s", "remaining_time": "2h 52m 48s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.791713} {"loss": 0.97542751, "grad_norm": 1.63078618, "learning_rate": 9.981e-05, "token_acc": 0.71381579, "epoch": 0.76715411, "global_step/max_steps": "682/8890", "percentage": "7.67%", "elapsed_time": "14m 21s", "remaining_time": "2h 52m 45s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.791894} {"loss": 0.9401046, "grad_norm": 1.75583911, "learning_rate": 9.98e-05, "token_acc": 0.73928571, "epoch": 0.76827897, "global_step/max_steps": "683/8890", "percentage": "7.68%", "elapsed_time": "14m 22s", "remaining_time": "2h 52m 41s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.79209} {"loss": 0.77747202, "grad_norm": 1.60251677, "learning_rate": 9.98e-05, "token_acc": 0.79448276, "epoch": 0.76940382, "global_step/max_steps": "684/8890", "percentage": "7.69%", "elapsed_time": "14m 23s", "remaining_time": "2h 52m 37s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.792264} {"loss": 0.83953768, "grad_norm": 1.66321099, "learning_rate": 9.98e-05, "token_acc": 0.74571429, "epoch": 0.77052868, "global_step/max_steps": "685/8890", "percentage": "7.71%", "elapsed_time": "14m 24s", "remaining_time": "2h 52m 34s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.792447} {"loss": 0.94595861, "grad_norm": 1.41971743, "learning_rate": 9.98e-05, "token_acc": 0.74683544, "epoch": 0.77165354, "global_step/max_steps": "686/8890", "percentage": "7.72%", "elapsed_time": "14m 25s", "remaining_time": "2h 52m 35s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.792199} {"loss": 0.85536051, "grad_norm": 1.56360507, "learning_rate": 9.98e-05, "token_acc": 0.74310777, "epoch": 0.7727784, "global_step/max_steps": "687/8890", "percentage": "7.73%", "elapsed_time": "14m 27s", "remaining_time": "2h 52m 32s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.792379} {"loss": 0.8470273, "grad_norm": 1.74358976, "learning_rate": 9.98e-05, "token_acc": 0.74209861, "epoch": 0.77390326, "global_step/max_steps": "688/8890", "percentage": "7.74%", "elapsed_time": "14m 28s", "remaining_time": "2h 52m 28s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.792594} {"loss": 0.77229488, "grad_norm": 1.48031008, "learning_rate": 9.979e-05, "token_acc": 0.78607595, "epoch": 0.77502812, "global_step/max_steps": "689/8890", "percentage": "7.75%", "elapsed_time": "14m 29s", "remaining_time": "2h 52m 24s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.792792} {"loss": 1.03692329, "grad_norm": 1.66406083, "learning_rate": 9.979e-05, "token_acc": 0.70967742, "epoch": 0.77615298, "global_step/max_steps": "690/8890", "percentage": "7.76%", "elapsed_time": "14m 30s", "remaining_time": "2h 52m 21s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.792891} {"loss": 0.9034794, "grad_norm": 1.62297463, "learning_rate": 9.979e-05, "token_acc": 0.71875, "epoch": 0.77727784, "global_step/max_steps": "691/8890", "percentage": "7.77%", "elapsed_time": "14m 31s", "remaining_time": "2h 52m 18s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.79309} {"loss": 1.09050298, "grad_norm": 1.70506227, "learning_rate": 9.979e-05, "token_acc": 0.72117647, "epoch": 0.7784027, "global_step/max_steps": "692/8890", "percentage": "7.78%", "elapsed_time": "14m 32s", "remaining_time": "2h 52m 14s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.793256} {"loss": 0.85223633, "grad_norm": 1.48116684, "learning_rate": 9.979e-05, "token_acc": 0.77358491, "epoch": 0.77952756, "global_step/max_steps": "693/8890", "percentage": "7.80%", "elapsed_time": "14m 33s", "remaining_time": "2h 52m 10s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.79347} {"loss": 0.92708731, "grad_norm": 1.87291098, "learning_rate": 9.979e-05, "token_acc": 0.71943574, "epoch": 0.78065242, "global_step/max_steps": "694/8890", "percentage": "7.81%", "elapsed_time": "14m 34s", "remaining_time": "2h 52m 7s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.793602} {"loss": 0.91184425, "grad_norm": 1.74602997, "learning_rate": 9.978e-05, "token_acc": 0.72503083, "epoch": 0.78177728, "global_step/max_steps": "695/8890", "percentage": "7.82%", "elapsed_time": "14m 35s", "remaining_time": "2h 52m 4s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.793713} {"loss": 1.00681436, "grad_norm": 1.75774467, "learning_rate": 9.978e-05, "token_acc": 0.70652174, "epoch": 0.78290214, "global_step/max_steps": "696/8890", "percentage": "7.83%", "elapsed_time": "14m 36s", "remaining_time": "2h 52m 2s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.793788} {"loss": 0.80415857, "grad_norm": 1.53333831, "learning_rate": 9.978e-05, "token_acc": 0.74164811, "epoch": 0.784027, "global_step/max_steps": "697/8890", "percentage": "7.84%", "elapsed_time": "14m 38s", "remaining_time": "2h 52m 2s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.793702} {"loss": 1.16525149, "grad_norm": 1.99532592, "learning_rate": 9.978e-05, "token_acc": 0.68832732, "epoch": 0.78515186, "global_step/max_steps": "698/8890", "percentage": "7.85%", "elapsed_time": "14m 39s", "remaining_time": "2h 51m 59s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.793808} {"loss": 1.16973257, "grad_norm": 1.72738123, "learning_rate": 9.978e-05, "token_acc": 0.69186047, "epoch": 0.78627672, "global_step/max_steps": "699/8890", "percentage": "7.86%", "elapsed_time": "14m 40s", "remaining_time": "2h 51m 57s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.79386} {"loss": 0.89256418, "grad_norm": 1.55478442, "learning_rate": 9.978e-05, "token_acc": 0.74291498, "epoch": 0.78740157, "global_step/max_steps": "700/8890", "percentage": "7.87%", "elapsed_time": "14m 41s", "remaining_time": "2h 51m 53s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.794069} {"loss": 0.91113597, "grad_norm": 1.48337746, "learning_rate": 9.977e-05, "token_acc": 0.72810676, "epoch": 0.78852643, "global_step/max_steps": "701/8890", "percentage": "7.89%", "elapsed_time": "14m 42s", "remaining_time": "2h 51m 51s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.794192} {"loss": 0.90137613, "grad_norm": 1.48380339, "learning_rate": 9.977e-05, "token_acc": 0.72797677, "epoch": 0.78965129, "global_step/max_steps": "702/8890", "percentage": "7.90%", "elapsed_time": "14m 43s", "remaining_time": "2h 51m 49s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.794251} {"loss": 0.85841072, "grad_norm": 1.7905556, "learning_rate": 9.977e-05, "token_acc": 0.75034106, "epoch": 0.79077615, "global_step/max_steps": "703/8890", "percentage": "7.91%", "elapsed_time": "14m 45s", "remaining_time": "2h 51m 46s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.794328} {"loss": 0.73436058, "grad_norm": 1.54315019, "learning_rate": 9.977e-05, "token_acc": 0.78337531, "epoch": 0.79190101, "global_step/max_steps": "704/8890", "percentage": "7.92%", "elapsed_time": "14m 46s", "remaining_time": "2h 51m 42s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.794543} {"loss": 1.03552532, "grad_norm": 1.59327805, "learning_rate": 9.977e-05, "token_acc": 0.70353581, "epoch": 0.79302587, "global_step/max_steps": "705/8890", "percentage": "7.93%", "elapsed_time": "14m 47s", "remaining_time": "2h 51m 42s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.794496} {"loss": 0.78112531, "grad_norm": 1.36998498, "learning_rate": 9.976e-05, "token_acc": 0.76828012, "epoch": 0.79415073, "global_step/max_steps": "706/8890", "percentage": "7.94%", "elapsed_time": "14m 48s", "remaining_time": "2h 51m 39s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.794578} {"loss": 0.74274147, "grad_norm": 1.5678091, "learning_rate": 9.976e-05, "token_acc": 0.78770302, "epoch": 0.79527559, "global_step/max_steps": "707/8890", "percentage": "7.95%", "elapsed_time": "14m 49s", "remaining_time": "2h 51m 36s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.794707} {"loss": 0.95315516, "grad_norm": 1.58570659, "learning_rate": 9.976e-05, "token_acc": 0.74078341, "epoch": 0.79640045, "global_step/max_steps": "708/8890", "percentage": "7.96%", "elapsed_time": "14m 50s", "remaining_time": "2h 51m 33s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.794861} {"loss": 0.95589799, "grad_norm": 1.36698008, "learning_rate": 9.976e-05, "token_acc": 0.7338403, "epoch": 0.79752531, "global_step/max_steps": "709/8890", "percentage": "7.98%", "elapsed_time": "14m 52s", "remaining_time": "2h 51m 33s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.794767} {"loss": 0.91706705, "grad_norm": 1.59820354, "learning_rate": 9.976e-05, "token_acc": 0.74196787, "epoch": 0.79865017, "global_step/max_steps": "710/8890", "percentage": "7.99%", "elapsed_time": "14m 53s", "remaining_time": "2h 51m 29s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.794959} {"loss": 0.94959712, "grad_norm": 1.50104284, "learning_rate": 9.976e-05, "token_acc": 0.72282609, "epoch": 0.79977503, "global_step/max_steps": "711/8890", "percentage": "8.00%", "elapsed_time": "14m 54s", "remaining_time": "2h 51m 27s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.795042} {"loss": 0.79541773, "grad_norm": 1.45402217, "learning_rate": 9.975e-05, "token_acc": 0.77664975, "epoch": 0.80089989, "global_step/max_steps": "712/8890", "percentage": "8.01%", "elapsed_time": "14m 55s", "remaining_time": "2h 51m 24s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.795157} {"loss": 0.80534095, "grad_norm": 1.58039439, "learning_rate": 9.975e-05, "token_acc": 0.74465409, "epoch": 0.80202475, "global_step/max_steps": "713/8890", "percentage": "8.02%", "elapsed_time": "14m 56s", "remaining_time": "2h 51m 22s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.795267} {"loss": 0.68552649, "grad_norm": 1.67869616, "learning_rate": 9.975e-05, "token_acc": 0.79866667, "epoch": 0.80314961, "global_step/max_steps": "714/8890", "percentage": "8.03%", "elapsed_time": "14m 57s", "remaining_time": "2h 51m 18s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.795417} {"loss": 1.01887059, "grad_norm": 1.80942869, "learning_rate": 9.975e-05, "token_acc": 0.7002584, "epoch": 0.80427447, "global_step/max_steps": "715/8890", "percentage": "8.04%", "elapsed_time": "14m 58s", "remaining_time": "2h 51m 14s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.795627} {"loss": 0.95060778, "grad_norm": 1.64630044, "learning_rate": 9.975e-05, "token_acc": 0.72012917, "epoch": 0.80539933, "global_step/max_steps": "716/8890", "percentage": "8.05%", "elapsed_time": "14m 59s", "remaining_time": "2h 51m 14s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.795571} {"loss": 0.86701739, "grad_norm": 1.30966222, "learning_rate": 9.974e-05, "token_acc": 0.76714393, "epoch": 0.80652418, "global_step/max_steps": "717/8890", "percentage": "8.07%", "elapsed_time": "15m 1s", "remaining_time": "2h 51m 12s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.795655} {"loss": 1.03421783, "grad_norm": 1.56482124, "learning_rate": 9.974e-05, "token_acc": 0.70588235, "epoch": 0.80764904, "global_step/max_steps": "718/8890", "percentage": "8.08%", "elapsed_time": "15m 2s", "remaining_time": "2h 51m 9s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.795745} {"loss": 1.06847107, "grad_norm": 1.51062822, "learning_rate": 9.974e-05, "token_acc": 0.69710272, "epoch": 0.8087739, "global_step/max_steps": "719/8890", "percentage": "8.09%", "elapsed_time": "15m 3s", "remaining_time": "2h 51m 7s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.795819} {"loss": 0.92926323, "grad_norm": 1.61687827, "learning_rate": 9.974e-05, "token_acc": 0.73695652, "epoch": 0.80989876, "global_step/max_steps": "720/8890", "percentage": "8.10%", "elapsed_time": "15m 4s", "remaining_time": "2h 51m 4s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.795936} {"loss": 0.95124298, "grad_norm": 2.0188601, "learning_rate": 9.974e-05, "token_acc": 0.7300885, "epoch": 0.81102362, "global_step/max_steps": "721/8890", "percentage": "8.11%", "elapsed_time": "15m 5s", "remaining_time": "2h 50m 59s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.796214} {"loss": 1.04909062, "grad_norm": 1.6795336, "learning_rate": 9.973e-05, "token_acc": 0.71277618, "epoch": 0.81214848, "global_step/max_steps": "722/8890", "percentage": "8.12%", "elapsed_time": "15m 6s", "remaining_time": "2h 50m 57s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.796308} {"loss": 0.95255685, "grad_norm": 1.68047976, "learning_rate": 9.973e-05, "token_acc": 0.7195122, "epoch": 0.81327334, "global_step/max_steps": "723/8890", "percentage": "8.13%", "elapsed_time": "15m 7s", "remaining_time": "2h 50m 54s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.796396} {"loss": 0.9758243, "grad_norm": 1.71614969, "learning_rate": 9.973e-05, "token_acc": 0.71495327, "epoch": 0.8143982, "global_step/max_steps": "724/8890", "percentage": "8.14%", "elapsed_time": "15m 8s", "remaining_time": "2h 50m 52s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.79649} {"loss": 0.77313197, "grad_norm": 1.47424269, "learning_rate": 9.973e-05, "token_acc": 0.77810651, "epoch": 0.81552306, "global_step/max_steps": "725/8890", "percentage": "8.16%", "elapsed_time": "15m 10s", "remaining_time": "2h 50m 49s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.79662} {"loss": 0.7582655, "grad_norm": 1.62210786, "learning_rate": 9.973e-05, "token_acc": 0.76430976, "epoch": 0.81664792, "global_step/max_steps": "726/8890", "percentage": "8.17%", "elapsed_time": "15m 11s", "remaining_time": "2h 50m 45s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.796864} {"loss": 0.90563267, "grad_norm": 1.87253606, "learning_rate": 9.973e-05, "token_acc": 0.74352332, "epoch": 0.81777278, "global_step/max_steps": "727/8890", "percentage": "8.18%", "elapsed_time": "15m 12s", "remaining_time": "2h 50m 41s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.797043} {"loss": 0.98190194, "grad_norm": 1.73792744, "learning_rate": 9.972e-05, "token_acc": 0.72919109, "epoch": 0.81889764, "global_step/max_steps": "728/8890", "percentage": "8.19%", "elapsed_time": "15m 13s", "remaining_time": "2h 50m 39s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.797133} {"loss": 0.78167391, "grad_norm": 1.50342798, "learning_rate": 9.972e-05, "token_acc": 0.78375734, "epoch": 0.8200225, "global_step/max_steps": "729/8890", "percentage": "8.20%", "elapsed_time": "15m 14s", "remaining_time": "2h 50m 36s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.797226} {"loss": 0.77189553, "grad_norm": 1.58144319, "learning_rate": 9.972e-05, "token_acc": 0.77920411, "epoch": 0.82114736, "global_step/max_steps": "730/8890", "percentage": "8.21%", "elapsed_time": "15m 15s", "remaining_time": "2h 50m 34s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.797333} {"loss": 0.95439178, "grad_norm": 1.53709042, "learning_rate": 9.972e-05, "token_acc": 0.71708683, "epoch": 0.82227222, "global_step/max_steps": "731/8890", "percentage": "8.22%", "elapsed_time": "15m 16s", "remaining_time": "2h 50m 30s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.797544} {"loss": 0.8479445, "grad_norm": 1.70397592, "learning_rate": 9.972e-05, "token_acc": 0.75206612, "epoch": 0.82339708, "global_step/max_steps": "732/8890", "percentage": "8.23%", "elapsed_time": "15m 17s", "remaining_time": "2h 50m 27s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.797628} {"loss": 1.08226418, "grad_norm": 1.59661615, "learning_rate": 9.971e-05, "token_acc": 0.69621422, "epoch": 0.82452193, "global_step/max_steps": "733/8890", "percentage": "8.25%", "elapsed_time": "15m 18s", "remaining_time": "2h 50m 25s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.797708} {"loss": 0.9742626, "grad_norm": 1.2308141, "learning_rate": 9.971e-05, "token_acc": 0.73286713, "epoch": 0.82564679, "global_step/max_steps": "734/8890", "percentage": "8.26%", "elapsed_time": "15m 20s", "remaining_time": "2h 50m 25s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.797594} {"loss": 1.10393035, "grad_norm": 1.42645085, "learning_rate": 9.971e-05, "token_acc": 0.69285714, "epoch": 0.82677165, "global_step/max_steps": "735/8890", "percentage": "8.27%", "elapsed_time": "15m 21s", "remaining_time": "2h 50m 25s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.797553} {"loss": 0.92828172, "grad_norm": 1.84311259, "learning_rate": 9.971e-05, "token_acc": 0.7397882, "epoch": 0.82789651, "global_step/max_steps": "736/8890", "percentage": "8.28%", "elapsed_time": "15m 22s", "remaining_time": "2h 50m 21s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.797752} {"loss": 0.87249982, "grad_norm": 1.75279152, "learning_rate": 9.971e-05, "token_acc": 0.72948328, "epoch": 0.82902137, "global_step/max_steps": "737/8890", "percentage": "8.29%", "elapsed_time": "15m 23s", "remaining_time": "2h 50m 18s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.797831} {"loss": 0.76584715, "grad_norm": 1.58469975, "learning_rate": 9.97e-05, "token_acc": 0.76696543, "epoch": 0.83014623, "global_step/max_steps": "738/8890", "percentage": "8.30%", "elapsed_time": "15m 24s", "remaining_time": "2h 50m 15s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.798} {"loss": 0.75075412, "grad_norm": 1.18617177, "learning_rate": 9.97e-05, "token_acc": 0.78694818, "epoch": 0.83127109, "global_step/max_steps": "739/8890", "percentage": "8.31%", "elapsed_time": "15m 26s", "remaining_time": "2h 50m 14s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.797948} {"loss": 0.96469426, "grad_norm": 1.54912925, "learning_rate": 9.97e-05, "token_acc": 0.70693277, "epoch": 0.83239595, "global_step/max_steps": "740/8890", "percentage": "8.32%", "elapsed_time": "15m 27s", "remaining_time": "2h 50m 14s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.797872} {"loss": 0.97378415, "grad_norm": 1.53235567, "learning_rate": 9.97e-05, "token_acc": 0.72164948, "epoch": 0.83352081, "global_step/max_steps": "741/8890", "percentage": "8.34%", "elapsed_time": "15m 28s", "remaining_time": "2h 50m 11s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.798035} {"loss": 0.87018895, "grad_norm": 1.5263263, "learning_rate": 9.97e-05, "token_acc": 0.73617021, "epoch": 0.83464567, "global_step/max_steps": "742/8890", "percentage": "8.35%", "elapsed_time": "15m 29s", "remaining_time": "2h 50m 8s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.798175} {"loss": 0.7698884, "grad_norm": 1.19143164, "learning_rate": 9.969e-05, "token_acc": 0.76391097, "epoch": 0.83577053, "global_step/max_steps": "743/8890", "percentage": "8.36%", "elapsed_time": "15m 30s", "remaining_time": "2h 50m 6s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.798193} {"loss": 0.66986746, "grad_norm": 1.40502179, "learning_rate": 9.969e-05, "token_acc": 0.79182156, "epoch": 0.83689539, "global_step/max_steps": "744/8890", "percentage": "8.37%", "elapsed_time": "15m 31s", "remaining_time": "2h 50m 3s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.798365} {"loss": 0.88447213, "grad_norm": 1.61303186, "learning_rate": 9.969e-05, "token_acc": 0.73690476, "epoch": 0.83802025, "global_step/max_steps": "745/8890", "percentage": "8.38%", "elapsed_time": "15m 33s", "remaining_time": "2h 50m 1s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.798423} {"loss": 0.90054154, "grad_norm": 1.46637261, "learning_rate": 9.969e-05, "token_acc": 0.75684211, "epoch": 0.83914511, "global_step/max_steps": "746/8890", "percentage": "8.39%", "elapsed_time": "15m 34s", "remaining_time": "2h 49m 58s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.79851} {"loss": 0.85648012, "grad_norm": 1.42622578, "learning_rate": 9.968e-05, "token_acc": 0.75744212, "epoch": 0.84026997, "global_step/max_steps": "747/8890", "percentage": "8.40%", "elapsed_time": "15m 35s", "remaining_time": "2h 49m 55s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.798676} {"loss": 0.7817294, "grad_norm": 1.45724297, "learning_rate": 9.968e-05, "token_acc": 0.76116303, "epoch": 0.84139483, "global_step/max_steps": "748/8890", "percentage": "8.41%", "elapsed_time": "15m 36s", "remaining_time": "2h 49m 52s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.798847} {"loss": 0.87736595, "grad_norm": 1.51074433, "learning_rate": 9.968e-05, "token_acc": 0.74848851, "epoch": 0.84251969, "global_step/max_steps": "749/8890", "percentage": "8.43%", "elapsed_time": "15m 37s", "remaining_time": "2h 49m 48s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.799008} {"loss": 0.80112064, "grad_norm": 1.57183516, "learning_rate": 9.968e-05, "token_acc": 0.77822581, "epoch": 0.84364454, "global_step/max_steps": "750/8890", "percentage": "8.44%", "elapsed_time": "15m 38s", "remaining_time": "2h 49m 46s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.799093} {"loss": 0.96823734, "grad_norm": 1.45406771, "learning_rate": 9.968e-05, "token_acc": 0.71428571, "epoch": 0.8447694, "global_step/max_steps": "751/8890", "percentage": "8.45%", "elapsed_time": "15m 39s", "remaining_time": "2h 49m 45s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.799064} {"loss": 0.65352505, "grad_norm": 1.35156131, "learning_rate": 9.967e-05, "token_acc": 0.80503145, "epoch": 0.84589426, "global_step/max_steps": "752/8890", "percentage": "8.46%", "elapsed_time": "15m 41s", "remaining_time": "2h 49m 43s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.799132} {"loss": 1.0321703, "grad_norm": 1.5917747, "learning_rate": 9.967e-05, "token_acc": 0.70588235, "epoch": 0.84701912, "global_step/max_steps": "753/8890", "percentage": "8.47%", "elapsed_time": "15m 42s", "remaining_time": "2h 49m 41s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.799216} {"loss": 0.91817564, "grad_norm": 1.63433754, "learning_rate": 9.967e-05, "token_acc": 0.72678762, "epoch": 0.84814398, "global_step/max_steps": "754/8890", "percentage": "8.48%", "elapsed_time": "15m 43s", "remaining_time": "2h 49m 40s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.799147} {"loss": 1.04285038, "grad_norm": 1.89033794, "learning_rate": 9.967e-05, "token_acc": 0.68907563, "epoch": 0.84926884, "global_step/max_steps": "755/8890", "percentage": "8.49%", "elapsed_time": "15m 44s", "remaining_time": "2h 49m 37s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.799279} {"loss": 0.87380278, "grad_norm": 1.6912719, "learning_rate": 9.967e-05, "token_acc": 0.74817518, "epoch": 0.8503937, "global_step/max_steps": "756/8890", "percentage": "8.50%", "elapsed_time": "15m 45s", "remaining_time": "2h 49m 33s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.799518} {"loss": 0.9893558, "grad_norm": 1.51005781, "learning_rate": 9.966e-05, "token_acc": 0.7213254, "epoch": 0.85151856, "global_step/max_steps": "757/8890", "percentage": "8.52%", "elapsed_time": "15m 46s", "remaining_time": "2h 49m 31s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.799584} {"loss": 0.93050557, "grad_norm": 1.767295, "learning_rate": 9.966e-05, "token_acc": 0.72055138, "epoch": 0.85264342, "global_step/max_steps": "758/8890", "percentage": "8.53%", "elapsed_time": "15m 47s", "remaining_time": "2h 49m 29s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.799673} {"loss": 1.0346241, "grad_norm": 1.49478877, "learning_rate": 9.966e-05, "token_acc": 0.70503597, "epoch": 0.85376828, "global_step/max_steps": "759/8890", "percentage": "8.54%", "elapsed_time": "15m 49s", "remaining_time": "2h 49m 26s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.799757} {"loss": 0.98095542, "grad_norm": 1.42161763, "learning_rate": 9.966e-05, "token_acc": 0.73315364, "epoch": 0.85489314, "global_step/max_steps": "760/8890", "percentage": "8.55%", "elapsed_time": "15m 50s", "remaining_time": "2h 49m 24s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.799814} {"loss": 1.07115912, "grad_norm": 1.61614656, "learning_rate": 9.965e-05, "token_acc": 0.69569472, "epoch": 0.856018, "global_step/max_steps": "761/8890", "percentage": "8.56%", "elapsed_time": "15m 51s", "remaining_time": "2h 49m 19s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.800103} {"loss": 0.96388745, "grad_norm": 1.57207644, "learning_rate": 9.965e-05, "token_acc": 0.72757112, "epoch": 0.85714286, "global_step/max_steps": "762/8890", "percentage": "8.57%", "elapsed_time": "15m 52s", "remaining_time": "2h 49m 17s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.80016} {"loss": 0.84935069, "grad_norm": 1.5193857, "learning_rate": 9.965e-05, "token_acc": 0.74836601, "epoch": 0.85826772, "global_step/max_steps": "763/8890", "percentage": "8.58%", "elapsed_time": "15m 53s", "remaining_time": "2h 49m 16s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.800139} {"loss": 0.91928726, "grad_norm": 1.50749934, "learning_rate": 9.965e-05, "token_acc": 0.74219653, "epoch": 0.85939258, "global_step/max_steps": "764/8890", "percentage": "8.59%", "elapsed_time": "15m 54s", "remaining_time": "2h 49m 14s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.800236} {"loss": 0.84670866, "grad_norm": 1.65628099, "learning_rate": 9.965e-05, "token_acc": 0.73869347, "epoch": 0.86051744, "global_step/max_steps": "765/8890", "percentage": "8.61%", "elapsed_time": "15m 55s", "remaining_time": "2h 49m 10s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.800443} {"loss": 1.05871224, "grad_norm": 1.49688625, "learning_rate": 9.964e-05, "token_acc": 0.72958736, "epoch": 0.86164229, "global_step/max_steps": "766/8890", "percentage": "8.62%", "elapsed_time": "15m 56s", "remaining_time": "2h 49m 6s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.800692} {"loss": 0.94710594, "grad_norm": 1.60490608, "learning_rate": 9.964e-05, "token_acc": 0.73248408, "epoch": 0.86276715, "global_step/max_steps": "767/8890", "percentage": "8.63%", "elapsed_time": "15m 57s", "remaining_time": "2h 49m 3s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.800776} {"loss": 0.97453058, "grad_norm": 1.73593533, "learning_rate": 9.964e-05, "token_acc": 0.71113689, "epoch": 0.86389201, "global_step/max_steps": "768/8890", "percentage": "8.64%", "elapsed_time": "15m 58s", "remaining_time": "2h 49m 1s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.800854} {"loss": 1.0290916, "grad_norm": 1.31332397, "learning_rate": 9.964e-05, "token_acc": 0.70833333, "epoch": 0.86501687, "global_step/max_steps": "769/8890", "percentage": "8.65%", "elapsed_time": "16m 0s", "remaining_time": "2h 48m 59s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.800939} {"loss": 0.78912604, "grad_norm": 1.66537738, "learning_rate": 9.964e-05, "token_acc": 0.74884793, "epoch": 0.86614173, "global_step/max_steps": "770/8890", "percentage": "8.66%", "elapsed_time": "16m 1s", "remaining_time": "2h 48m 57s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.801} {"loss": 0.81430948, "grad_norm": 1.49062681, "learning_rate": 9.963e-05, "token_acc": 0.75875876, "epoch": 0.86726659, "global_step/max_steps": "771/8890", "percentage": "8.67%", "elapsed_time": "16m 2s", "remaining_time": "2h 48m 55s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.801082} {"loss": 0.74458915, "grad_norm": 1.59700453, "learning_rate": 9.963e-05, "token_acc": 0.77925532, "epoch": 0.86839145, "global_step/max_steps": "772/8890", "percentage": "8.68%", "elapsed_time": "16m 3s", "remaining_time": "2h 48m 52s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.801174} {"loss": 0.73704171, "grad_norm": 1.46995592, "learning_rate": 9.963e-05, "token_acc": 0.78817204, "epoch": 0.86951631, "global_step/max_steps": "773/8890", "percentage": "8.70%", "elapsed_time": "16m 4s", "remaining_time": "2h 48m 50s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.801261} {"loss": 0.95043719, "grad_norm": 1.51700711, "learning_rate": 9.963e-05, "token_acc": 0.72033024, "epoch": 0.87064117, "global_step/max_steps": "774/8890", "percentage": "8.71%", "elapsed_time": "16m 5s", "remaining_time": "2h 48m 48s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.801336} {"loss": 0.79885817, "grad_norm": 1.74697363, "learning_rate": 9.962e-05, "token_acc": 0.75289017, "epoch": 0.87176603, "global_step/max_steps": "775/8890", "percentage": "8.72%", "elapsed_time": "16m 7s", "remaining_time": "2h 48m 45s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.801437} {"loss": 1.01957607, "grad_norm": 1.69232714, "learning_rate": 9.962e-05, "token_acc": 0.71792153, "epoch": 0.87289089, "global_step/max_steps": "776/8890", "percentage": "8.73%", "elapsed_time": "16m 8s", "remaining_time": "2h 48m 42s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.801607} {"loss": 0.87256467, "grad_norm": 1.57079542, "learning_rate": 9.962e-05, "token_acc": 0.74505239, "epoch": 0.87401575, "global_step/max_steps": "777/8890", "percentage": "8.74%", "elapsed_time": "16m 9s", "remaining_time": "2h 48m 40s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.801679} {"loss": 0.89648712, "grad_norm": 1.53311849, "learning_rate": 9.962e-05, "token_acc": 0.75178147, "epoch": 0.87514061, "global_step/max_steps": "778/8890", "percentage": "8.75%", "elapsed_time": "16m 10s", "remaining_time": "2h 48m 37s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.801762} {"loss": 0.89178908, "grad_norm": 1.57584453, "learning_rate": 9.961e-05, "token_acc": 0.7447479, "epoch": 0.87626547, "global_step/max_steps": "779/8890", "percentage": "8.76%", "elapsed_time": "16m 11s", "remaining_time": "2h 48m 35s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.801839} {"loss": 0.80061185, "grad_norm": 1.32513499, "learning_rate": 9.961e-05, "token_acc": 0.76802218, "epoch": 0.87739033, "global_step/max_steps": "780/8890", "percentage": "8.77%", "elapsed_time": "16m 12s", "remaining_time": "2h 48m 34s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.801799} {"loss": 1.08298886, "grad_norm": 1.38437712, "learning_rate": 9.961e-05, "token_acc": 0.67952014, "epoch": 0.87851519, "global_step/max_steps": "781/8890", "percentage": "8.79%", "elapsed_time": "16m 13s", "remaining_time": "2h 48m 32s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.801886} {"loss": 0.74004996, "grad_norm": 1.55560637, "learning_rate": 9.961e-05, "token_acc": 0.78511236, "epoch": 0.87964004, "global_step/max_steps": "782/8890", "percentage": "8.80%", "elapsed_time": "16m 15s", "remaining_time": "2h 48m 29s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.801981} {"loss": 1.01843214, "grad_norm": 1.45090032, "learning_rate": 9.961e-05, "token_acc": 0.72259295, "epoch": 0.8807649, "global_step/max_steps": "783/8890", "percentage": "8.81%", "elapsed_time": "16m 16s", "remaining_time": "2h 48m 27s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.802045} {"loss": 0.65548909, "grad_norm": 1.52307999, "learning_rate": 9.96e-05, "token_acc": 0.7881997, "epoch": 0.88188976, "global_step/max_steps": "784/8890", "percentage": "8.82%", "elapsed_time": "16m 17s", "remaining_time": "2h 48m 24s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.80218} {"loss": 0.87967998, "grad_norm": 1.50617647, "learning_rate": 9.96e-05, "token_acc": 0.74413146, "epoch": 0.88301462, "global_step/max_steps": "785/8890", "percentage": "8.83%", "elapsed_time": "16m 18s", "remaining_time": "2h 48m 22s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.80226} {"loss": 0.79071981, "grad_norm": 1.47442639, "learning_rate": 9.96e-05, "token_acc": 0.78266178, "epoch": 0.88413948, "global_step/max_steps": "786/8890", "percentage": "8.84%", "elapsed_time": "16m 19s", "remaining_time": "2h 48m 20s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.802363} {"loss": 0.97436953, "grad_norm": 1.44732118, "learning_rate": 9.96e-05, "token_acc": 0.73178138, "epoch": 0.88526434, "global_step/max_steps": "787/8890", "percentage": "8.85%", "elapsed_time": "16m 20s", "remaining_time": "2h 48m 15s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.802645} {"loss": 0.89961213, "grad_norm": 2.05336857, "learning_rate": 9.959e-05, "token_acc": 0.72479564, "epoch": 0.8863892, "global_step/max_steps": "788/8890", "percentage": "8.86%", "elapsed_time": "16m 21s", "remaining_time": "2h 48m 12s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.802746} {"loss": 0.85378087, "grad_norm": 1.49186146, "learning_rate": 9.959e-05, "token_acc": 0.75995694, "epoch": 0.88751406, "global_step/max_steps": "789/8890", "percentage": "8.88%", "elapsed_time": "16m 22s", "remaining_time": "2h 48m 11s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.802724} {"loss": 0.88198429, "grad_norm": 1.50507414, "learning_rate": 9.959e-05, "token_acc": 0.74102285, "epoch": 0.88863892, "global_step/max_steps": "790/8890", "percentage": "8.89%", "elapsed_time": "16m 23s", "remaining_time": "2h 48m 9s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.802851} {"loss": 0.95202088, "grad_norm": 1.51058304, "learning_rate": 9.959e-05, "token_acc": 0.73303671, "epoch": 0.88976378, "global_step/max_steps": "791/8890", "percentage": "8.90%", "elapsed_time": "16m 25s", "remaining_time": "2h 48m 6s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.802945} {"loss": 1.08701038, "grad_norm": 1.60714304, "learning_rate": 9.958e-05, "token_acc": 0.68635875, "epoch": 0.89088864, "global_step/max_steps": "792/8890", "percentage": "8.91%", "elapsed_time": "16m 26s", "remaining_time": "2h 48m 4s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.803037} {"loss": 0.89472115, "grad_norm": 1.48037088, "learning_rate": 9.958e-05, "token_acc": 0.74344569, "epoch": 0.8920135, "global_step/max_steps": "793/8890", "percentage": "8.92%", "elapsed_time": "16m 27s", "remaining_time": "2h 48m 1s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.803133} {"loss": 0.95410383, "grad_norm": 1.44853854, "learning_rate": 9.958e-05, "token_acc": 0.74187192, "epoch": 0.89313836, "global_step/max_steps": "794/8890", "percentage": "8.93%", "elapsed_time": "16m 28s", "remaining_time": "2h 47m 59s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.803183} {"loss": 1.0168035, "grad_norm": 1.56526387, "learning_rate": 9.958e-05, "token_acc": 0.70974359, "epoch": 0.89426322, "global_step/max_steps": "795/8890", "percentage": "8.94%", "elapsed_time": "16m 29s", "remaining_time": "2h 47m 57s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.803288} {"loss": 0.90653926, "grad_norm": 1.74766099, "learning_rate": 9.957e-05, "token_acc": 0.75066667, "epoch": 0.89538808, "global_step/max_steps": "796/8890", "percentage": "8.95%", "elapsed_time": "16m 30s", "remaining_time": "2h 47m 54s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.803384} {"loss": 1.00283265, "grad_norm": 1.57142889, "learning_rate": 9.957e-05, "token_acc": 0.73144531, "epoch": 0.89651294, "global_step/max_steps": "797/8890", "percentage": "8.97%", "elapsed_time": "16m 31s", "remaining_time": "2h 47m 52s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.803451} {"loss": 0.91431028, "grad_norm": 1.57511532, "learning_rate": 9.957e-05, "token_acc": 0.73361976, "epoch": 0.8976378, "global_step/max_steps": "798/8890", "percentage": "8.98%", "elapsed_time": "16m 33s", "remaining_time": "2h 47m 50s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.803562} {"loss": 1.05787301, "grad_norm": 1.60210335, "learning_rate": 9.957e-05, "token_acc": 0.70648816, "epoch": 0.89876265, "global_step/max_steps": "799/8890", "percentage": "8.99%", "elapsed_time": "16m 34s", "remaining_time": "2h 47m 47s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.803662} {"loss": 0.87995517, "grad_norm": 1.56001639, "learning_rate": 9.956e-05, "token_acc": 0.73333333, "epoch": 0.89988751, "global_step/max_steps": "800/8890", "percentage": "9.00%", "elapsed_time": "16m 35s", "remaining_time": "2h 47m 45s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.803749} {"loss": 0.82097679, "grad_norm": 1.64808738, "learning_rate": 9.956e-05, "token_acc": 0.76050955, "epoch": 0.90101237, "global_step/max_steps": "801/8890", "percentage": "9.01%", "elapsed_time": "16m 36s", "remaining_time": "2h 47m 43s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.803799} {"loss": 0.85961401, "grad_norm": 1.43119645, "learning_rate": 9.956e-05, "token_acc": 0.74351297, "epoch": 0.90213723, "global_step/max_steps": "802/8890", "percentage": "9.02%", "elapsed_time": "16m 37s", "remaining_time": "2h 47m 38s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.804076} {"loss": 0.92536688, "grad_norm": 1.41889262, "learning_rate": 9.956e-05, "token_acc": 0.72947368, "epoch": 0.90326209, "global_step/max_steps": "803/8890", "percentage": "9.03%", "elapsed_time": "16m 38s", "remaining_time": "2h 47m 36s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.804135} {"loss": 1.10206008, "grad_norm": 1.48365343, "learning_rate": 9.955e-05, "token_acc": 0.69991015, "epoch": 0.90438695, "global_step/max_steps": "804/8890", "percentage": "9.04%", "elapsed_time": "16m 39s", "remaining_time": "2h 47m 35s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.80411} {"loss": 0.96275216, "grad_norm": 1.64238679, "learning_rate": 9.955e-05, "token_acc": 0.73382173, "epoch": 0.90551181, "global_step/max_steps": "805/8890", "percentage": "9.06%", "elapsed_time": "16m 40s", "remaining_time": "2h 47m 33s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.804208} {"loss": 0.89184737, "grad_norm": 1.30541468, "learning_rate": 9.955e-05, "token_acc": 0.74709562, "epoch": 0.90663667, "global_step/max_steps": "806/8890", "percentage": "9.07%", "elapsed_time": "16m 41s", "remaining_time": "2h 47m 29s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.80441} {"loss": 0.80132443, "grad_norm": 1.58776343, "learning_rate": 9.955e-05, "token_acc": 0.77305825, "epoch": 0.90776153, "global_step/max_steps": "807/8890", "percentage": "9.08%", "elapsed_time": "16m 43s", "remaining_time": "2h 47m 26s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.804574} {"loss": 0.89041907, "grad_norm": 1.89932048, "learning_rate": 9.954e-05, "token_acc": 0.74272588, "epoch": 0.90888639, "global_step/max_steps": "808/8890", "percentage": "9.09%", "elapsed_time": "16m 44s", "remaining_time": "2h 47m 23s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.804694} {"loss": 0.8574959, "grad_norm": 1.5045588, "learning_rate": 9.954e-05, "token_acc": 0.74423077, "epoch": 0.91001125, "global_step/max_steps": "809/8890", "percentage": "9.10%", "elapsed_time": "16m 45s", "remaining_time": "2h 47m 21s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.804775} {"loss": 0.65899009, "grad_norm": 1.39975548, "learning_rate": 9.954e-05, "token_acc": 0.79339853, "epoch": 0.91113611, "global_step/max_steps": "810/8890", "percentage": "9.11%", "elapsed_time": "16m 46s", "remaining_time": "2h 47m 19s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.80483} {"loss": 0.88234091, "grad_norm": 1.71385121, "learning_rate": 9.954e-05, "token_acc": 0.75170532, "epoch": 0.91226097, "global_step/max_steps": "811/8890", "percentage": "9.12%", "elapsed_time": "16m 47s", "remaining_time": "2h 47m 17s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.804914} {"loss": 0.77473712, "grad_norm": 1.61374187, "learning_rate": 9.953e-05, "token_acc": 0.76353276, "epoch": 0.91338583, "global_step/max_steps": "812/8890", "percentage": "9.13%", "elapsed_time": "16m 48s", "remaining_time": "2h 47m 14s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.805043} {"loss": 0.68859178, "grad_norm": 1.44301021, "learning_rate": 9.953e-05, "token_acc": 0.77117573, "epoch": 0.91451069, "global_step/max_steps": "813/8890", "percentage": "9.15%", "elapsed_time": "16m 49s", "remaining_time": "2h 47m 11s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.805144} {"loss": 0.8961761, "grad_norm": 1.5991658, "learning_rate": 9.953e-05, "token_acc": 0.73333333, "epoch": 0.91563555, "global_step/max_steps": "814/8890", "percentage": "9.16%", "elapsed_time": "16m 50s", "remaining_time": "2h 47m 8s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.805288} {"loss": 0.85115623, "grad_norm": 1.50393927, "learning_rate": 9.953e-05, "token_acc": 0.73033708, "epoch": 0.9167604, "global_step/max_steps": "815/8890", "percentage": "9.17%", "elapsed_time": "16m 52s", "remaining_time": "2h 47m 8s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.805234} {"loss": 0.90734416, "grad_norm": 1.6683749, "learning_rate": 9.952e-05, "token_acc": 0.74949698, "epoch": 0.91788526, "global_step/max_steps": "816/8890", "percentage": "9.18%", "elapsed_time": "16m 53s", "remaining_time": "2h 47m 5s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.805325} {"loss": 0.97187799, "grad_norm": 1.59012163, "learning_rate": 9.952e-05, "token_acc": 0.72736521, "epoch": 0.91901012, "global_step/max_steps": "817/8890", "percentage": "9.19%", "elapsed_time": "16m 54s", "remaining_time": "2h 47m 5s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.805244} {"loss": 0.88494229, "grad_norm": 1.50112128, "learning_rate": 9.952e-05, "token_acc": 0.73809524, "epoch": 0.92013498, "global_step/max_steps": "818/8890", "percentage": "9.20%", "elapsed_time": "16m 55s", "remaining_time": "2h 47m 3s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.805341} {"loss": 1.03186119, "grad_norm": 1.60687327, "learning_rate": 9.952e-05, "token_acc": 0.70734463, "epoch": 0.92125984, "global_step/max_steps": "819/8890", "percentage": "9.21%", "elapsed_time": "16m 56s", "remaining_time": "2h 47m 1s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.805407} {"loss": 0.81539571, "grad_norm": 1.8417269, "learning_rate": 9.951e-05, "token_acc": 0.73542601, "epoch": 0.9223847, "global_step/max_steps": "820/8890", "percentage": "9.22%", "elapsed_time": "16m 57s", "remaining_time": "2h 46m 55s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.805756} {"loss": 1.02384984, "grad_norm": 1.8921175, "learning_rate": 9.951e-05, "token_acc": 0.70672389, "epoch": 0.92350956, "global_step/max_steps": "821/8890", "percentage": "9.24%", "elapsed_time": "16m 58s", "remaining_time": "2h 46m 51s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.805935} {"loss": 0.87190062, "grad_norm": 1.58075166, "learning_rate": 9.951e-05, "token_acc": 0.75135722, "epoch": 0.92463442, "global_step/max_steps": "822/8890", "percentage": "9.25%", "elapsed_time": "16m 59s", "remaining_time": "2h 46m 48s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.806102} {"loss": 1.01621318, "grad_norm": 1.61294878, "learning_rate": 9.951e-05, "token_acc": 0.7183908, "epoch": 0.92575928, "global_step/max_steps": "823/8890", "percentage": "9.26%", "elapsed_time": "17m 1s", "remaining_time": "2h 46m 47s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.806063} {"loss": 1.01222992, "grad_norm": 1.67307162, "learning_rate": 9.95e-05, "token_acc": 0.7016129, "epoch": 0.92688414, "global_step/max_steps": "824/8890", "percentage": "9.27%", "elapsed_time": "17m 2s", "remaining_time": "2h 46m 45s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.806137} {"loss": 0.83182967, "grad_norm": 1.71881402, "learning_rate": 9.95e-05, "token_acc": 0.76645161, "epoch": 0.928009, "global_step/max_steps": "825/8890", "percentage": "9.28%", "elapsed_time": "17m 3s", "remaining_time": "2h 46m 43s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.806198} {"loss": 0.73390061, "grad_norm": 1.49706888, "learning_rate": 9.95e-05, "token_acc": 0.77452229, "epoch": 0.92913386, "global_step/max_steps": "826/8890", "percentage": "9.29%", "elapsed_time": "17m 4s", "remaining_time": "2h 46m 41s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.80628} {"loss": 0.94533771, "grad_norm": 1.60734999, "learning_rate": 9.95e-05, "token_acc": 0.72599784, "epoch": 0.93025872, "global_step/max_steps": "827/8890", "percentage": "9.30%", "elapsed_time": "17m 5s", "remaining_time": "2h 46m 37s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.806507} {"loss": 1.02332222, "grad_norm": 1.53701961, "learning_rate": 9.949e-05, "token_acc": 0.74074074, "epoch": 0.93138358, "global_step/max_steps": "828/8890", "percentage": "9.31%", "elapsed_time": "17m 6s", "remaining_time": "2h 46m 34s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.806626} {"loss": 0.75915933, "grad_norm": 1.46994984, "learning_rate": 9.949e-05, "token_acc": 0.77827548, "epoch": 0.93250844, "global_step/max_steps": "829/8890", "percentage": "9.33%", "elapsed_time": "17m 7s", "remaining_time": "2h 46m 32s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.80669} {"loss": 0.79165077, "grad_norm": 1.45499575, "learning_rate": 9.949e-05, "token_acc": 0.76586889, "epoch": 0.9336333, "global_step/max_steps": "830/8890", "percentage": "9.34%", "elapsed_time": "17m 8s", "remaining_time": "2h 46m 30s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.806752} {"loss": 0.96038842, "grad_norm": 1.51184833, "learning_rate": 9.949e-05, "token_acc": 0.73139535, "epoch": 0.93475816, "global_step/max_steps": "831/8890", "percentage": "9.35%", "elapsed_time": "17m 9s", "remaining_time": "2h 46m 28s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.806839} {"loss": 0.941769, "grad_norm": 1.5936923, "learning_rate": 9.948e-05, "token_acc": 0.72166998, "epoch": 0.93588301, "global_step/max_steps": "832/8890", "percentage": "9.36%", "elapsed_time": "17m 11s", "remaining_time": "2h 46m 26s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.806918} {"loss": 0.69139159, "grad_norm": 1.63033676, "learning_rate": 9.948e-05, "token_acc": 0.78507463, "epoch": 0.93700787, "global_step/max_steps": "833/8890", "percentage": "9.37%", "elapsed_time": "17m 12s", "remaining_time": "2h 46m 23s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.807011} {"loss": 0.92930084, "grad_norm": 1.65949488, "learning_rate": 9.948e-05, "token_acc": 0.7232267, "epoch": 0.93813273, "global_step/max_steps": "834/8890", "percentage": "9.38%", "elapsed_time": "17m 13s", "remaining_time": "2h 46m 21s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.807066} {"loss": 0.86676544, "grad_norm": 1.42367554, "learning_rate": 9.947e-05, "token_acc": 0.7429171, "epoch": 0.93925759, "global_step/max_steps": "835/8890", "percentage": "9.39%", "elapsed_time": "17m 14s", "remaining_time": "2h 46m 18s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.807211} {"loss": 0.90658975, "grad_norm": 1.38632655, "learning_rate": 9.947e-05, "token_acc": 0.74791319, "epoch": 0.94038245, "global_step/max_steps": "836/8890", "percentage": "9.40%", "elapsed_time": "17m 15s", "remaining_time": "2h 46m 16s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.80729} {"loss": 1.06713092, "grad_norm": 1.46307206, "learning_rate": 9.947e-05, "token_acc": 0.7097561, "epoch": 0.94150731, "global_step/max_steps": "837/8890", "percentage": "9.42%", "elapsed_time": "17m 16s", "remaining_time": "2h 46m 15s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.807252} {"loss": 0.8739484, "grad_norm": 1.52715206, "learning_rate": 9.947e-05, "token_acc": 0.73507463, "epoch": 0.94263217, "global_step/max_steps": "838/8890", "percentage": "9.43%", "elapsed_time": "17m 17s", "remaining_time": "2h 46m 13s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.807334} {"loss": 0.87663203, "grad_norm": 1.54761708, "learning_rate": 9.946e-05, "token_acc": 0.7497006, "epoch": 0.94375703, "global_step/max_steps": "839/8890", "percentage": "9.44%", "elapsed_time": "17m 19s", "remaining_time": "2h 46m 10s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.807488} {"loss": 0.77247375, "grad_norm": 1.72699082, "learning_rate": 9.946e-05, "token_acc": 0.75642965, "epoch": 0.94488189, "global_step/max_steps": "840/8890", "percentage": "9.45%", "elapsed_time": "17m 20s", "remaining_time": "2h 46m 14s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.807055} {"loss": 0.78386086, "grad_norm": 1.57973027, "learning_rate": 9.946e-05, "token_acc": 0.76379066, "epoch": 0.94600675, "global_step/max_steps": "841/8890", "percentage": "9.46%", "elapsed_time": "17m 21s", "remaining_time": "2h 46m 12s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.807118} {"loss": 0.68837345, "grad_norm": 1.57203245, "learning_rate": 9.946e-05, "token_acc": 0.80246914, "epoch": 0.94713161, "global_step/max_steps": "842/8890", "percentage": "9.47%", "elapsed_time": "17m 22s", "remaining_time": "2h 46m 9s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.8073} {"loss": 0.92788756, "grad_norm": 1.83173096, "learning_rate": 9.945e-05, "token_acc": 0.72868217, "epoch": 0.94825647, "global_step/max_steps": "843/8890", "percentage": "9.48%", "elapsed_time": "17m 24s", "remaining_time": "2h 46m 6s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.807445} {"loss": 0.75605911, "grad_norm": 1.41129172, "learning_rate": 9.945e-05, "token_acc": 0.78403756, "epoch": 0.94938133, "global_step/max_steps": "844/8890", "percentage": "9.49%", "elapsed_time": "17m 25s", "remaining_time": "2h 46m 3s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.807536} {"loss": 0.89975071, "grad_norm": 1.75522971, "learning_rate": 9.945e-05, "token_acc": 0.72645161, "epoch": 0.95050619, "global_step/max_steps": "845/8890", "percentage": "9.51%", "elapsed_time": "17m 26s", "remaining_time": "2h 46m 1s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.807614} {"loss": 0.88920271, "grad_norm": 1.68352425, "learning_rate": 9.944e-05, "token_acc": 0.73826458, "epoch": 0.95163105, "global_step/max_steps": "846/8890", "percentage": "9.52%", "elapsed_time": "17m 27s", "remaining_time": "2h 45m 59s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.807711} {"loss": 0.85749555, "grad_norm": 1.43758202, "learning_rate": 9.944e-05, "token_acc": 0.74772036, "epoch": 0.95275591, "global_step/max_steps": "847/8890", "percentage": "9.53%", "elapsed_time": "17m 28s", "remaining_time": "2h 45m 57s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.807772} {"loss": 0.85486382, "grad_norm": 1.68253422, "learning_rate": 9.944e-05, "token_acc": 0.75, "epoch": 0.95388076, "global_step/max_steps": "848/8890", "percentage": "9.54%", "elapsed_time": "17m 29s", "remaining_time": "2h 45m 53s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.807943} {"loss": 1.02468038, "grad_norm": 1.10523987, "learning_rate": 9.944e-05, "token_acc": 0.74076655, "epoch": 0.95500562, "global_step/max_steps": "849/8890", "percentage": "9.55%", "elapsed_time": "17m 30s", "remaining_time": "2h 45m 53s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.807871} {"loss": 1.00264382, "grad_norm": 1.44575012, "learning_rate": 9.943e-05, "token_acc": 0.73563218, "epoch": 0.95613048, "global_step/max_steps": "850/8890", "percentage": "9.56%", "elapsed_time": "17m 31s", "remaining_time": "2h 45m 50s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.808011} {"loss": 0.82949972, "grad_norm": 1.65150249, "learning_rate": 9.943e-05, "token_acc": 0.75244755, "epoch": 0.95725534, "global_step/max_steps": "851/8890", "percentage": "9.57%", "elapsed_time": "17m 33s", "remaining_time": "2h 45m 48s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.808091} {"loss": 0.95400882, "grad_norm": 1.56581831, "learning_rate": 9.943e-05, "token_acc": 0.71316964, "epoch": 0.9583802, "global_step/max_steps": "852/8890", "percentage": "9.58%", "elapsed_time": "17m 34s", "remaining_time": "2h 45m 45s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.80818} {"loss": 1.08978117, "grad_norm": 1.84950626, "learning_rate": 9.943e-05, "token_acc": 0.69745958, "epoch": 0.95950506, "global_step/max_steps": "853/8890", "percentage": "9.60%", "elapsed_time": "17m 35s", "remaining_time": "2h 45m 41s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.808409} {"loss": 1.08215344, "grad_norm": 1.80981147, "learning_rate": 9.942e-05, "token_acc": 0.7007722, "epoch": 0.96062992, "global_step/max_steps": "854/8890", "percentage": "9.61%", "elapsed_time": "17m 36s", "remaining_time": "2h 45m 39s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.808492} {"loss": 1.00160587, "grad_norm": 1.45957005, "learning_rate": 9.942e-05, "token_acc": 0.7005291, "epoch": 0.96175478, "global_step/max_steps": "855/8890", "percentage": "9.62%", "elapsed_time": "17m 37s", "remaining_time": "2h 45m 37s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.808552} {"loss": 0.71830308, "grad_norm": 1.72227836, "learning_rate": 9.942e-05, "token_acc": 0.7496063, "epoch": 0.96287964, "global_step/max_steps": "856/8890", "percentage": "9.63%", "elapsed_time": "17m 38s", "remaining_time": "2h 45m 35s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.808631} {"loss": 0.82230568, "grad_norm": 1.54521132, "learning_rate": 9.941e-05, "token_acc": 0.75794621, "epoch": 0.9640045, "global_step/max_steps": "857/8890", "percentage": "9.64%", "elapsed_time": "17m 39s", "remaining_time": "2h 45m 33s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.808707} {"loss": 0.94932246, "grad_norm": 1.56012404, "learning_rate": 9.941e-05, "token_acc": 0.73503326, "epoch": 0.96512936, "global_step/max_steps": "858/8890", "percentage": "9.65%", "elapsed_time": "17m 40s", "remaining_time": "2h 45m 31s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.808768} {"loss": 0.90756702, "grad_norm": 1.7973938, "learning_rate": 9.941e-05, "token_acc": 0.73278237, "epoch": 0.96625422, "global_step/max_steps": "859/8890", "percentage": "9.66%", "elapsed_time": "17m 41s", "remaining_time": "2h 45m 28s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.808855} {"loss": 0.78529942, "grad_norm": 1.39138234, "learning_rate": 9.941e-05, "token_acc": 0.78674948, "epoch": 0.96737908, "global_step/max_steps": "860/8890", "percentage": "9.67%", "elapsed_time": "17m 43s", "remaining_time": "2h 45m 25s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.808995} {"loss": 0.84050757, "grad_norm": 1.33024859, "learning_rate": 9.94e-05, "token_acc": 0.75461255, "epoch": 0.96850394, "global_step/max_steps": "861/8890", "percentage": "9.69%", "elapsed_time": "17m 44s", "remaining_time": "2h 45m 23s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.809053} {"loss": 0.92978251, "grad_norm": 1.55347705, "learning_rate": 9.94e-05, "token_acc": 0.72923777, "epoch": 0.9696288, "global_step/max_steps": "862/8890", "percentage": "9.70%", "elapsed_time": "17m 45s", "remaining_time": "2h 45m 21s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.809166} {"loss": 0.92286575, "grad_norm": 1.17047048, "learning_rate": 9.94e-05, "token_acc": 0.74068768, "epoch": 0.97075366, "global_step/max_steps": "863/8890", "percentage": "9.71%", "elapsed_time": "17m 46s", "remaining_time": "2h 45m 19s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.809247} {"loss": 0.90707517, "grad_norm": 1.60448813, "learning_rate": 9.939e-05, "token_acc": 0.7403599, "epoch": 0.97187852, "global_step/max_steps": "864/8890", "percentage": "9.72%", "elapsed_time": "17m 47s", "remaining_time": "2h 45m 17s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.809286} {"loss": 0.72568691, "grad_norm": 1.32362235, "learning_rate": 9.939e-05, "token_acc": 0.77803204, "epoch": 0.97300337, "global_step/max_steps": "865/8890", "percentage": "9.73%", "elapsed_time": "17m 48s", "remaining_time": "2h 45m 15s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.809363} {"loss": 0.78387702, "grad_norm": 1.60741949, "learning_rate": 9.939e-05, "token_acc": 0.74442793, "epoch": 0.97412823, "global_step/max_steps": "866/8890", "percentage": "9.74%", "elapsed_time": "17m 49s", "remaining_time": "2h 45m 13s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.809422} {"loss": 0.92436814, "grad_norm": 1.56482553, "learning_rate": 9.939e-05, "token_acc": 0.72325581, "epoch": 0.97525309, "global_step/max_steps": "867/8890", "percentage": "9.75%", "elapsed_time": "17m 51s", "remaining_time": "2h 45m 11s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.809486} {"loss": 1.03534698, "grad_norm": 1.65473485, "learning_rate": 9.938e-05, "token_acc": 0.71052632, "epoch": 0.97637795, "global_step/max_steps": "868/8890", "percentage": "9.76%", "elapsed_time": "17m 52s", "remaining_time": "2h 45m 9s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.809543} {"loss": 0.86637229, "grad_norm": 1.4519192, "learning_rate": 9.938e-05, "token_acc": 0.76117775, "epoch": 0.97750281, "global_step/max_steps": "869/8890", "percentage": "9.78%", "elapsed_time": "17m 53s", "remaining_time": "2h 45m 6s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.809636} {"loss": 0.72062325, "grad_norm": 1.58022463, "learning_rate": 9.938e-05, "token_acc": 0.7818448, "epoch": 0.97862767, "global_step/max_steps": "870/8890", "percentage": "9.79%", "elapsed_time": "17m 54s", "remaining_time": "2h 45m 4s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.809767} {"loss": 0.84324563, "grad_norm": 1.62096334, "learning_rate": 9.937e-05, "token_acc": 0.75221239, "epoch": 0.97975253, "global_step/max_steps": "871/8890", "percentage": "9.80%", "elapsed_time": "17m 55s", "remaining_time": "2h 45m 2s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.809824} {"loss": 0.92498726, "grad_norm": 1.53974247, "learning_rate": 9.937e-05, "token_acc": 0.73086124, "epoch": 0.98087739, "global_step/max_steps": "872/8890", "percentage": "9.81%", "elapsed_time": "17m 56s", "remaining_time": "2h 44m 59s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.809916} {"loss": 1.07390618, "grad_norm": 1.58757973, "learning_rate": 9.937e-05, "token_acc": 0.70823529, "epoch": 0.98200225, "global_step/max_steps": "873/8890", "percentage": "9.82%", "elapsed_time": "17m 57s", "remaining_time": "2h 44m 57s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.80998} {"loss": 0.75858378, "grad_norm": 1.58988702, "learning_rate": 9.936e-05, "token_acc": 0.77415307, "epoch": 0.98312711, "global_step/max_steps": "874/8890", "percentage": "9.83%", "elapsed_time": "17m 58s", "remaining_time": "2h 44m 54s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.810112} {"loss": 0.86893314, "grad_norm": 1.68658793, "learning_rate": 9.936e-05, "token_acc": 0.72624799, "epoch": 0.98425197, "global_step/max_steps": "875/8890", "percentage": "9.84%", "elapsed_time": "17m 59s", "remaining_time": "2h 44m 52s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.81024} {"loss": 0.89655763, "grad_norm": 1.57992172, "learning_rate": 9.936e-05, "token_acc": 0.74300254, "epoch": 0.98537683, "global_step/max_steps": "876/8890", "percentage": "9.85%", "elapsed_time": "18m 1s", "remaining_time": "2h 44m 50s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.810307} {"loss": 0.92575747, "grad_norm": 1.57239175, "learning_rate": 9.936e-05, "token_acc": 0.72597403, "epoch": 0.98650169, "global_step/max_steps": "877/8890", "percentage": "9.87%", "elapsed_time": "18m 2s", "remaining_time": "2h 44m 48s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.810356} {"loss": 1.03424597, "grad_norm": 1.62393069, "learning_rate": 9.935e-05, "token_acc": 0.72142857, "epoch": 0.98762655, "global_step/max_steps": "878/8890", "percentage": "9.88%", "elapsed_time": "18m 3s", "remaining_time": "2h 44m 45s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.810512} {"loss": 0.79166263, "grad_norm": 1.37977314, "learning_rate": 9.935e-05, "token_acc": 0.75194661, "epoch": 0.98875141, "global_step/max_steps": "879/8890", "percentage": "9.89%", "elapsed_time": "18m 4s", "remaining_time": "2h 44m 41s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.810668} {"loss": 0.8159436, "grad_norm": 1.72158849, "learning_rate": 9.935e-05, "token_acc": 0.75903614, "epoch": 0.98987627, "global_step/max_steps": "880/8890", "percentage": "9.90%", "elapsed_time": "18m 5s", "remaining_time": "2h 44m 39s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.810759} {"loss": 0.7226643, "grad_norm": 1.28062308, "learning_rate": 9.934e-05, "token_acc": 0.78516058, "epoch": 0.99100112, "global_step/max_steps": "881/8890", "percentage": "9.91%", "elapsed_time": "18m 6s", "remaining_time": "2h 44m 37s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.810824} {"loss": 0.68499589, "grad_norm": 1.53581452, "learning_rate": 9.934e-05, "token_acc": 0.78205128, "epoch": 0.99212598, "global_step/max_steps": "882/8890", "percentage": "9.92%", "elapsed_time": "18m 7s", "remaining_time": "2h 44m 35s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.810889} {"loss": 0.92004204, "grad_norm": 1.73393321, "learning_rate": 9.934e-05, "token_acc": 0.72506739, "epoch": 0.99325084, "global_step/max_steps": "883/8890", "percentage": "9.93%", "elapsed_time": "18m 8s", "remaining_time": "2h 44m 33s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.81093} {"loss": 0.91515523, "grad_norm": 1.4197396, "learning_rate": 9.933e-05, "token_acc": 0.74463739, "epoch": 0.9943757, "global_step/max_steps": "884/8890", "percentage": "9.94%", "elapsed_time": "18m 10s", "remaining_time": "2h 44m 32s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.810904} {"loss": 0.82821178, "grad_norm": 1.35687029, "learning_rate": 9.933e-05, "token_acc": 0.76428571, "epoch": 0.99550056, "global_step/max_steps": "885/8890", "percentage": "9.96%", "elapsed_time": "18m 11s", "remaining_time": "2h 44m 29s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.811065} {"loss": 0.82218981, "grad_norm": 1.39852893, "learning_rate": 9.933e-05, "token_acc": 0.7654321, "epoch": 0.99662542, "global_step/max_steps": "886/8890", "percentage": "9.97%", "elapsed_time": "18m 12s", "remaining_time": "2h 44m 27s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.811154} {"loss": 0.90026087, "grad_norm": 1.56051409, "learning_rate": 9.933e-05, "token_acc": 0.74065685, "epoch": 0.99775028, "global_step/max_steps": "887/8890", "percentage": "9.98%", "elapsed_time": "18m 13s", "remaining_time": "2h 44m 26s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.81113} {"loss": 0.78718096, "grad_norm": 1.38198805, "learning_rate": 9.932e-05, "token_acc": 0.76266137, "epoch": 0.99887514, "global_step/max_steps": "888/8890", "percentage": "9.99%", "elapsed_time": "18m 14s", "remaining_time": "2h 44m 25s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.811079} {"loss": 0.83235157, "grad_norm": 1.50470042, "learning_rate": 9.932e-05, "token_acc": 0.75095785, "epoch": 1.0, "global_step/max_steps": "889/8890", "percentage": "10.00%", "elapsed_time": "18m 15s", "remaining_time": "2h 44m 23s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.811162} {"loss": 0.81513655, "grad_norm": 1.38861942, "learning_rate": 9.932e-05, "token_acc": 0.76120959, "epoch": 1.00112486, "global_step/max_steps": "890/8890", "percentage": "10.01%", "elapsed_time": "18m 17s", "remaining_time": "2h 44m 26s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.810789} {"loss": 0.6345759, "grad_norm": 1.23334587, "learning_rate": 9.931e-05, "token_acc": 0.7991453, "epoch": 1.00224972, "global_step/max_steps": "891/8890", "percentage": "10.02%", "elapsed_time": "18m 18s", "remaining_time": "2h 44m 25s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.810844} {"loss": 0.69632065, "grad_norm": 1.45699501, "learning_rate": 9.931e-05, "token_acc": 0.78891821, "epoch": 1.00337458, "global_step/max_steps": "892/8890", "percentage": "10.03%", "elapsed_time": "18m 20s", "remaining_time": "2h 44m 23s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.810897} {"loss": 0.55355954, "grad_norm": 1.20904899, "learning_rate": 9.931e-05, "token_acc": 0.82460137, "epoch": 1.00449944, "global_step/max_steps": "893/8890", "percentage": "10.04%", "elapsed_time": "18m 21s", "remaining_time": "2h 44m 22s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.810835} {"loss": 0.7547068, "grad_norm": 1.54407907, "learning_rate": 9.93e-05, "token_acc": 0.77083333, "epoch": 1.0056243, "global_step/max_steps": "894/8890", "percentage": "10.06%", "elapsed_time": "18m 22s", "remaining_time": "2h 44m 20s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.810893} {"loss": 0.80791569, "grad_norm": 1.51585042, "learning_rate": 9.93e-05, "token_acc": 0.75771971, "epoch": 1.00674916, "global_step/max_steps": "895/8890", "percentage": "10.07%", "elapsed_time": "18m 23s", "remaining_time": "2h 44m 18s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.81097} {"loss": 0.78264678, "grad_norm": 1.43394339, "learning_rate": 9.93e-05, "token_acc": 0.76528384, "epoch": 1.00787402, "global_step/max_steps": "896/8890", "percentage": "10.08%", "elapsed_time": "18m 24s", "remaining_time": "2h 44m 16s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.81104} {"loss": 0.83417976, "grad_norm": 1.48197508, "learning_rate": 9.929e-05, "token_acc": 0.7387234, "epoch": 1.00899888, "global_step/max_steps": "897/8890", "percentage": "10.09%", "elapsed_time": "18m 25s", "remaining_time": "2h 44m 14s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.811118} {"loss": 0.68376344, "grad_norm": 1.83568323, "learning_rate": 9.929e-05, "token_acc": 0.77433004, "epoch": 1.01012373, "global_step/max_steps": "898/8890", "percentage": "10.10%", "elapsed_time": "18m 27s", "remaining_time": "2h 44m 12s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.811193} {"loss": 0.87847877, "grad_norm": 1.58930922, "learning_rate": 9.929e-05, "token_acc": 0.75538793, "epoch": 1.01124859, "global_step/max_steps": "899/8890", "percentage": "10.11%", "elapsed_time": "18m 28s", "remaining_time": "2h 44m 10s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.811243} {"loss": 0.69583642, "grad_norm": 1.66077542, "learning_rate": 9.929e-05, "token_acc": 0.77617801, "epoch": 1.01237345, "global_step/max_steps": "900/8890", "percentage": "10.12%", "elapsed_time": "18m 29s", "remaining_time": "2h 44m 8s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.811285} {"eval_loss": 0.90412235, "eval_runtime": 31.9567, "eval_samples_per_second": 25.128, "eval_steps_per_second": 3.161, "eval_token_acc": 0.73689906, "epoch": 1.01237345, "global_step/max_steps": "900/8890", "percentage": "10.12%", "elapsed_time": "19m 1s", "remaining_time": "2h 48m 52s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.788563} {"loss": 0.82215756, "grad_norm": 1.51283789, "learning_rate": 9.928e-05, "token_acc": 0.75301205, "epoch": 1.01349831, "global_step/max_steps": "901/8890", "percentage": "10.13%", "elapsed_time": "19m 16s", "remaining_time": "2h 50m 56s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.778946} {"loss": 0.88449609, "grad_norm": 2.06710887, "learning_rate": 9.928e-05, "token_acc": 0.72830725, "epoch": 1.01462317, "global_step/max_steps": "902/8890", "percentage": "10.15%", "elapsed_time": "19m 17s", "remaining_time": "2h 50m 53s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.779039} {"loss": 1.00080442, "grad_norm": 1.86684597, "learning_rate": 9.928e-05, "token_acc": 0.71578947, "epoch": 1.01574803, "global_step/max_steps": "903/8890", "percentage": "10.16%", "elapsed_time": "19m 18s", "remaining_time": "2h 50m 50s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.779147} {"loss": 0.88989687, "grad_norm": 1.53169596, "learning_rate": 9.927e-05, "token_acc": 0.76449275, "epoch": 1.01687289, "global_step/max_steps": "904/8890", "percentage": "10.17%", "elapsed_time": "19m 20s", "remaining_time": "2h 50m 47s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.779306} {"loss": 0.78921819, "grad_norm": 1.46765757, "learning_rate": 9.927e-05, "token_acc": 0.7645479, "epoch": 1.01799775, "global_step/max_steps": "905/8890", "percentage": "10.18%", "elapsed_time": "19m 21s", "remaining_time": "2h 50m 46s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.779289} {"loss": 0.69523382, "grad_norm": 1.68150711, "learning_rate": 9.927e-05, "token_acc": 0.80048368, "epoch": 1.01912261, "global_step/max_steps": "906/8890", "percentage": "10.19%", "elapsed_time": "19m 22s", "remaining_time": "2h 50m 44s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.779365} {"loss": 0.86827809, "grad_norm": 1.94433892, "learning_rate": 9.926e-05, "token_acc": 0.74375821, "epoch": 1.02024747, "global_step/max_steps": "907/8890", "percentage": "10.20%", "elapsed_time": "19m 23s", "remaining_time": "2h 50m 38s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.779709} {"loss": 0.66751671, "grad_norm": 1.69788074, "learning_rate": 9.926e-05, "token_acc": 0.78940887, "epoch": 1.02137233, "global_step/max_steps": "908/8890", "percentage": "10.21%", "elapsed_time": "19m 24s", "remaining_time": "2h 50m 35s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.779797} {"loss": 0.74722242, "grad_norm": 1.58305848, "learning_rate": 9.926e-05, "token_acc": 0.76818182, "epoch": 1.02249719, "global_step/max_steps": "909/8890", "percentage": "10.22%", "elapsed_time": "19m 26s", "remaining_time": "2h 50m 39s", "memory(GiB)": 22.82, "train_speed(iter/s)": 0.77942} {"loss": 0.70856577, "grad_norm": 1.64926946, "learning_rate": 9.925e-05, "token_acc": 0.77710843, "epoch": 1.02362205, "global_step/max_steps": "910/8890", "percentage": "10.24%", "elapsed_time": "19m 27s", "remaining_time": "2h 50m 37s", "memory(GiB)": 22.92, "train_speed(iter/s)": 0.779491} {"loss": 0.71047336, "grad_norm": 1.69343245, "learning_rate": 9.925e-05, "token_acc": 0.79719388, "epoch": 1.02474691, "global_step/max_steps": "911/8890", "percentage": "10.25%", "elapsed_time": "19m 28s", "remaining_time": "2h 50m 36s", "memory(GiB)": 22.92, "train_speed(iter/s)": 0.779428} {"loss": 0.70885378, "grad_norm": 1.69967103, "learning_rate": 9.925e-05, "token_acc": 0.79306358, "epoch": 1.02587177, "global_step/max_steps": "912/8890", "percentage": "10.26%", "elapsed_time": "19m 29s", "remaining_time": "2h 50m 33s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.779612} {"loss": 0.79873061, "grad_norm": 1.59444189, "learning_rate": 9.924e-05, "token_acc": 0.74507659, "epoch": 1.02699663, "global_step/max_steps": "913/8890", "percentage": "10.27%", "elapsed_time": "19m 31s", "remaining_time": "2h 50m 32s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.77958} {"loss": 0.78883666, "grad_norm": 1.62251091, "learning_rate": 9.924e-05, "token_acc": 0.76947368, "epoch": 1.02812148, "global_step/max_steps": "914/8890", "percentage": "10.28%", "elapsed_time": "19m 32s", "remaining_time": "2h 50m 29s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.779669} {"loss": 0.58292216, "grad_norm": 1.61656952, "learning_rate": 9.924e-05, "token_acc": 0.8097561, "epoch": 1.02924634, "global_step/max_steps": "915/8890", "percentage": "10.29%", "elapsed_time": "19m 33s", "remaining_time": "2h 50m 26s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.779825} {"loss": 0.86457998, "grad_norm": 1.87629116, "learning_rate": 9.923e-05, "token_acc": 0.75061425, "epoch": 1.0303712, "global_step/max_steps": "916/8890", "percentage": "10.30%", "elapsed_time": "19m 34s", "remaining_time": "2h 50m 23s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.779981} {"loss": 0.85327578, "grad_norm": 1.79170907, "learning_rate": 9.923e-05, "token_acc": 0.7480916, "epoch": 1.03149606, "global_step/max_steps": "917/8890", "percentage": "10.31%", "elapsed_time": "19m 35s", "remaining_time": "2h 50m 20s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.780077} {"loss": 0.7383064, "grad_norm": 1.61617482, "learning_rate": 9.923e-05, "token_acc": 0.77663934, "epoch": 1.03262092, "global_step/max_steps": "918/8890", "percentage": "10.33%", "elapsed_time": "19m 36s", "remaining_time": "2h 50m 17s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.780207} {"loss": 0.88935018, "grad_norm": 1.94336092, "learning_rate": 9.922e-05, "token_acc": 0.74810127, "epoch": 1.03374578, "global_step/max_steps": "919/8890", "percentage": "10.34%", "elapsed_time": "19m 37s", "remaining_time": "2h 50m 15s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.780312} {"loss": 0.78625798, "grad_norm": 1.8638016, "learning_rate": 9.922e-05, "token_acc": 0.75857988, "epoch": 1.03487064, "global_step/max_steps": "920/8890", "percentage": "10.35%", "elapsed_time": "19m 38s", "remaining_time": "2h 50m 11s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.780496} {"loss": 0.86531937, "grad_norm": 1.70839095, "learning_rate": 9.922e-05, "token_acc": 0.74735831, "epoch": 1.0359955, "global_step/max_steps": "921/8890", "percentage": "10.36%", "elapsed_time": "19m 39s", "remaining_time": "2h 50m 8s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.780592} {"loss": 0.80438322, "grad_norm": 1.41415858, "learning_rate": 9.921e-05, "token_acc": 0.7648, "epoch": 1.03712036, "global_step/max_steps": "922/8890", "percentage": "10.37%", "elapsed_time": "19m 41s", "remaining_time": "2h 50m 8s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.780554} {"loss": 0.85181302, "grad_norm": 1.90997005, "learning_rate": 9.921e-05, "token_acc": 0.75027503, "epoch": 1.03824522, "global_step/max_steps": "923/8890", "percentage": "10.38%", "elapsed_time": "19m 42s", "remaining_time": "2h 50m 2s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.780875} {"loss": 0.85023367, "grad_norm": 1.66239989, "learning_rate": 9.921e-05, "token_acc": 0.74340949, "epoch": 1.03937008, "global_step/max_steps": "924/8890", "percentage": "10.39%", "elapsed_time": "19m 43s", "remaining_time": "2h 50m 0s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.780914} {"loss": 0.75321943, "grad_norm": 1.78906012, "learning_rate": 9.92e-05, "token_acc": 0.77623991, "epoch": 1.04049494, "global_step/max_steps": "925/8890", "percentage": "10.40%", "elapsed_time": "19m 44s", "remaining_time": "2h 49m 58s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.780987} {"loss": 0.63005638, "grad_norm": 1.93979168, "learning_rate": 9.92e-05, "token_acc": 0.81700288, "epoch": 1.0416198, "global_step/max_steps": "926/8890", "percentage": "10.42%", "elapsed_time": "19m 45s", "remaining_time": "2h 49m 55s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.781134} {"loss": 0.61786616, "grad_norm": 2.16059327, "learning_rate": 9.92e-05, "token_acc": 0.79316547, "epoch": 1.04274466, "global_step/max_steps": "927/8890", "percentage": "10.43%", "elapsed_time": "19m 46s", "remaining_time": "2h 49m 52s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.781275} {"loss": 0.69694257, "grad_norm": 1.79331756, "learning_rate": 9.92e-05, "token_acc": 0.7839196, "epoch": 1.04386952, "global_step/max_steps": "928/8890", "percentage": "10.44%", "elapsed_time": "19m 47s", "remaining_time": "2h 49m 50s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.781354} {"loss": 0.66990447, "grad_norm": 1.82572031, "learning_rate": 9.919e-05, "token_acc": 0.79291553, "epoch": 1.04499438, "global_step/max_steps": "929/8890", "percentage": "10.45%", "elapsed_time": "19m 48s", "remaining_time": "2h 49m 47s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.781421} {"loss": 1.05479932, "grad_norm": 2.03761983, "learning_rate": 9.919e-05, "token_acc": 0.71774194, "epoch": 1.04611924, "global_step/max_steps": "930/8890", "percentage": "10.46%", "elapsed_time": "19m 49s", "remaining_time": "2h 49m 45s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.781523} {"loss": 0.75885469, "grad_norm": 1.74011326, "learning_rate": 9.919e-05, "token_acc": 0.75812274, "epoch": 1.04724409, "global_step/max_steps": "931/8890", "percentage": "10.47%", "elapsed_time": "19m 51s", "remaining_time": "2h 49m 43s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.781579} {"loss": 0.93719834, "grad_norm": 1.91404796, "learning_rate": 9.918e-05, "token_acc": 0.71871768, "epoch": 1.04836895, "global_step/max_steps": "932/8890", "percentage": "10.48%", "elapsed_time": "19m 52s", "remaining_time": "2h 49m 41s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.781645} {"loss": 0.95133388, "grad_norm": 2.1707015, "learning_rate": 9.918e-05, "token_acc": 0.73718791, "epoch": 1.04949381, "global_step/max_steps": "933/8890", "percentage": "10.49%", "elapsed_time": "19m 53s", "remaining_time": "2h 49m 38s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.781744} {"loss": 0.64792538, "grad_norm": 1.54093814, "learning_rate": 9.917e-05, "token_acc": 0.81622307, "epoch": 1.05061867, "global_step/max_steps": "934/8890", "percentage": "10.51%", "elapsed_time": "19m 54s", "remaining_time": "2h 49m 35s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.781852} {"loss": 0.93943489, "grad_norm": 1.92030823, "learning_rate": 9.917e-05, "token_acc": 0.72608126, "epoch": 1.05174353, "global_step/max_steps": "935/8890", "percentage": "10.52%", "elapsed_time": "19m 55s", "remaining_time": "2h 49m 33s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.781937} {"loss": 0.91818392, "grad_norm": 1.70450819, "learning_rate": 9.917e-05, "token_acc": 0.73420074, "epoch": 1.05286839, "global_step/max_steps": "936/8890", "percentage": "10.53%", "elapsed_time": "19m 56s", "remaining_time": "2h 49m 30s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.782034} {"loss": 0.70591033, "grad_norm": 1.69593298, "learning_rate": 9.916e-05, "token_acc": 0.76102088, "epoch": 1.05399325, "global_step/max_steps": "937/8890", "percentage": "10.54%", "elapsed_time": "19m 58s", "remaining_time": "2h 49m 28s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.782121} {"loss": 0.65298367, "grad_norm": 1.60825694, "learning_rate": 9.916e-05, "token_acc": 0.78142695, "epoch": 1.05511811, "global_step/max_steps": "938/8890", "percentage": "10.55%", "elapsed_time": "19m 59s", "remaining_time": "2h 49m 26s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.782213} {"loss": 0.82628685, "grad_norm": 1.77927172, "learning_rate": 9.916e-05, "token_acc": 0.76099211, "epoch": 1.05624297, "global_step/max_steps": "939/8890", "percentage": "10.56%", "elapsed_time": "20m 0s", "remaining_time": "2h 49m 23s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.782308} {"loss": 0.73450828, "grad_norm": 1.62401974, "learning_rate": 9.915e-05, "token_acc": 0.77616927, "epoch": 1.05736783, "global_step/max_steps": "940/8890", "percentage": "10.57%", "elapsed_time": "20m 1s", "remaining_time": "2h 49m 20s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.782462} {"loss": 0.46905547, "grad_norm": 1.30899763, "learning_rate": 9.915e-05, "token_acc": 0.83864542, "epoch": 1.05849269, "global_step/max_steps": "941/8890", "percentage": "10.58%", "elapsed_time": "20m 2s", "remaining_time": "2h 49m 17s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.782547} {"loss": 0.73500657, "grad_norm": 1.98499262, "learning_rate": 9.915e-05, "token_acc": 0.78077374, "epoch": 1.05961755, "global_step/max_steps": "942/8890", "percentage": "10.60%", "elapsed_time": "20m 3s", "remaining_time": "2h 49m 16s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.78257} {"loss": 0.72625172, "grad_norm": 1.97304249, "learning_rate": 9.914e-05, "token_acc": 0.76054422, "epoch": 1.06074241, "global_step/max_steps": "943/8890", "percentage": "10.61%", "elapsed_time": "20m 4s", "remaining_time": "2h 49m 13s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.782669} {"loss": 0.85118043, "grad_norm": 1.89915454, "learning_rate": 9.914e-05, "token_acc": 0.74114774, "epoch": 1.06186727, "global_step/max_steps": "944/8890", "percentage": "10.62%", "elapsed_time": "20m 5s", "remaining_time": "2h 49m 11s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.782777} {"loss": 0.74860954, "grad_norm": 2.3391943, "learning_rate": 9.914e-05, "token_acc": 0.7823741, "epoch": 1.06299213, "global_step/max_steps": "945/8890", "percentage": "10.63%", "elapsed_time": "20m 7s", "remaining_time": "2h 49m 8s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.782864} {"loss": 0.78470802, "grad_norm": 1.60849106, "learning_rate": 9.913e-05, "token_acc": 0.76548673, "epoch": 1.06411699, "global_step/max_steps": "946/8890", "percentage": "10.64%", "elapsed_time": "20m 8s", "remaining_time": "2h 49m 6s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.782961} {"loss": 0.96439892, "grad_norm": 2.07588387, "learning_rate": 9.913e-05, "token_acc": 0.71125975, "epoch": 1.06524184, "global_step/max_steps": "947/8890", "percentage": "10.65%", "elapsed_time": "20m 9s", "remaining_time": "2h 49m 3s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.783099} {"loss": 0.64144838, "grad_norm": 2.14224172, "learning_rate": 9.913e-05, "token_acc": 0.79454254, "epoch": 1.0663667, "global_step/max_steps": "948/8890", "percentage": "10.66%", "elapsed_time": "20m 10s", "remaining_time": "2h 48m 59s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.783244} {"loss": 0.70365542, "grad_norm": 1.68244267, "learning_rate": 9.912e-05, "token_acc": 0.78679654, "epoch": 1.06749156, "global_step/max_steps": "949/8890", "percentage": "10.67%", "elapsed_time": "20m 11s", "remaining_time": "2h 48m 57s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.78333} {"loss": 0.78139925, "grad_norm": 1.87680876, "learning_rate": 9.912e-05, "token_acc": 0.74716981, "epoch": 1.06861642, "global_step/max_steps": "950/8890", "percentage": "10.69%", "elapsed_time": "20m 12s", "remaining_time": "2h 48m 55s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.783407} {"loss": 0.77554059, "grad_norm": 1.72023463, "learning_rate": 9.912e-05, "token_acc": 0.77105263, "epoch": 1.06974128, "global_step/max_steps": "951/8890", "percentage": "10.70%", "elapsed_time": "20m 13s", "remaining_time": "2h 48m 54s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.783381} {"loss": 0.81959087, "grad_norm": 1.74953496, "learning_rate": 9.911e-05, "token_acc": 0.76413043, "epoch": 1.07086614, "global_step/max_steps": "952/8890", "percentage": "10.71%", "elapsed_time": "20m 15s", "remaining_time": "2h 48m 51s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.783466} {"loss": 0.85958606, "grad_norm": 1.49754965, "learning_rate": 9.911e-05, "token_acc": 0.75, "epoch": 1.071991, "global_step/max_steps": "953/8890", "percentage": "10.72%", "elapsed_time": "20m 16s", "remaining_time": "2h 48m 49s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.783559} {"loss": 0.72824901, "grad_norm": 1.45021701, "learning_rate": 9.911e-05, "token_acc": 0.79238754, "epoch": 1.07311586, "global_step/max_steps": "954/8890", "percentage": "10.73%", "elapsed_time": "20m 17s", "remaining_time": "2h 48m 47s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.783647} {"loss": 0.91762477, "grad_norm": 1.96525133, "learning_rate": 9.91e-05, "token_acc": 0.74065421, "epoch": 1.07424072, "global_step/max_steps": "955/8890", "percentage": "10.74%", "elapsed_time": "20m 18s", "remaining_time": "2h 48m 44s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.783734} {"loss": 0.88368487, "grad_norm": 1.88230968, "learning_rate": 9.91e-05, "token_acc": 0.74358974, "epoch": 1.07536558, "global_step/max_steps": "956/8890", "percentage": "10.75%", "elapsed_time": "20m 19s", "remaining_time": "2h 48m 42s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.783805} {"loss": 0.92260832, "grad_norm": 2.08895659, "learning_rate": 9.91e-05, "token_acc": 0.71929825, "epoch": 1.07649044, "global_step/max_steps": "957/8890", "percentage": "10.76%", "elapsed_time": "20m 20s", "remaining_time": "2h 48m 39s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.7839} {"loss": 0.79567653, "grad_norm": 1.81810451, "learning_rate": 9.909e-05, "token_acc": 0.76796231, "epoch": 1.0776153, "global_step/max_steps": "958/8890", "percentage": "10.78%", "elapsed_time": "20m 21s", "remaining_time": "2h 48m 37s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.783987} {"loss": 0.83858323, "grad_norm": 1.889189, "learning_rate": 9.909e-05, "token_acc": 0.75866189, "epoch": 1.07874016, "global_step/max_steps": "959/8890", "percentage": "10.79%", "elapsed_time": "20m 23s", "remaining_time": "2h 48m 34s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.784092} {"loss": 0.80443943, "grad_norm": 1.86494291, "learning_rate": 9.909e-05, "token_acc": 0.76689977, "epoch": 1.07986502, "global_step/max_steps": "960/8890", "percentage": "10.80%", "elapsed_time": "20m 23s", "remaining_time": "2h 48m 30s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.78433} {"loss": 0.57372415, "grad_norm": 1.49883556, "learning_rate": 9.908e-05, "token_acc": 0.82882883, "epoch": 1.08098988, "global_step/max_steps": "961/8890", "percentage": "10.81%", "elapsed_time": "20m 25s", "remaining_time": "2h 48m 28s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.784412} {"loss": 0.7240085, "grad_norm": 1.79562628, "learning_rate": 9.908e-05, "token_acc": 0.77401894, "epoch": 1.08211474, "global_step/max_steps": "962/8890", "percentage": "10.82%", "elapsed_time": "20m 26s", "remaining_time": "2h 48m 25s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.78455} {"loss": 0.74582589, "grad_norm": 2.06374359, "learning_rate": 9.907e-05, "token_acc": 0.78115942, "epoch": 1.0832396, "global_step/max_steps": "963/8890", "percentage": "10.83%", "elapsed_time": "20m 27s", "remaining_time": "2h 48m 22s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.784621} {"loss": 0.87788373, "grad_norm": 1.97113681, "learning_rate": 9.907e-05, "token_acc": 0.73976222, "epoch": 1.08436445, "global_step/max_steps": "964/8890", "percentage": "10.84%", "elapsed_time": "20m 28s", "remaining_time": "2h 48m 20s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.784694} {"loss": 0.6535238, "grad_norm": 1.61212742, "learning_rate": 9.907e-05, "token_acc": 0.78890229, "epoch": 1.08548931, "global_step/max_steps": "965/8890", "percentage": "10.85%", "elapsed_time": "20m 29s", "remaining_time": "2h 48m 18s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.784769} {"loss": 0.66970819, "grad_norm": 1.65153635, "learning_rate": 9.906e-05, "token_acc": 0.80496454, "epoch": 1.08661417, "global_step/max_steps": "966/8890", "percentage": "10.87%", "elapsed_time": "20m 30s", "remaining_time": "2h 48m 13s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.785085} {"loss": 0.68220365, "grad_norm": 1.50203919, "learning_rate": 9.906e-05, "token_acc": 0.79032258, "epoch": 1.08773903, "global_step/max_steps": "967/8890", "percentage": "10.88%", "elapsed_time": "20m 31s", "remaining_time": "2h 48m 10s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.785177} {"loss": 0.78109401, "grad_norm": 1.72599685, "learning_rate": 9.906e-05, "token_acc": 0.75604839, "epoch": 1.08886389, "global_step/max_steps": "968/8890", "percentage": "10.89%", "elapsed_time": "20m 32s", "remaining_time": "2h 48m 8s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.785253} {"loss": 0.77445197, "grad_norm": 1.81100786, "learning_rate": 9.905e-05, "token_acc": 0.77344702, "epoch": 1.08998875, "global_step/max_steps": "969/8890", "percentage": "10.90%", "elapsed_time": "20m 34s", "remaining_time": "2h 48m 10s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.784971} {"loss": 0.81642306, "grad_norm": 2.06739902, "learning_rate": 9.905e-05, "token_acc": 0.75169607, "epoch": 1.09111361, "global_step/max_steps": "970/8890", "percentage": "10.91%", "elapsed_time": "20m 35s", "remaining_time": "2h 48m 7s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.785104} {"loss": 0.74910206, "grad_norm": 1.76370597, "learning_rate": 9.905e-05, "token_acc": 0.76290832, "epoch": 1.09223847, "global_step/max_steps": "971/8890", "percentage": "10.92%", "elapsed_time": "20m 36s", "remaining_time": "2h 48m 5s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.785169} {"loss": 0.67535353, "grad_norm": 1.46718144, "learning_rate": 9.904e-05, "token_acc": 0.80156403, "epoch": 1.09336333, "global_step/max_steps": "972/8890", "percentage": "10.93%", "elapsed_time": "20m 37s", "remaining_time": "2h 48m 3s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.785265} {"loss": 0.72279173, "grad_norm": 1.68684602, "learning_rate": 9.904e-05, "token_acc": 0.76408451, "epoch": 1.09448819, "global_step/max_steps": "973/8890", "percentage": "10.94%", "elapsed_time": "20m 38s", "remaining_time": "2h 48m 0s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.785352} {"loss": 0.75945073, "grad_norm": 1.47406673, "learning_rate": 9.903e-05, "token_acc": 0.7773344, "epoch": 1.09561305, "global_step/max_steps": "974/8890", "percentage": "10.96%", "elapsed_time": "20m 40s", "remaining_time": "2h 47m 58s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.785439} {"loss": 0.64481401, "grad_norm": 1.75503623, "learning_rate": 9.903e-05, "token_acc": 0.79950187, "epoch": 1.09673791, "global_step/max_steps": "975/8890", "percentage": "10.97%", "elapsed_time": "20m 41s", "remaining_time": "2h 47m 55s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.785531} {"loss": 0.747554, "grad_norm": 1.69011676, "learning_rate": 9.903e-05, "token_acc": 0.7862069, "epoch": 1.09786277, "global_step/max_steps": "976/8890", "percentage": "10.98%", "elapsed_time": "20m 42s", "remaining_time": "2h 47m 55s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.785469} {"loss": 0.70525563, "grad_norm": 1.66037786, "learning_rate": 9.902e-05, "token_acc": 0.78232044, "epoch": 1.09898763, "global_step/max_steps": "977/8890", "percentage": "10.99%", "elapsed_time": "20m 43s", "remaining_time": "2h 47m 53s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.78555} {"loss": 0.91713864, "grad_norm": 1.81012344, "learning_rate": 9.902e-05, "token_acc": 0.73888889, "epoch": 1.10011249, "global_step/max_steps": "978/8890", "percentage": "11.00%", "elapsed_time": "20m 44s", "remaining_time": "2h 47m 49s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.785707} {"loss": 0.66874474, "grad_norm": 1.96896315, "learning_rate": 9.902e-05, "token_acc": 0.79022989, "epoch": 1.10123735, "global_step/max_steps": "979/8890", "percentage": "11.01%", "elapsed_time": "20m 45s", "remaining_time": "2h 47m 47s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.785784} {"loss": 0.64765662, "grad_norm": 1.91497684, "learning_rate": 9.901e-05, "token_acc": 0.80500659, "epoch": 1.1023622, "global_step/max_steps": "980/8890", "percentage": "11.02%", "elapsed_time": "20m 46s", "remaining_time": "2h 47m 44s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.785932} {"loss": 0.85269517, "grad_norm": 1.93130696, "learning_rate": 9.901e-05, "token_acc": 0.75371287, "epoch": 1.10348706, "global_step/max_steps": "981/8890", "percentage": "11.03%", "elapsed_time": "20m 47s", "remaining_time": "2h 47m 41s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.786078} {"loss": 0.6293155, "grad_norm": 1.80341136, "learning_rate": 9.901e-05, "token_acc": 0.81189488, "epoch": 1.10461192, "global_step/max_steps": "982/8890", "percentage": "11.05%", "elapsed_time": "20m 49s", "remaining_time": "2h 47m 39s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.786157} {"loss": 0.7881186, "grad_norm": 1.99532461, "learning_rate": 9.9e-05, "token_acc": 0.76091703, "epoch": 1.10573678, "global_step/max_steps": "983/8890", "percentage": "11.06%", "elapsed_time": "20m 50s", "remaining_time": "2h 47m 35s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.786312} {"loss": 0.68813211, "grad_norm": 1.82470083, "learning_rate": 9.9e-05, "token_acc": 0.78271309, "epoch": 1.10686164, "global_step/max_steps": "984/8890", "percentage": "11.07%", "elapsed_time": "20m 51s", "remaining_time": "2h 47m 33s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.786395} {"loss": 0.94740391, "grad_norm": 1.97429395, "learning_rate": 9.899e-05, "token_acc": 0.72234513, "epoch": 1.1079865, "global_step/max_steps": "985/8890", "percentage": "11.08%", "elapsed_time": "20m 52s", "remaining_time": "2h 47m 30s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.786532} {"loss": 0.75861484, "grad_norm": 1.86386037, "learning_rate": 9.899e-05, "token_acc": 0.77777778, "epoch": 1.10911136, "global_step/max_steps": "986/8890", "percentage": "11.09%", "elapsed_time": "20m 53s", "remaining_time": "2h 47m 28s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.786602} {"loss": 0.75028801, "grad_norm": 1.9223243, "learning_rate": 9.899e-05, "token_acc": 0.76893204, "epoch": 1.11023622, "global_step/max_steps": "987/8890", "percentage": "11.10%", "elapsed_time": "20m 54s", "remaining_time": "2h 47m 26s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.786673} {"loss": 0.70934021, "grad_norm": 1.78622055, "learning_rate": 9.898e-05, "token_acc": 0.78060046, "epoch": 1.11136108, "global_step/max_steps": "988/8890", "percentage": "11.11%", "elapsed_time": "20m 55s", "remaining_time": "2h 47m 23s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.786758} {"loss": 0.7395767, "grad_norm": 1.87900913, "learning_rate": 9.898e-05, "token_acc": 0.76824034, "epoch": 1.11248594, "global_step/max_steps": "989/8890", "percentage": "11.12%", "elapsed_time": "20m 56s", "remaining_time": "2h 47m 21s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.786819} {"loss": 0.69868755, "grad_norm": 1.66073298, "learning_rate": 9.898e-05, "token_acc": 0.80254154, "epoch": 1.1136108, "global_step/max_steps": "990/8890", "percentage": "11.14%", "elapsed_time": "20m 58s", "remaining_time": "2h 47m 19s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.786921} {"loss": 0.83087623, "grad_norm": 2.15578341, "learning_rate": 9.897e-05, "token_acc": 0.75112613, "epoch": 1.11473566, "global_step/max_steps": "991/8890", "percentage": "11.15%", "elapsed_time": "20m 59s", "remaining_time": "2h 47m 16s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.787008} {"loss": 0.80490679, "grad_norm": 1.8445009, "learning_rate": 9.897e-05, "token_acc": 0.76909091, "epoch": 1.11586052, "global_step/max_steps": "992/8890", "percentage": "11.16%", "elapsed_time": "21m 0s", "remaining_time": "2h 47m 14s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.787089} {"loss": 0.79020452, "grad_norm": 2.06878781, "learning_rate": 9.896e-05, "token_acc": 0.7657784, "epoch": 1.11698538, "global_step/max_steps": "993/8890", "percentage": "11.17%", "elapsed_time": "21m 1s", "remaining_time": "2h 47m 10s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.787269} {"loss": 0.56242466, "grad_norm": 1.69972861, "learning_rate": 9.896e-05, "token_acc": 0.81101512, "epoch": 1.11811024, "global_step/max_steps": "994/8890", "percentage": "11.18%", "elapsed_time": "21m 2s", "remaining_time": "2h 47m 8s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.787356} {"loss": 0.7192328, "grad_norm": 1.80463469, "learning_rate": 9.896e-05, "token_acc": 0.79338843, "epoch": 1.1192351, "global_step/max_steps": "995/8890", "percentage": "11.19%", "elapsed_time": "21m 3s", "remaining_time": "2h 47m 5s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.787494} {"loss": 0.79327917, "grad_norm": 1.94285929, "learning_rate": 9.895e-05, "token_acc": 0.76705882, "epoch": 1.12035996, "global_step/max_steps": "996/8890", "percentage": "11.20%", "elapsed_time": "21m 4s", "remaining_time": "2h 47m 2s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.787656} {"loss": 0.72460824, "grad_norm": 1.64637089, "learning_rate": 9.895e-05, "token_acc": 0.76950673, "epoch": 1.12148481, "global_step/max_steps": "997/8890", "percentage": "11.21%", "elapsed_time": "21m 5s", "remaining_time": "2h 46m 58s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.787841} {"loss": 0.56709421, "grad_norm": 1.62625945, "learning_rate": 9.895e-05, "token_acc": 0.83855981, "epoch": 1.12260967, "global_step/max_steps": "998/8890", "percentage": "11.23%", "elapsed_time": "21m 6s", "remaining_time": "2h 46m 56s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.787926} {"loss": 0.82754296, "grad_norm": 2.03609085, "learning_rate": 9.894e-05, "token_acc": 0.76299879, "epoch": 1.12373453, "global_step/max_steps": "999/8890", "percentage": "11.24%", "elapsed_time": "21m 7s", "remaining_time": "2h 46m 53s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.787999} {"loss": 0.53101373, "grad_norm": 1.73922265, "learning_rate": 9.894e-05, "token_acc": 0.83487298, "epoch": 1.12485939, "global_step/max_steps": "1000/8890", "percentage": "11.25%", "elapsed_time": "21m 8s", "remaining_time": "2h 46m 51s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.788132} {"loss": 0.75518548, "grad_norm": 1.87220967, "learning_rate": 9.893e-05, "token_acc": 0.77137177, "epoch": 1.12598425, "global_step/max_steps": "1001/8890", "percentage": "11.26%", "elapsed_time": "21m 9s", "remaining_time": "2h 46m 48s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.788208} {"loss": 0.98474741, "grad_norm": 2.02144694, "learning_rate": 9.893e-05, "token_acc": 0.74273412, "epoch": 1.12710911, "global_step/max_steps": "1002/8890", "percentage": "11.27%", "elapsed_time": "21m 11s", "remaining_time": "2h 46m 46s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.788296} {"loss": 0.83798915, "grad_norm": 1.79628646, "learning_rate": 9.893e-05, "token_acc": 0.75763747, "epoch": 1.12823397, "global_step/max_steps": "1003/8890", "percentage": "11.28%", "elapsed_time": "21m 12s", "remaining_time": "2h 46m 44s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.788377} {"loss": 0.79166758, "grad_norm": 1.97974479, "learning_rate": 9.892e-05, "token_acc": 0.78125, "epoch": 1.12935883, "global_step/max_steps": "1004/8890", "percentage": "11.29%", "elapsed_time": "21m 13s", "remaining_time": "2h 46m 41s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.788515} {"loss": 0.82005453, "grad_norm": 2.06537533, "learning_rate": 9.892e-05, "token_acc": 0.74971815, "epoch": 1.13048369, "global_step/max_steps": "1005/8890", "percentage": "11.30%", "elapsed_time": "21m 14s", "remaining_time": "2h 46m 39s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.788513} {"loss": 0.57443947, "grad_norm": 1.80086255, "learning_rate": 9.892e-05, "token_acc": 0.81971466, "epoch": 1.13160855, "global_step/max_steps": "1006/8890", "percentage": "11.32%", "elapsed_time": "21m 15s", "remaining_time": "2h 46m 37s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.78858} {"loss": 0.87810957, "grad_norm": 2.03664827, "learning_rate": 9.891e-05, "token_acc": 0.76681128, "epoch": 1.13273341, "global_step/max_steps": "1007/8890", "percentage": "11.33%", "elapsed_time": "21m 16s", "remaining_time": "2h 46m 35s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.788655} {"loss": 0.67735326, "grad_norm": 2.01296115, "learning_rate": 9.891e-05, "token_acc": 0.7902571, "epoch": 1.13385827, "global_step/max_steps": "1008/8890", "percentage": "11.34%", "elapsed_time": "21m 17s", "remaining_time": "2h 46m 33s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.788733} {"loss": 0.75467914, "grad_norm": 1.68983161, "learning_rate": 9.89e-05, "token_acc": 0.78879706, "epoch": 1.13498313, "global_step/max_steps": "1009/8890", "percentage": "11.35%", "elapsed_time": "21m 19s", "remaining_time": "2h 46m 30s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.788846} {"loss": 0.59621239, "grad_norm": 1.55206156, "learning_rate": 9.89e-05, "token_acc": 0.81997919, "epoch": 1.13610799, "global_step/max_steps": "1010/8890", "percentage": "11.36%", "elapsed_time": "21m 20s", "remaining_time": "2h 46m 28s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.788936} {"loss": 0.88082296, "grad_norm": 2.14293861, "learning_rate": 9.89e-05, "token_acc": 0.74635922, "epoch": 1.13723285, "global_step/max_steps": "1011/8890", "percentage": "11.37%", "elapsed_time": "21m 21s", "remaining_time": "2h 46m 25s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.789063} {"loss": 0.6994648, "grad_norm": 1.96823525, "learning_rate": 9.889e-05, "token_acc": 0.78424242, "epoch": 1.13835771, "global_step/max_steps": "1012/8890", "percentage": "11.38%", "elapsed_time": "21m 22s", "remaining_time": "2h 46m 21s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.789228} {"loss": 0.79466271, "grad_norm": 1.71783781, "learning_rate": 9.889e-05, "token_acc": 0.77254098, "epoch": 1.13948256, "global_step/max_steps": "1013/8890", "percentage": "11.39%", "elapsed_time": "21m 23s", "remaining_time": "2h 46m 19s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.789303} {"loss": 0.87061608, "grad_norm": 1.72954416, "learning_rate": 9.888e-05, "token_acc": 0.74416517, "epoch": 1.14060742, "global_step/max_steps": "1014/8890", "percentage": "11.41%", "elapsed_time": "21m 24s", "remaining_time": "2h 46m 18s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.789294} {"loss": 0.75932103, "grad_norm": 2.08414388, "learning_rate": 9.888e-05, "token_acc": 0.77985612, "epoch": 1.14173228, "global_step/max_steps": "1015/8890", "percentage": "11.42%", "elapsed_time": "21m 26s", "remaining_time": "2h 46m 17s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.78925} {"loss": 0.85244656, "grad_norm": 1.80544078, "learning_rate": 9.888e-05, "token_acc": 0.7394297, "epoch": 1.14285714, "global_step/max_steps": "1016/8890", "percentage": "11.43%", "elapsed_time": "21m 27s", "remaining_time": "2h 46m 16s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.789228} {"loss": 0.86452132, "grad_norm": 1.66883397, "learning_rate": 9.887e-05, "token_acc": 0.74346076, "epoch": 1.143982, "global_step/max_steps": "1017/8890", "percentage": "11.44%", "elapsed_time": "21m 28s", "remaining_time": "2h 46m 14s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.789301} {"loss": 0.71962899, "grad_norm": 1.815045, "learning_rate": 9.887e-05, "token_acc": 0.7755102, "epoch": 1.14510686, "global_step/max_steps": "1018/8890", "percentage": "11.45%", "elapsed_time": "21m 29s", "remaining_time": "2h 46m 11s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.789442} {"loss": 0.79907757, "grad_norm": 2.23440313, "learning_rate": 9.886e-05, "token_acc": 0.75501433, "epoch": 1.14623172, "global_step/max_steps": "1019/8890", "percentage": "11.46%", "elapsed_time": "21m 30s", "remaining_time": "2h 46m 8s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.789576} {"loss": 0.69866812, "grad_norm": 1.7661705, "learning_rate": 9.886e-05, "token_acc": 0.80836237, "epoch": 1.14735658, "global_step/max_steps": "1020/8890", "percentage": "11.47%", "elapsed_time": "21m 31s", "remaining_time": "2h 46m 5s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.789691} {"loss": 0.79971516, "grad_norm": 1.78623521, "learning_rate": 9.886e-05, "token_acc": 0.76856436, "epoch": 1.14848144, "global_step/max_steps": "1021/8890", "percentage": "11.48%", "elapsed_time": "21m 32s", "remaining_time": "2h 46m 3s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.789784} {"loss": 0.68811476, "grad_norm": 1.87142384, "learning_rate": 9.885e-05, "token_acc": 0.78489117, "epoch": 1.1496063, "global_step/max_steps": "1022/8890", "percentage": "11.50%", "elapsed_time": "21m 33s", "remaining_time": "2h 46m 1s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.78985} {"loss": 0.70031923, "grad_norm": 1.82593441, "learning_rate": 9.885e-05, "token_acc": 0.78293737, "epoch": 1.15073116, "global_step/max_steps": "1023/8890", "percentage": "11.51%", "elapsed_time": "21m 35s", "remaining_time": "2h 45m 59s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.789937} {"loss": 0.64979076, "grad_norm": 1.70930123, "learning_rate": 9.884e-05, "token_acc": 0.81328321, "epoch": 1.15185602, "global_step/max_steps": "1024/8890", "percentage": "11.52%", "elapsed_time": "21m 36s", "remaining_time": "2h 45m 56s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.790025} {"loss": 0.72240531, "grad_norm": 1.94859016, "learning_rate": 9.884e-05, "token_acc": 0.78823529, "epoch": 1.15298088, "global_step/max_steps": "1025/8890", "percentage": "11.53%", "elapsed_time": "21m 37s", "remaining_time": "2h 45m 54s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.790103} {"loss": 1.01276815, "grad_norm": 1.92526042, "learning_rate": 9.884e-05, "token_acc": 0.70708955, "epoch": 1.15410574, "global_step/max_steps": "1026/8890", "percentage": "11.54%", "elapsed_time": "21m 38s", "remaining_time": "2h 45m 53s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.790083} {"loss": 0.65747905, "grad_norm": 1.7990464, "learning_rate": 9.883e-05, "token_acc": 0.78751501, "epoch": 1.1552306, "global_step/max_steps": "1027/8890", "percentage": "11.55%", "elapsed_time": "21m 39s", "remaining_time": "2h 45m 51s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.790164} {"loss": 0.63154662, "grad_norm": 2.00041795, "learning_rate": 9.883e-05, "token_acc": 0.79435484, "epoch": 1.15635546, "global_step/max_steps": "1028/8890", "percentage": "11.56%", "elapsed_time": "21m 40s", "remaining_time": "2h 45m 49s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.790215} {"loss": 0.48309642, "grad_norm": 1.5275619, "learning_rate": 9.882e-05, "token_acc": 0.84707288, "epoch": 1.15748031, "global_step/max_steps": "1029/8890", "percentage": "11.57%", "elapsed_time": "21m 42s", "remaining_time": "2h 45m 47s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.790267} {"loss": 0.67935884, "grad_norm": 1.59401155, "learning_rate": 9.882e-05, "token_acc": 0.8, "epoch": 1.15860517, "global_step/max_steps": "1030/8890", "percentage": "11.59%", "elapsed_time": "21m 43s", "remaining_time": "2h 45m 45s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.790329} {"loss": 0.87445879, "grad_norm": 1.9372592, "learning_rate": 9.882e-05, "token_acc": 0.73454158, "epoch": 1.15973003, "global_step/max_steps": "1031/8890", "percentage": "11.60%", "elapsed_time": "21m 44s", "remaining_time": "2h 45m 41s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.790511} {"loss": 0.74440169, "grad_norm": 2.0884099, "learning_rate": 9.881e-05, "token_acc": 0.78534704, "epoch": 1.16085489, "global_step/max_steps": "1032/8890", "percentage": "11.61%", "elapsed_time": "21m 45s", "remaining_time": "2h 45m 38s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.790662} {"loss": 0.72309577, "grad_norm": 2.13332224, "learning_rate": 9.881e-05, "token_acc": 0.77841727, "epoch": 1.16197975, "global_step/max_steps": "1033/8890", "percentage": "11.62%", "elapsed_time": "21m 46s", "remaining_time": "2h 45m 36s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.790742} {"loss": 0.80993181, "grad_norm": 2.09895039, "learning_rate": 9.88e-05, "token_acc": 0.75769231, "epoch": 1.16310461, "global_step/max_steps": "1034/8890", "percentage": "11.63%", "elapsed_time": "21m 47s", "remaining_time": "2h 45m 34s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.790813} {"loss": 0.92905939, "grad_norm": 2.01306152, "learning_rate": 9.88e-05, "token_acc": 0.73869347, "epoch": 1.16422947, "global_step/max_steps": "1035/8890", "percentage": "11.64%", "elapsed_time": "21m 48s", "remaining_time": "2h 45m 30s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.790958} {"loss": 0.7967304, "grad_norm": 1.96764421, "learning_rate": 9.88e-05, "token_acc": 0.76554536, "epoch": 1.16535433, "global_step/max_steps": "1036/8890", "percentage": "11.65%", "elapsed_time": "21m 49s", "remaining_time": "2h 45m 28s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.791043} {"loss": 0.69695234, "grad_norm": 1.694695, "learning_rate": 9.879e-05, "token_acc": 0.79000847, "epoch": 1.16647919, "global_step/max_steps": "1037/8890", "percentage": "11.66%", "elapsed_time": "21m 50s", "remaining_time": "2h 45m 26s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.791113} {"loss": 0.80621946, "grad_norm": 2.14677691, "learning_rate": 9.879e-05, "token_acc": 0.77496992, "epoch": 1.16760405, "global_step/max_steps": "1038/8890", "percentage": "11.68%", "elapsed_time": "21m 51s", "remaining_time": "2h 45m 24s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.791203} {"loss": 0.69404566, "grad_norm": 1.70419288, "learning_rate": 9.878e-05, "token_acc": 0.79814815, "epoch": 1.16872891, "global_step/max_steps": "1039/8890", "percentage": "11.69%", "elapsed_time": "21m 52s", "remaining_time": "2h 45m 20s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.791423} {"loss": 0.83940876, "grad_norm": 1.76496875, "learning_rate": 9.878e-05, "token_acc": 0.76793893, "epoch": 1.16985377, "global_step/max_steps": "1040/8890", "percentage": "11.70%", "elapsed_time": "21m 54s", "remaining_time": "2h 45m 18s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.791471} {"loss": 0.67968178, "grad_norm": 1.84037602, "learning_rate": 9.878e-05, "token_acc": 0.7864184, "epoch": 1.17097863, "global_step/max_steps": "1041/8890", "percentage": "11.71%", "elapsed_time": "21m 55s", "remaining_time": "2h 45m 15s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.791551} {"loss": 0.56852257, "grad_norm": 1.89851344, "learning_rate": 9.877e-05, "token_acc": 0.81911765, "epoch": 1.17210349, "global_step/max_steps": "1042/8890", "percentage": "11.72%", "elapsed_time": "21m 56s", "remaining_time": "2h 45m 13s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.791641} {"loss": 0.63084614, "grad_norm": 1.84147131, "learning_rate": 9.877e-05, "token_acc": 0.81486146, "epoch": 1.17322835, "global_step/max_steps": "1043/8890", "percentage": "11.73%", "elapsed_time": "21m 57s", "remaining_time": "2h 45m 10s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.791765} {"loss": 0.99079788, "grad_norm": 1.97950757, "learning_rate": 9.876e-05, "token_acc": 0.71560403, "epoch": 1.17435321, "global_step/max_steps": "1044/8890", "percentage": "11.74%", "elapsed_time": "21m 58s", "remaining_time": "2h 45m 8s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.791849} {"loss": 0.63935977, "grad_norm": 1.59531164, "learning_rate": 9.876e-05, "token_acc": 0.81850854, "epoch": 1.17547807, "global_step/max_steps": "1045/8890", "percentage": "11.75%", "elapsed_time": "21m 59s", "remaining_time": "2h 45m 7s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.791839} {"loss": 0.78833663, "grad_norm": 1.7751193, "learning_rate": 9.876e-05, "token_acc": 0.78159645, "epoch": 1.17660292, "global_step/max_steps": "1046/8890", "percentage": "11.77%", "elapsed_time": "22m 0s", "remaining_time": "2h 45m 5s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.791877} {"loss": 0.77906322, "grad_norm": 1.55400896, "learning_rate": 9.875e-05, "token_acc": 0.78162651, "epoch": 1.17772778, "global_step/max_steps": "1047/8890", "percentage": "11.78%", "elapsed_time": "22m 2s", "remaining_time": "2h 45m 5s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.791791} {"loss": 0.92324889, "grad_norm": 2.11535096, "learning_rate": 9.875e-05, "token_acc": 0.74556213, "epoch": 1.17885264, "global_step/max_steps": "1048/8890", "percentage": "11.79%", "elapsed_time": "22m 3s", "remaining_time": "2h 45m 2s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.791882} {"loss": 0.78763306, "grad_norm": 1.89884245, "learning_rate": 9.874e-05, "token_acc": 0.75255392, "epoch": 1.1799775, "global_step/max_steps": "1049/8890", "percentage": "11.80%", "elapsed_time": "22m 4s", "remaining_time": "2h 45m 0s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.79197} {"loss": 0.77665037, "grad_norm": 1.84024382, "learning_rate": 9.874e-05, "token_acc": 0.77800407, "epoch": 1.18110236, "global_step/max_steps": "1050/8890", "percentage": "11.81%", "elapsed_time": "22m 5s", "remaining_time": "2h 44m 58s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.792053} {"loss": 0.92571795, "grad_norm": 2.01187611, "learning_rate": 9.873e-05, "token_acc": 0.72619048, "epoch": 1.18222722, "global_step/max_steps": "1051/8890", "percentage": "11.82%", "elapsed_time": "22m 6s", "remaining_time": "2h 44m 55s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.792183} {"loss": 0.78205222, "grad_norm": 1.64343822, "learning_rate": 9.873e-05, "token_acc": 0.7537248, "epoch": 1.18335208, "global_step/max_steps": "1052/8890", "percentage": "11.83%", "elapsed_time": "22m 7s", "remaining_time": "2h 44m 53s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.792231} {"loss": 0.72294068, "grad_norm": 1.61934245, "learning_rate": 9.873e-05, "token_acc": 0.7875226, "epoch": 1.18447694, "global_step/max_steps": "1053/8890", "percentage": "11.84%", "elapsed_time": "22m 9s", "remaining_time": "2h 44m 51s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.792315} {"loss": 0.85390079, "grad_norm": 1.77018631, "learning_rate": 9.872e-05, "token_acc": 0.7468599, "epoch": 1.1856018, "global_step/max_steps": "1054/8890", "percentage": "11.86%", "elapsed_time": "22m 10s", "remaining_time": "2h 44m 50s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.792299} {"loss": 0.78715467, "grad_norm": 1.59094989, "learning_rate": 9.872e-05, "token_acc": 0.77726433, "epoch": 1.18672666, "global_step/max_steps": "1055/8890", "percentage": "11.87%", "elapsed_time": "22m 11s", "remaining_time": "2h 44m 49s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.79226} {"loss": 0.53944623, "grad_norm": 1.73991656, "learning_rate": 9.871e-05, "token_acc": 0.83378016, "epoch": 1.18785152, "global_step/max_steps": "1056/8890", "percentage": "11.88%", "elapsed_time": "22m 12s", "remaining_time": "2h 44m 47s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.792327} {"loss": 0.74443722, "grad_norm": 1.79495013, "learning_rate": 9.871e-05, "token_acc": 0.76812977, "epoch": 1.18897638, "global_step/max_steps": "1057/8890", "percentage": "11.89%", "elapsed_time": "22m 13s", "remaining_time": "2h 44m 45s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.792381} {"loss": 0.92938358, "grad_norm": 2.01162815, "learning_rate": 9.871e-05, "token_acc": 0.74712644, "epoch": 1.19010124, "global_step/max_steps": "1058/8890", "percentage": "11.90%", "elapsed_time": "22m 15s", "remaining_time": "2h 44m 43s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.792438} {"loss": 0.56345564, "grad_norm": 1.40911114, "learning_rate": 9.87e-05, "token_acc": 0.82402002, "epoch": 1.1912261, "global_step/max_steps": "1059/8890", "percentage": "11.91%", "elapsed_time": "22m 16s", "remaining_time": "2h 44m 41s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.792519} {"loss": 0.73718691, "grad_norm": 2.14565516, "learning_rate": 9.87e-05, "token_acc": 0.76728723, "epoch": 1.19235096, "global_step/max_steps": "1060/8890", "percentage": "11.92%", "elapsed_time": "22m 17s", "remaining_time": "2h 44m 38s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.79263} {"loss": 0.74404734, "grad_norm": 1.70643377, "learning_rate": 9.869e-05, "token_acc": 0.79670862, "epoch": 1.19347582, "global_step/max_steps": "1061/8890", "percentage": "11.93%", "elapsed_time": "22m 18s", "remaining_time": "2h 44m 36s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.792698} {"loss": 0.72299546, "grad_norm": 1.74702883, "learning_rate": 9.869e-05, "token_acc": 0.77925211, "epoch": 1.19460067, "global_step/max_steps": "1062/8890", "percentage": "11.95%", "elapsed_time": "22m 19s", "remaining_time": "2h 44m 34s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.792775} {"loss": 0.670717, "grad_norm": 1.91653872, "learning_rate": 9.868e-05, "token_acc": 0.79381443, "epoch": 1.19572553, "global_step/max_steps": "1063/8890", "percentage": "11.96%", "elapsed_time": "22m 20s", "remaining_time": "2h 44m 30s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.792967} {"loss": 0.72678423, "grad_norm": 1.85575533, "learning_rate": 9.868e-05, "token_acc": 0.77830189, "epoch": 1.19685039, "global_step/max_steps": "1064/8890", "percentage": "11.97%", "elapsed_time": "22m 21s", "remaining_time": "2h 44m 28s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.793017} {"loss": 0.75062186, "grad_norm": 1.8728472, "learning_rate": 9.868e-05, "token_acc": 0.78857143, "epoch": 1.19797525, "global_step/max_steps": "1065/8890", "percentage": "11.98%", "elapsed_time": "22m 22s", "remaining_time": "2h 44m 26s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.793098} {"loss": 0.69441193, "grad_norm": 1.65757012, "learning_rate": 9.867e-05, "token_acc": 0.79937952, "epoch": 1.19910011, "global_step/max_steps": "1066/8890", "percentage": "11.99%", "elapsed_time": "22m 23s", "remaining_time": "2h 44m 24s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.793165} {"loss": 0.6921218, "grad_norm": 1.96152472, "learning_rate": 9.867e-05, "token_acc": 0.78991597, "epoch": 1.20022497, "global_step/max_steps": "1067/8890", "percentage": "12.00%", "elapsed_time": "22m 25s", "remaining_time": "2h 44m 22s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.793213} {"loss": 0.85607541, "grad_norm": 1.99687743, "learning_rate": 9.866e-05, "token_acc": 0.73875803, "epoch": 1.20134983, "global_step/max_steps": "1068/8890", "percentage": "12.01%", "elapsed_time": "22m 26s", "remaining_time": "2h 44m 20s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.793281} {"loss": 0.5352453, "grad_norm": 1.51214576, "learning_rate": 9.866e-05, "token_acc": 0.83417594, "epoch": 1.20247469, "global_step/max_steps": "1069/8890", "percentage": "12.02%", "elapsed_time": "22m 27s", "remaining_time": "2h 44m 18s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.793353} {"loss": 0.79240406, "grad_norm": 2.07560968, "learning_rate": 9.865e-05, "token_acc": 0.77305825, "epoch": 1.20359955, "global_step/max_steps": "1070/8890", "percentage": "12.04%", "elapsed_time": "22m 28s", "remaining_time": "2h 44m 16s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.793422} {"loss": 0.66672331, "grad_norm": 1.73617995, "learning_rate": 9.865e-05, "token_acc": 0.78899083, "epoch": 1.20472441, "global_step/max_steps": "1071/8890", "percentage": "12.05%", "elapsed_time": "22m 29s", "remaining_time": "2h 44m 14s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.793416} {"loss": 0.63516796, "grad_norm": 2.01416802, "learning_rate": 9.865e-05, "token_acc": 0.79719189, "epoch": 1.20584927, "global_step/max_steps": "1072/8890", "percentage": "12.06%", "elapsed_time": "22m 30s", "remaining_time": "2h 44m 12s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.793504} {"loss": 0.75908947, "grad_norm": 1.78446591, "learning_rate": 9.864e-05, "token_acc": 0.77497477, "epoch": 1.20697413, "global_step/max_steps": "1073/8890", "percentage": "12.07%", "elapsed_time": "22m 32s", "remaining_time": "2h 44m 10s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.793584} {"loss": 0.9093616, "grad_norm": 2.11978698, "learning_rate": 9.864e-05, "token_acc": 0.73450292, "epoch": 1.20809899, "global_step/max_steps": "1074/8890", "percentage": "12.08%", "elapsed_time": "22m 33s", "remaining_time": "2h 44m 8s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.793662} {"loss": 0.73803884, "grad_norm": 2.09633088, "learning_rate": 9.863e-05, "token_acc": 0.78973105, "epoch": 1.20922385, "global_step/max_steps": "1075/8890", "percentage": "12.09%", "elapsed_time": "22m 34s", "remaining_time": "2h 44m 5s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.793739} {"loss": 0.84677219, "grad_norm": 1.80241406, "learning_rate": 9.863e-05, "token_acc": 0.75233645, "epoch": 1.21034871, "global_step/max_steps": "1076/8890", "percentage": "12.10%", "elapsed_time": "22m 35s", "remaining_time": "2h 44m 3s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.79379} {"loss": 0.60814703, "grad_norm": 1.72217762, "learning_rate": 9.862e-05, "token_acc": 0.81573034, "epoch": 1.21147357, "global_step/max_steps": "1077/8890", "percentage": "12.11%", "elapsed_time": "22m 36s", "remaining_time": "2h 44m 2s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.793839} {"loss": 0.59948599, "grad_norm": 2.06869173, "learning_rate": 9.862e-05, "token_acc": 0.80343008, "epoch": 1.21259843, "global_step/max_steps": "1078/8890", "percentage": "12.13%", "elapsed_time": "22m 37s", "remaining_time": "2h 43m 59s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.793945} {"loss": 0.74706054, "grad_norm": 1.97057009, "learning_rate": 9.862e-05, "token_acc": 0.76939655, "epoch": 1.21372328, "global_step/max_steps": "1079/8890", "percentage": "12.14%", "elapsed_time": "22m 38s", "remaining_time": "2h 43m 57s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.794009} {"loss": 0.75195932, "grad_norm": 2.05565906, "learning_rate": 9.861e-05, "token_acc": 0.77290837, "epoch": 1.21484814, "global_step/max_steps": "1080/8890", "percentage": "12.15%", "elapsed_time": "22m 40s", "remaining_time": "2h 43m 55s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.794063} {"loss": 0.8023026, "grad_norm": 2.06042337, "learning_rate": 9.861e-05, "token_acc": 0.75059102, "epoch": 1.215973, "global_step/max_steps": "1081/8890", "percentage": "12.16%", "elapsed_time": "22m 41s", "remaining_time": "2h 43m 53s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.794129} {"loss": 0.77973926, "grad_norm": 2.21525812, "learning_rate": 9.86e-05, "token_acc": 0.76802508, "epoch": 1.21709786, "global_step/max_steps": "1082/8890", "percentage": "12.17%", "elapsed_time": "22m 42s", "remaining_time": "2h 43m 50s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.794249} {"loss": 0.84061235, "grad_norm": 1.31887531, "learning_rate": 9.86e-05, "token_acc": 0.75685558, "epoch": 1.21822272, "global_step/max_steps": "1083/8890", "percentage": "12.18%", "elapsed_time": "22m 43s", "remaining_time": "2h 43m 49s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.79422} {"loss": 0.74007481, "grad_norm": 1.99410284, "learning_rate": 9.859e-05, "token_acc": 0.7690387, "epoch": 1.21934758, "global_step/max_steps": "1084/8890", "percentage": "12.19%", "elapsed_time": "22m 44s", "remaining_time": "2h 43m 47s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.794283} {"loss": 0.80555528, "grad_norm": 1.93882418, "learning_rate": 9.859e-05, "token_acc": 0.76564052, "epoch": 1.22047244, "global_step/max_steps": "1085/8890", "percentage": "12.20%", "elapsed_time": "22m 45s", "remaining_time": "2h 43m 45s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.794357} {"loss": 0.65129733, "grad_norm": 2.05676937, "learning_rate": 9.859e-05, "token_acc": 0.78796992, "epoch": 1.2215973, "global_step/max_steps": "1086/8890", "percentage": "12.22%", "elapsed_time": "22m 46s", "remaining_time": "2h 43m 42s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.794501} {"loss": 0.7885077, "grad_norm": 2.04051328, "learning_rate": 9.858e-05, "token_acc": 0.76506765, "epoch": 1.22272216, "global_step/max_steps": "1087/8890", "percentage": "12.23%", "elapsed_time": "22m 47s", "remaining_time": "2h 43m 39s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.794677} {"loss": 0.91892636, "grad_norm": 2.06090212, "learning_rate": 9.858e-05, "token_acc": 0.74018127, "epoch": 1.22384702, "global_step/max_steps": "1088/8890", "percentage": "12.24%", "elapsed_time": "22m 49s", "remaining_time": "2h 43m 37s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.794734} {"loss": 0.70759314, "grad_norm": 1.73331177, "learning_rate": 9.857e-05, "token_acc": 0.78571429, "epoch": 1.22497188, "global_step/max_steps": "1089/8890", "percentage": "12.25%", "elapsed_time": "22m 50s", "remaining_time": "2h 43m 35s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.794783} {"loss": 0.54648566, "grad_norm": 1.9723748, "learning_rate": 9.857e-05, "token_acc": 0.83825816, "epoch": 1.22609674, "global_step/max_steps": "1090/8890", "percentage": "12.26%", "elapsed_time": "22m 51s", "remaining_time": "2h 43m 33s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.794852} {"loss": 0.67345911, "grad_norm": 1.91769803, "learning_rate": 9.856e-05, "token_acc": 0.78463329, "epoch": 1.2272216, "global_step/max_steps": "1091/8890", "percentage": "12.27%", "elapsed_time": "22m 52s", "remaining_time": "2h 43m 31s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.794893} {"loss": 0.64625919, "grad_norm": 2.11492777, "learning_rate": 9.856e-05, "token_acc": 0.79872204, "epoch": 1.22834646, "global_step/max_steps": "1092/8890", "percentage": "12.28%", "elapsed_time": "22m 53s", "remaining_time": "2h 43m 28s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.795006} {"loss": 0.5706501, "grad_norm": 1.89577496, "learning_rate": 9.855e-05, "token_acc": 0.81145251, "epoch": 1.22947132, "global_step/max_steps": "1093/8890", "percentage": "12.29%", "elapsed_time": "22m 54s", "remaining_time": "2h 43m 27s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.795043} {"loss": 0.78868532, "grad_norm": 2.0141027, "learning_rate": 9.855e-05, "token_acc": 0.75919732, "epoch": 1.23059618, "global_step/max_steps": "1094/8890", "percentage": "12.31%", "elapsed_time": "22m 56s", "remaining_time": "2h 43m 26s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.795011} {"loss": 0.86263162, "grad_norm": 1.76695442, "learning_rate": 9.855e-05, "token_acc": 0.74666667, "epoch": 1.23172103, "global_step/max_steps": "1095/8890", "percentage": "12.32%", "elapsed_time": "22m 57s", "remaining_time": "2h 43m 23s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.795097} {"loss": 0.78824693, "grad_norm": 1.92022002, "learning_rate": 9.854e-05, "token_acc": 0.76076555, "epoch": 1.23284589, "global_step/max_steps": "1096/8890", "percentage": "12.33%", "elapsed_time": "22m 58s", "remaining_time": "2h 43m 21s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.795157} {"loss": 0.77690166, "grad_norm": 1.80399466, "learning_rate": 9.854e-05, "token_acc": 0.77543538, "epoch": 1.23397075, "global_step/max_steps": "1097/8890", "percentage": "12.34%", "elapsed_time": "22m 59s", "remaining_time": "2h 43m 21s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.79512} {"loss": 0.73467815, "grad_norm": 1.91468263, "learning_rate": 9.853e-05, "token_acc": 0.8041958, "epoch": 1.23509561, "global_step/max_steps": "1098/8890", "percentage": "12.35%", "elapsed_time": "23m 0s", "remaining_time": "2h 43m 18s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.79519} {"loss": 0.64090514, "grad_norm": 1.98462403, "learning_rate": 9.853e-05, "token_acc": 0.81395349, "epoch": 1.23622047, "global_step/max_steps": "1099/8890", "percentage": "12.36%", "elapsed_time": "23m 1s", "remaining_time": "2h 43m 16s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.795309} {"loss": 0.62572086, "grad_norm": 1.58866644, "learning_rate": 9.852e-05, "token_acc": 0.82337199, "epoch": 1.23734533, "global_step/max_steps": "1100/8890", "percentage": "12.37%", "elapsed_time": "23m 3s", "remaining_time": "2h 43m 15s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.795297} {"loss": 0.80398458, "grad_norm": 2.18419528, "learning_rate": 9.852e-05, "token_acc": 0.76102941, "epoch": 1.23847019, "global_step/max_steps": "1101/8890", "percentage": "12.38%", "elapsed_time": "23m 3s", "remaining_time": "2h 43m 10s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.79557} {"loss": 0.85030681, "grad_norm": 1.9526397, "learning_rate": 9.851e-05, "token_acc": 0.74358974, "epoch": 1.23959505, "global_step/max_steps": "1102/8890", "percentage": "12.40%", "elapsed_time": "23m 5s", "remaining_time": "2h 43m 8s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.795614} {"loss": 0.82295722, "grad_norm": 1.97495961, "learning_rate": 9.851e-05, "token_acc": 0.7447479, "epoch": 1.24071991, "global_step/max_steps": "1103/8890", "percentage": "12.41%", "elapsed_time": "23m 6s", "remaining_time": "2h 43m 6s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.795657} {"loss": 0.86308742, "grad_norm": 2.07444406, "learning_rate": 9.851e-05, "token_acc": 0.74033149, "epoch": 1.24184477, "global_step/max_steps": "1104/8890", "percentage": "12.42%", "elapsed_time": "23m 7s", "remaining_time": "2h 43m 4s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.795711} {"loss": 0.69598138, "grad_norm": 2.22381449, "learning_rate": 9.85e-05, "token_acc": 0.78759894, "epoch": 1.24296963, "global_step/max_steps": "1105/8890", "percentage": "12.43%", "elapsed_time": "23m 8s", "remaining_time": "2h 43m 2s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.795779} {"loss": 0.797225, "grad_norm": 2.22032022, "learning_rate": 9.85e-05, "token_acc": 0.75555556, "epoch": 1.24409449, "global_step/max_steps": "1106/8890", "percentage": "12.44%", "elapsed_time": "23m 9s", "remaining_time": "2h 43m 0s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.795894} {"loss": 0.66712105, "grad_norm": 1.62964308, "learning_rate": 9.849e-05, "token_acc": 0.80228471, "epoch": 1.24521935, "global_step/max_steps": "1107/8890", "percentage": "12.45%", "elapsed_time": "23m 10s", "remaining_time": "2h 42m 58s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.795949} {"loss": 0.89010376, "grad_norm": 1.84857643, "learning_rate": 9.849e-05, "token_acc": 0.74137931, "epoch": 1.24634421, "global_step/max_steps": "1108/8890", "percentage": "12.46%", "elapsed_time": "23m 11s", "remaining_time": "2h 42m 55s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.796084} {"loss": 0.71536535, "grad_norm": 2.06394243, "learning_rate": 9.848e-05, "token_acc": 0.7859116, "epoch": 1.24746907, "global_step/max_steps": "1109/8890", "percentage": "12.47%", "elapsed_time": "23m 12s", "remaining_time": "2h 42m 53s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.796148} {"loss": 0.88946301, "grad_norm": 2.20039058, "learning_rate": 9.848e-05, "token_acc": 0.75672515, "epoch": 1.24859393, "global_step/max_steps": "1110/8890", "percentage": "12.49%", "elapsed_time": "23m 14s", "remaining_time": "2h 42m 52s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.796127} {"loss": 0.89366436, "grad_norm": 1.92718816, "learning_rate": 9.847e-05, "token_acc": 0.74065685, "epoch": 1.24971879, "global_step/max_steps": "1111/8890", "percentage": "12.50%", "elapsed_time": "23m 15s", "remaining_time": "2h 42m 50s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.796193} {"loss": 0.74490941, "grad_norm": 1.77138281, "learning_rate": 9.847e-05, "token_acc": 0.79119754, "epoch": 1.25084364, "global_step/max_steps": "1112/8890", "percentage": "12.51%", "elapsed_time": "23m 16s", "remaining_time": "2h 42m 48s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.796255} {"loss": 0.700634, "grad_norm": 2.00650883, "learning_rate": 9.846e-05, "token_acc": 0.78571429, "epoch": 1.2519685, "global_step/max_steps": "1113/8890", "percentage": "12.52%", "elapsed_time": "23m 17s", "remaining_time": "2h 42m 46s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.796323} {"loss": 0.79623878, "grad_norm": 2.30159187, "learning_rate": 9.846e-05, "token_acc": 0.76033058, "epoch": 1.25309336, "global_step/max_steps": "1114/8890", "percentage": "12.53%", "elapsed_time": "23m 18s", "remaining_time": "2h 42m 44s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.79636} {"loss": 0.71941698, "grad_norm": 1.92615414, "learning_rate": 9.845e-05, "token_acc": 0.79635258, "epoch": 1.25421822, "global_step/max_steps": "1115/8890", "percentage": "12.54%", "elapsed_time": "23m 19s", "remaining_time": "2h 42m 39s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.796666} {"loss": 0.67675889, "grad_norm": 1.87069213, "learning_rate": 9.845e-05, "token_acc": 0.78974359, "epoch": 1.25534308, "global_step/max_steps": "1116/8890", "percentage": "12.55%", "elapsed_time": "23m 20s", "remaining_time": "2h 42m 36s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.796762} {"loss": 0.90716696, "grad_norm": 2.13439012, "learning_rate": 9.845e-05, "token_acc": 0.73728814, "epoch": 1.25646794, "global_step/max_steps": "1117/8890", "percentage": "12.56%", "elapsed_time": "23m 21s", "remaining_time": "2h 42m 32s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.797002} {"loss": 0.81707656, "grad_norm": 2.03423595, "learning_rate": 9.844e-05, "token_acc": 0.74935401, "epoch": 1.2575928, "global_step/max_steps": "1118/8890", "percentage": "12.58%", "elapsed_time": "23m 22s", "remaining_time": "2h 42m 30s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.797056} {"loss": 0.68956017, "grad_norm": 1.7935425, "learning_rate": 9.844e-05, "token_acc": 0.78208232, "epoch": 1.25871766, "global_step/max_steps": "1119/8890", "percentage": "12.59%", "elapsed_time": "23m 23s", "remaining_time": "2h 42m 25s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.797357} {"loss": 0.95166552, "grad_norm": 1.96147406, "learning_rate": 9.843e-05, "token_acc": 0.7230444, "epoch": 1.25984252, "global_step/max_steps": "1120/8890", "percentage": "12.60%", "elapsed_time": "23m 24s", "remaining_time": "2h 42m 24s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.797409} {"loss": 0.69087374, "grad_norm": 1.92166221, "learning_rate": 9.843e-05, "token_acc": 0.77887324, "epoch": 1.26096738, "global_step/max_steps": "1121/8890", "percentage": "12.61%", "elapsed_time": "23m 25s", "remaining_time": "2h 42m 21s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.797484} {"loss": 0.69973701, "grad_norm": 2.17909217, "learning_rate": 9.842e-05, "token_acc": 0.76883562, "epoch": 1.26209224, "global_step/max_steps": "1122/8890", "percentage": "12.62%", "elapsed_time": "23m 26s", "remaining_time": "2h 42m 19s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.797587} {"loss": 0.68686378, "grad_norm": 1.51092243, "learning_rate": 9.842e-05, "token_acc": 0.80584192, "epoch": 1.2632171, "global_step/max_steps": "1123/8890", "percentage": "12.63%", "elapsed_time": "23m 27s", "remaining_time": "2h 42m 17s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.797642} {"loss": 0.91594672, "grad_norm": 1.77316606, "learning_rate": 9.841e-05, "token_acc": 0.73962264, "epoch": 1.26434196, "global_step/max_steps": "1124/8890", "percentage": "12.64%", "elapsed_time": "23m 29s", "remaining_time": "2h 42m 16s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.797633} {"loss": 0.64410669, "grad_norm": 1.78486753, "learning_rate": 9.841e-05, "token_acc": 0.79445727, "epoch": 1.26546682, "global_step/max_steps": "1125/8890", "percentage": "12.65%", "elapsed_time": "23m 30s", "remaining_time": "2h 42m 14s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.797685} {"loss": 0.8414185, "grad_norm": 1.97072089, "learning_rate": 9.84e-05, "token_acc": 0.73927393, "epoch": 1.26659168, "global_step/max_steps": "1126/8890", "percentage": "12.67%", "elapsed_time": "23m 31s", "remaining_time": "2h 42m 12s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.797732} {"loss": 0.7751345, "grad_norm": 1.6855396, "learning_rate": 9.84e-05, "token_acc": 0.78769841, "epoch": 1.26771654, "global_step/max_steps": "1127/8890", "percentage": "12.68%", "elapsed_time": "23m 32s", "remaining_time": "2h 42m 11s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.797722} {"loss": 0.90871674, "grad_norm": 1.68130064, "learning_rate": 9.839e-05, "token_acc": 0.74692443, "epoch": 1.26884139, "global_step/max_steps": "1128/8890", "percentage": "12.69%", "elapsed_time": "23m 33s", "remaining_time": "2h 42m 8s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.797864} {"loss": 0.71580088, "grad_norm": 1.91439712, "learning_rate": 9.839e-05, "token_acc": 0.78571429, "epoch": 1.26996625, "global_step/max_steps": "1129/8890", "percentage": "12.70%", "elapsed_time": "23m 34s", "remaining_time": "2h 42m 5s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.797975} {"loss": 0.83513236, "grad_norm": 1.99769139, "learning_rate": 9.839e-05, "token_acc": 0.74715026, "epoch": 1.27109111, "global_step/max_steps": "1130/8890", "percentage": "12.71%", "elapsed_time": "23m 35s", "remaining_time": "2h 42m 3s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.798044} {"loss": 0.71577013, "grad_norm": 1.80858648, "learning_rate": 9.838e-05, "token_acc": 0.77945619, "epoch": 1.27221597, "global_step/max_steps": "1131/8890", "percentage": "12.72%", "elapsed_time": "23m 37s", "remaining_time": "2h 42m 1s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.798115} {"loss": 0.69783211, "grad_norm": 1.91983724, "learning_rate": 9.838e-05, "token_acc": 0.77491601, "epoch": 1.27334083, "global_step/max_steps": "1132/8890", "percentage": "12.73%", "elapsed_time": "23m 38s", "remaining_time": "2h 41m 59s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.798179} {"loss": 0.9586184, "grad_norm": 1.95172954, "learning_rate": 9.837e-05, "token_acc": 0.72057502, "epoch": 1.27446569, "global_step/max_steps": "1133/8890", "percentage": "12.74%", "elapsed_time": "23m 39s", "remaining_time": "2h 41m 58s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.798162} {"loss": 0.70323998, "grad_norm": 2.05635405, "learning_rate": 9.837e-05, "token_acc": 0.77906977, "epoch": 1.27559055, "global_step/max_steps": "1134/8890", "percentage": "12.76%", "elapsed_time": "23m 40s", "remaining_time": "2h 41m 56s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.798227} {"loss": 0.79138297, "grad_norm": 2.02168941, "learning_rate": 9.836e-05, "token_acc": 0.75368899, "epoch": 1.27671541, "global_step/max_steps": "1135/8890", "percentage": "12.77%", "elapsed_time": "23m 41s", "remaining_time": "2h 41m 53s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.798363} {"loss": 0.5243752, "grad_norm": 1.9038254, "learning_rate": 9.836e-05, "token_acc": 0.82158921, "epoch": 1.27784027, "global_step/max_steps": "1136/8890", "percentage": "12.78%", "elapsed_time": "23m 42s", "remaining_time": "2h 41m 50s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.798479} {"loss": 0.5047226, "grad_norm": 1.92292917, "learning_rate": 9.835e-05, "token_acc": 0.8431912, "epoch": 1.27896513, "global_step/max_steps": "1137/8890", "percentage": "12.79%", "elapsed_time": "23m 43s", "remaining_time": "2h 41m 48s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.798602} {"loss": 0.86565179, "grad_norm": 1.96324182, "learning_rate": 9.835e-05, "token_acc": 0.73197115, "epoch": 1.28008999, "global_step/max_steps": "1138/8890", "percentage": "12.80%", "elapsed_time": "23m 44s", "remaining_time": "2h 41m 46s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.798677} {"loss": 0.76505494, "grad_norm": 1.91608214, "learning_rate": 9.834e-05, "token_acc": 0.77198364, "epoch": 1.28121485, "global_step/max_steps": "1139/8890", "percentage": "12.81%", "elapsed_time": "23m 46s", "remaining_time": "2h 41m 44s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.798736} {"loss": 0.66064572, "grad_norm": 2.03440952, "learning_rate": 9.834e-05, "token_acc": 0.80530973, "epoch": 1.28233971, "global_step/max_steps": "1140/8890", "percentage": "12.82%", "elapsed_time": "23m 47s", "remaining_time": "2h 41m 41s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.798833} {"loss": 0.57873577, "grad_norm": 1.83865368, "learning_rate": 9.833e-05, "token_acc": 0.8220339, "epoch": 1.28346457, "global_step/max_steps": "1141/8890", "percentage": "12.83%", "elapsed_time": "23m 48s", "remaining_time": "2h 41m 38s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.798965} {"loss": 0.73223877, "grad_norm": 1.84835327, "learning_rate": 9.833e-05, "token_acc": 0.76674641, "epoch": 1.28458943, "global_step/max_steps": "1142/8890", "percentage": "12.85%", "elapsed_time": "23m 49s", "remaining_time": "2h 41m 36s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.799031} {"loss": 0.68259621, "grad_norm": 1.60138822, "learning_rate": 9.832e-05, "token_acc": 0.79623477, "epoch": 1.28571429, "global_step/max_steps": "1143/8890", "percentage": "12.86%", "elapsed_time": "23m 50s", "remaining_time": "2h 41m 34s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.799099} {"loss": 0.70708251, "grad_norm": 1.85737443, "learning_rate": 9.832e-05, "token_acc": 0.78280543, "epoch": 1.28683915, "global_step/max_steps": "1144/8890", "percentage": "12.87%", "elapsed_time": "23m 51s", "remaining_time": "2h 41m 32s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.799136} {"loss": 0.82147205, "grad_norm": 1.73913789, "learning_rate": 9.831e-05, "token_acc": 0.74028436, "epoch": 1.287964, "global_step/max_steps": "1145/8890", "percentage": "12.88%", "elapsed_time": "23m 52s", "remaining_time": "2h 41m 31s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.799186} {"loss": 0.82103205, "grad_norm": 2.11848474, "learning_rate": 9.831e-05, "token_acc": 0.74782609, "epoch": 1.28908886, "global_step/max_steps": "1146/8890", "percentage": "12.89%", "elapsed_time": "23m 53s", "remaining_time": "2h 41m 29s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.799244} {"loss": 0.74365449, "grad_norm": 1.65769196, "learning_rate": 9.83e-05, "token_acc": 0.77828467, "epoch": 1.29021372, "global_step/max_steps": "1147/8890", "percentage": "12.90%", "elapsed_time": "23m 54s", "remaining_time": "2h 41m 27s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.799312} {"loss": 0.66875225, "grad_norm": 1.79958189, "learning_rate": 9.83e-05, "token_acc": 0.78953488, "epoch": 1.29133858, "global_step/max_steps": "1148/8890", "percentage": "12.91%", "elapsed_time": "23m 56s", "remaining_time": "2h 41m 25s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.799369} {"loss": 0.62838817, "grad_norm": 2.18545938, "learning_rate": 9.83e-05, "token_acc": 0.8029316, "epoch": 1.29246344, "global_step/max_steps": "1149/8890", "percentage": "12.92%", "elapsed_time": "23m 57s", "remaining_time": "2h 41m 21s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.799537} {"loss": 0.73875374, "grad_norm": 1.94570005, "learning_rate": 9.829e-05, "token_acc": 0.79099307, "epoch": 1.2935883, "global_step/max_steps": "1150/8890", "percentage": "12.94%", "elapsed_time": "23m 58s", "remaining_time": "2h 41m 19s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.799598} {"loss": 0.77581525, "grad_norm": 2.46495223, "learning_rate": 9.829e-05, "token_acc": 0.76565008, "epoch": 1.29471316, "global_step/max_steps": "1151/8890", "percentage": "12.95%", "elapsed_time": "23m 59s", "remaining_time": "2h 41m 17s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.799665} {"loss": 0.58880395, "grad_norm": 1.61463773, "learning_rate": 9.828e-05, "token_acc": 0.80712788, "epoch": 1.29583802, "global_step/max_steps": "1152/8890", "percentage": "12.96%", "elapsed_time": "24m 0s", "remaining_time": "2h 41m 15s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.799784} {"loss": 0.82058001, "grad_norm": 1.9203676, "learning_rate": 9.828e-05, "token_acc": 0.75693674, "epoch": 1.29696288, "global_step/max_steps": "1153/8890", "percentage": "12.97%", "elapsed_time": "24m 1s", "remaining_time": "2h 41m 12s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.799857} {"loss": 0.78231573, "grad_norm": 1.87149692, "learning_rate": 9.827e-05, "token_acc": 0.78595318, "epoch": 1.29808774, "global_step/max_steps": "1154/8890", "percentage": "12.98%", "elapsed_time": "24m 2s", "remaining_time": "2h 41m 10s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.799983} {"loss": 0.83597517, "grad_norm": 1.90934527, "learning_rate": 9.827e-05, "token_acc": 0.7472119, "epoch": 1.2992126, "global_step/max_steps": "1155/8890", "percentage": "12.99%", "elapsed_time": "24m 3s", "remaining_time": "2h 41m 7s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.800107} {"loss": 0.75200957, "grad_norm": 1.88693297, "learning_rate": 9.826e-05, "token_acc": 0.76068376, "epoch": 1.30033746, "global_step/max_steps": "1156/8890", "percentage": "13.00%", "elapsed_time": "24m 4s", "remaining_time": "2h 41m 5s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.80018} {"loss": 0.69066203, "grad_norm": 2.25816917, "learning_rate": 9.826e-05, "token_acc": 0.78726968, "epoch": 1.30146232, "global_step/max_steps": "1157/8890", "percentage": "13.01%", "elapsed_time": "24m 5s", "remaining_time": "2h 41m 2s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.80034} {"loss": 0.89955258, "grad_norm": 2.0555737, "learning_rate": 9.825e-05, "token_acc": 0.7375, "epoch": 1.30258718, "global_step/max_steps": "1158/8890", "percentage": "13.03%", "elapsed_time": "24m 6s", "remaining_time": "2h 41m 0s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.800401} {"loss": 0.88888222, "grad_norm": 1.94304144, "learning_rate": 9.825e-05, "token_acc": 0.74054054, "epoch": 1.30371204, "global_step/max_steps": "1159/8890", "percentage": "13.04%", "elapsed_time": "24m 7s", "remaining_time": "2h 40m 57s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.800519} {"loss": 0.67443532, "grad_norm": 1.94822013, "learning_rate": 9.824e-05, "token_acc": 0.79518072, "epoch": 1.3048369, "global_step/max_steps": "1160/8890", "percentage": "13.05%", "elapsed_time": "24m 8s", "remaining_time": "2h 40m 55s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.80058} {"loss": 0.76343167, "grad_norm": 2.03749585, "learning_rate": 9.824e-05, "token_acc": 0.75386598, "epoch": 1.30596175, "global_step/max_steps": "1161/8890", "percentage": "13.06%", "elapsed_time": "24m 10s", "remaining_time": "2h 40m 53s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.800644} {"loss": 0.6768223, "grad_norm": 1.73200929, "learning_rate": 9.823e-05, "token_acc": 0.79531443, "epoch": 1.30708661, "global_step/max_steps": "1162/8890", "percentage": "13.07%", "elapsed_time": "24m 11s", "remaining_time": "2h 40m 51s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.800682} {"loss": 0.89577764, "grad_norm": 2.06142306, "learning_rate": 9.823e-05, "token_acc": 0.76206897, "epoch": 1.30821147, "global_step/max_steps": "1163/8890", "percentage": "13.08%", "elapsed_time": "24m 12s", "remaining_time": "2h 40m 49s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.800743} {"loss": 0.68265283, "grad_norm": 1.82373273, "learning_rate": 9.822e-05, "token_acc": 0.78451883, "epoch": 1.30933633, "global_step/max_steps": "1164/8890", "percentage": "13.09%", "elapsed_time": "24m 13s", "remaining_time": "2h 40m 48s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.800727} {"loss": 0.74625427, "grad_norm": 1.83115089, "learning_rate": 9.822e-05, "token_acc": 0.77518558, "epoch": 1.31046119, "global_step/max_steps": "1165/8890", "percentage": "13.10%", "elapsed_time": "24m 14s", "remaining_time": "2h 40m 47s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.800763} {"loss": 0.62975395, "grad_norm": 1.93634713, "learning_rate": 9.821e-05, "token_acc": 0.81012658, "epoch": 1.31158605, "global_step/max_steps": "1166/8890", "percentage": "13.12%", "elapsed_time": "24m 16s", "remaining_time": "2h 40m 45s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.80081} {"loss": 0.75302535, "grad_norm": 1.87742138, "learning_rate": 9.821e-05, "token_acc": 0.76914153, "epoch": 1.31271091, "global_step/max_steps": "1167/8890", "percentage": "13.13%", "elapsed_time": "24m 17s", "remaining_time": "2h 40m 43s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.800854} {"loss": 0.74573553, "grad_norm": 2.1197648, "learning_rate": 9.82e-05, "token_acc": 0.78313253, "epoch": 1.31383577, "global_step/max_steps": "1168/8890", "percentage": "13.14%", "elapsed_time": "24m 18s", "remaining_time": "2h 40m 40s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.800967} {"loss": 0.77993381, "grad_norm": 1.88526511, "learning_rate": 9.82e-05, "token_acc": 0.76154806, "epoch": 1.31496063, "global_step/max_steps": "1169/8890", "percentage": "13.15%", "elapsed_time": "24m 19s", "remaining_time": "2h 40m 38s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.801081} {"loss": 0.72837245, "grad_norm": 1.64253533, "learning_rate": 9.819e-05, "token_acc": 0.77799228, "epoch": 1.31608549, "global_step/max_steps": "1170/8890", "percentage": "13.16%", "elapsed_time": "24m 20s", "remaining_time": "2h 40m 36s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.801147} {"loss": 0.76211762, "grad_norm": 1.73266339, "learning_rate": 9.819e-05, "token_acc": 0.7786333, "epoch": 1.31721035, "global_step/max_steps": "1171/8890", "percentage": "13.17%", "elapsed_time": "24m 21s", "remaining_time": "2h 40m 34s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.80119} {"loss": 0.77152556, "grad_norm": 1.85260725, "learning_rate": 9.818e-05, "token_acc": 0.77315914, "epoch": 1.31833521, "global_step/max_steps": "1172/8890", "percentage": "13.18%", "elapsed_time": "24m 22s", "remaining_time": "2h 40m 33s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.80114} {"loss": 0.82493925, "grad_norm": 1.93475831, "learning_rate": 9.818e-05, "token_acc": 0.75440529, "epoch": 1.31946007, "global_step/max_steps": "1173/8890", "percentage": "13.19%", "elapsed_time": "24m 23s", "remaining_time": "2h 40m 31s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.801256} {"loss": 0.71915507, "grad_norm": 1.8014034, "learning_rate": 9.817e-05, "token_acc": 0.80769231, "epoch": 1.32058493, "global_step/max_steps": "1174/8890", "percentage": "13.21%", "elapsed_time": "24m 25s", "remaining_time": "2h 40m 30s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.801244} {"loss": 0.72703075, "grad_norm": 1.83340168, "learning_rate": 9.817e-05, "token_acc": 0.78604651, "epoch": 1.32170979, "global_step/max_steps": "1175/8890", "percentage": "13.22%", "elapsed_time": "24m 26s", "remaining_time": "2h 40m 27s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.801369} {"loss": 0.90025139, "grad_norm": 1.79811144, "learning_rate": 9.816e-05, "token_acc": 0.73210634, "epoch": 1.32283465, "global_step/max_steps": "1176/8890", "percentage": "13.23%", "elapsed_time": "24m 27s", "remaining_time": "2h 40m 25s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.801407} {"loss": 0.69905382, "grad_norm": 1.75056422, "learning_rate": 9.816e-05, "token_acc": 0.77695167, "epoch": 1.32395951, "global_step/max_steps": "1177/8890", "percentage": "13.24%", "elapsed_time": "24m 28s", "remaining_time": "2h 40m 23s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.801472} {"loss": 0.72953647, "grad_norm": 1.6244204, "learning_rate": 9.815e-05, "token_acc": 0.7681592, "epoch": 1.32508436, "global_step/max_steps": "1178/8890", "percentage": "13.25%", "elapsed_time": "24m 29s", "remaining_time": "2h 40m 21s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.801534} {"loss": 0.63279998, "grad_norm": 1.72434831, "learning_rate": 9.815e-05, "token_acc": 0.79879518, "epoch": 1.32620922, "global_step/max_steps": "1179/8890", "percentage": "13.26%", "elapsed_time": "24m 30s", "remaining_time": "2h 40m 18s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.801656} {"loss": 0.85406876, "grad_norm": 1.82429302, "learning_rate": 9.814e-05, "token_acc": 0.75158562, "epoch": 1.32733408, "global_step/max_steps": "1180/8890", "percentage": "13.27%", "elapsed_time": "24m 31s", "remaining_time": "2h 40m 16s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.80171} {"loss": 0.70454788, "grad_norm": 1.7290256, "learning_rate": 9.814e-05, "token_acc": 0.78442438, "epoch": 1.32845894, "global_step/max_steps": "1181/8890", "percentage": "13.28%", "elapsed_time": "24m 32s", "remaining_time": "2h 40m 14s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.801809} {"loss": 0.73538327, "grad_norm": 1.77526569, "learning_rate": 9.813e-05, "token_acc": 0.80141011, "epoch": 1.3295838, "global_step/max_steps": "1182/8890", "percentage": "13.30%", "elapsed_time": "24m 33s", "remaining_time": "2h 40m 11s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.801916} {"loss": 0.68643999, "grad_norm": 1.90847147, "learning_rate": 9.813e-05, "token_acc": 0.79395604, "epoch": 1.33070866, "global_step/max_steps": "1183/8890", "percentage": "13.31%", "elapsed_time": "24m 34s", "remaining_time": "2h 40m 8s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.802075} {"loss": 0.79469246, "grad_norm": 1.99096823, "learning_rate": 9.812e-05, "token_acc": 0.76336746, "epoch": 1.33183352, "global_step/max_steps": "1184/8890", "percentage": "13.32%", "elapsed_time": "24m 36s", "remaining_time": "2h 40m 6s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.802132} {"loss": 0.71630049, "grad_norm": 1.79249036, "learning_rate": 9.812e-05, "token_acc": 0.78607595, "epoch": 1.33295838, "global_step/max_steps": "1185/8890", "percentage": "13.33%", "elapsed_time": "24m 37s", "remaining_time": "2h 40m 5s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.802184} {"loss": 0.77131236, "grad_norm": 1.81674826, "learning_rate": 9.811e-05, "token_acc": 0.76727642, "epoch": 1.33408324, "global_step/max_steps": "1186/8890", "percentage": "13.34%", "elapsed_time": "24m 38s", "remaining_time": "2h 40m 2s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.802305} {"loss": 0.73970115, "grad_norm": 1.79600728, "learning_rate": 9.811e-05, "token_acc": 0.77425945, "epoch": 1.3352081, "global_step/max_steps": "1187/8890", "percentage": "13.35%", "elapsed_time": "24m 39s", "remaining_time": "2h 40m 0s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.802365} {"loss": 0.68692434, "grad_norm": 1.6907357, "learning_rate": 9.81e-05, "token_acc": 0.79233871, "epoch": 1.33633296, "global_step/max_steps": "1188/8890", "percentage": "13.36%", "elapsed_time": "24m 40s", "remaining_time": "2h 39m 57s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.80249} {"loss": 0.7611798, "grad_norm": 1.85425889, "learning_rate": 9.81e-05, "token_acc": 0.77062147, "epoch": 1.33745782, "global_step/max_steps": "1189/8890", "percentage": "13.37%", "elapsed_time": "24m 41s", "remaining_time": "2h 39m 55s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.802547} {"loss": 0.82622695, "grad_norm": 1.85047925, "learning_rate": 9.809e-05, "token_acc": 0.73995536, "epoch": 1.33858268, "global_step/max_steps": "1190/8890", "percentage": "13.39%", "elapsed_time": "24m 42s", "remaining_time": "2h 39m 52s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.802727} {"loss": 0.7734524, "grad_norm": 1.81228185, "learning_rate": 9.809e-05, "token_acc": 0.77808471, "epoch": 1.33970754, "global_step/max_steps": "1191/8890", "percentage": "13.40%", "elapsed_time": "24m 43s", "remaining_time": "2h 39m 50s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.802774} {"loss": 0.64274025, "grad_norm": 2.11122394, "learning_rate": 9.808e-05, "token_acc": 0.80147059, "epoch": 1.3408324, "global_step/max_steps": "1192/8890", "percentage": "13.41%", "elapsed_time": "24m 44s", "remaining_time": "2h 39m 48s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.802819} {"loss": 0.50810802, "grad_norm": 1.30592275, "learning_rate": 9.808e-05, "token_acc": 0.83015742, "epoch": 1.34195726, "global_step/max_steps": "1193/8890", "percentage": "13.42%", "elapsed_time": "24m 46s", "remaining_time": "2h 39m 48s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.802757} {"loss": 0.49726599, "grad_norm": 1.7677902, "learning_rate": 9.807e-05, "token_acc": 0.82588598, "epoch": 1.34308211, "global_step/max_steps": "1194/8890", "percentage": "13.43%", "elapsed_time": "24m 47s", "remaining_time": "2h 39m 46s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.802806} {"loss": 0.59080195, "grad_norm": 1.67075372, "learning_rate": 9.807e-05, "token_acc": 0.81556503, "epoch": 1.34420697, "global_step/max_steps": "1195/8890", "percentage": "13.44%", "elapsed_time": "24m 48s", "remaining_time": "2h 39m 44s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.802871} {"loss": 0.67766994, "grad_norm": 1.95354569, "learning_rate": 9.806e-05, "token_acc": 0.78933333, "epoch": 1.34533183, "global_step/max_steps": "1196/8890", "percentage": "13.45%", "elapsed_time": "24m 49s", "remaining_time": "2h 39m 42s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.80294} {"loss": 0.88724703, "grad_norm": 2.09912848, "learning_rate": 9.806e-05, "token_acc": 0.75206612, "epoch": 1.34645669, "global_step/max_steps": "1197/8890", "percentage": "13.46%", "elapsed_time": "24m 50s", "remaining_time": "2h 39m 40s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.802962} {"loss": 0.9282819, "grad_norm": 1.83315432, "learning_rate": 9.805e-05, "token_acc": 0.7437276, "epoch": 1.34758155, "global_step/max_steps": "1198/8890", "percentage": "13.48%", "elapsed_time": "24m 51s", "remaining_time": "2h 39m 38s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.803008} {"loss": 0.76104581, "grad_norm": 1.95398581, "learning_rate": 9.805e-05, "token_acc": 0.76211454, "epoch": 1.34870641, "global_step/max_steps": "1199/8890", "percentage": "13.49%", "elapsed_time": "24m 53s", "remaining_time": "2h 39m 37s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.803054} {"loss": 0.53057969, "grad_norm": 1.38303638, "learning_rate": 9.804e-05, "token_acc": 0.84333035, "epoch": 1.34983127, "global_step/max_steps": "1200/8890", "percentage": "13.50%", "elapsed_time": "24m 54s", "remaining_time": "2h 39m 35s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.803108} {"eval_loss": 0.90420192, "eval_runtime": 31.5947, "eval_samples_per_second": 25.416, "eval_steps_per_second": 3.197, "eval_token_acc": 0.73965947, "epoch": 1.34983127, "global_step/max_steps": "1200/8890", "percentage": "13.50%", "elapsed_time": "25m 25s", "remaining_time": "2h 42m 57s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.786474} {"loss": 0.88330352, "grad_norm": 1.97117341, "learning_rate": 9.804e-05, "token_acc": 0.74223602, "epoch": 1.35095613, "global_step/max_steps": "1201/8890", "percentage": "13.51%", "elapsed_time": "25m 41s", "remaining_time": "2h 44m 26s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.779281} {"loss": 0.65161645, "grad_norm": 1.94770455, "learning_rate": 9.803e-05, "token_acc": 0.80188679, "epoch": 1.35208099, "global_step/max_steps": "1202/8890", "percentage": "13.52%", "elapsed_time": "25m 42s", "remaining_time": "2h 44m 24s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.779387} {"loss": 0.69067997, "grad_norm": 2.04227781, "learning_rate": 9.803e-05, "token_acc": 0.7706422, "epoch": 1.35320585, "global_step/max_steps": "1203/8890", "percentage": "13.53%", "elapsed_time": "25m 43s", "remaining_time": "2h 44m 21s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.77949} {"loss": 0.67281342, "grad_norm": 1.84964681, "learning_rate": 9.802e-05, "token_acc": 0.81894484, "epoch": 1.35433071, "global_step/max_steps": "1204/8890", "percentage": "13.54%", "elapsed_time": "25m 44s", "remaining_time": "2h 44m 18s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.779631} {"loss": 0.64624119, "grad_norm": 1.6206181, "learning_rate": 9.801e-05, "token_acc": 0.79186834, "epoch": 1.35545557, "global_step/max_steps": "1205/8890", "percentage": "13.55%", "elapsed_time": "25m 45s", "remaining_time": "2h 44m 17s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.779632} {"loss": 0.88590729, "grad_norm": 2.33534336, "learning_rate": 9.801e-05, "token_acc": 0.75588235, "epoch": 1.35658043, "global_step/max_steps": "1206/8890", "percentage": "13.57%", "elapsed_time": "25m 46s", "remaining_time": "2h 44m 14s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.779712} {"loss": 0.62156206, "grad_norm": 1.57240856, "learning_rate": 9.8e-05, "token_acc": 0.80914513, "epoch": 1.35770529, "global_step/max_steps": "1207/8890", "percentage": "13.58%", "elapsed_time": "25m 47s", "remaining_time": "2h 44m 11s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.779877} {"loss": 0.66032588, "grad_norm": 2.01880813, "learning_rate": 9.8e-05, "token_acc": 0.78991597, "epoch": 1.35883015, "global_step/max_steps": "1208/8890", "percentage": "13.59%", "elapsed_time": "25m 48s", "remaining_time": "2h 44m 9s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.779955} {"loss": 0.65113884, "grad_norm": 1.65955186, "learning_rate": 9.799e-05, "token_acc": 0.81309524, "epoch": 1.35995501, "global_step/max_steps": "1209/8890", "percentage": "13.60%", "elapsed_time": "25m 49s", "remaining_time": "2h 44m 7s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.780012} {"loss": 0.64560986, "grad_norm": 2.32425308, "learning_rate": 9.799e-05, "token_acc": 0.80246914, "epoch": 1.36107987, "global_step/max_steps": "1210/8890", "percentage": "13.61%", "elapsed_time": "25m 51s", "remaining_time": "2h 44m 4s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.780117} {"loss": 0.71395004, "grad_norm": 1.88752234, "learning_rate": 9.798e-05, "token_acc": 0.7662037, "epoch": 1.36220472, "global_step/max_steps": "1211/8890", "percentage": "13.62%", "elapsed_time": "25m 52s", "remaining_time": "2h 44m 2s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.780175} {"loss": 0.72807908, "grad_norm": 1.58340609, "learning_rate": 9.798e-05, "token_acc": 0.79648049, "epoch": 1.36332958, "global_step/max_steps": "1212/8890", "percentage": "13.63%", "elapsed_time": "25m 53s", "remaining_time": "2h 44m 0s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.78025} {"loss": 0.82988977, "grad_norm": 1.69538784, "learning_rate": 9.797e-05, "token_acc": 0.75954861, "epoch": 1.36445444, "global_step/max_steps": "1213/8890", "percentage": "13.64%", "elapsed_time": "25m 54s", "remaining_time": "2h 43m 58s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.780324} {"loss": 0.76775265, "grad_norm": 1.9305402, "learning_rate": 9.797e-05, "token_acc": 0.75989783, "epoch": 1.3655793, "global_step/max_steps": "1214/8890", "percentage": "13.66%", "elapsed_time": "25m 55s", "remaining_time": "2h 43m 56s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.780388} {"loss": 0.8261646, "grad_norm": 1.8348453, "learning_rate": 9.796e-05, "token_acc": 0.75901133, "epoch": 1.36670416, "global_step/max_steps": "1215/8890", "percentage": "13.67%", "elapsed_time": "25m 56s", "remaining_time": "2h 43m 54s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.780438} {"loss": 0.63935959, "grad_norm": 1.93466926, "learning_rate": 9.796e-05, "token_acc": 0.79805014, "epoch": 1.36782902, "global_step/max_steps": "1216/8890", "percentage": "13.68%", "elapsed_time": "25m 57s", "remaining_time": "2h 43m 49s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.780691} {"loss": 0.76197231, "grad_norm": 1.86782336, "learning_rate": 9.795e-05, "token_acc": 0.78352693, "epoch": 1.36895388, "global_step/max_steps": "1217/8890", "percentage": "13.69%", "elapsed_time": "25m 58s", "remaining_time": "2h 43m 46s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.780822} {"loss": 0.7790516, "grad_norm": 2.06659842, "learning_rate": 9.795e-05, "token_acc": 0.77496484, "epoch": 1.37007874, "global_step/max_steps": "1218/8890", "percentage": "13.70%", "elapsed_time": "25m 59s", "remaining_time": "2h 43m 44s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.780899} {"loss": 0.53369707, "grad_norm": 2.04403305, "learning_rate": 9.794e-05, "token_acc": 0.82352941, "epoch": 1.3712036, "global_step/max_steps": "1219/8890", "percentage": "13.71%", "elapsed_time": "26m 0s", "remaining_time": "2h 43m 41s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.781058} {"loss": 0.70299149, "grad_norm": 1.9480803, "learning_rate": 9.794e-05, "token_acc": 0.77247191, "epoch": 1.37232846, "global_step/max_steps": "1220/8890", "percentage": "13.72%", "elapsed_time": "26m 1s", "remaining_time": "2h 43m 38s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.781176} {"loss": 0.7514329, "grad_norm": 1.79735243, "learning_rate": 9.793e-05, "token_acc": 0.77419355, "epoch": 1.37345332, "global_step/max_steps": "1221/8890", "percentage": "13.73%", "elapsed_time": "26m 2s", "remaining_time": "2h 43m 35s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.781296} {"loss": 0.87790596, "grad_norm": 1.87553275, "learning_rate": 9.793e-05, "token_acc": 0.75496689, "epoch": 1.37457818, "global_step/max_steps": "1222/8890", "percentage": "13.75%", "elapsed_time": "26m 3s", "remaining_time": "2h 43m 33s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.781353} {"loss": 0.69905585, "grad_norm": 2.19227171, "learning_rate": 9.792e-05, "token_acc": 0.76183844, "epoch": 1.37570304, "global_step/max_steps": "1223/8890", "percentage": "13.76%", "elapsed_time": "26m 5s", "remaining_time": "2h 43m 31s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.781454} {"loss": 0.61037898, "grad_norm": 1.60554385, "learning_rate": 9.792e-05, "token_acc": 0.80879346, "epoch": 1.3768279, "global_step/max_steps": "1224/8890", "percentage": "13.77%", "elapsed_time": "26m 6s", "remaining_time": "2h 43m 29s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.781488} {"loss": 0.64613843, "grad_norm": 2.13860178, "learning_rate": 9.791e-05, "token_acc": 0.7792, "epoch": 1.37795276, "global_step/max_steps": "1225/8890", "percentage": "13.78%", "elapsed_time": "26m 7s", "remaining_time": "2h 43m 26s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.781647} {"loss": 0.95227695, "grad_norm": 1.7272296, "learning_rate": 9.79e-05, "token_acc": 0.7260788, "epoch": 1.37907762, "global_step/max_steps": "1226/8890", "percentage": "13.79%", "elapsed_time": "26m 8s", "remaining_time": "2h 43m 24s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.781714} {"loss": 0.58215958, "grad_norm": 1.7826376, "learning_rate": 9.79e-05, "token_acc": 0.81299525, "epoch": 1.38020247, "global_step/max_steps": "1227/8890", "percentage": "13.80%", "elapsed_time": "26m 9s", "remaining_time": "2h 43m 21s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.78179} {"loss": 0.70489538, "grad_norm": 2.31034851, "learning_rate": 9.789e-05, "token_acc": 0.79937792, "epoch": 1.38132733, "global_step/max_steps": "1228/8890", "percentage": "13.81%", "elapsed_time": "26m 10s", "remaining_time": "2h 43m 18s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.781945} {"loss": 0.70836997, "grad_norm": 2.02757859, "learning_rate": 9.789e-05, "token_acc": 0.78437844, "epoch": 1.38245219, "global_step/max_steps": "1229/8890", "percentage": "13.82%", "elapsed_time": "26m 11s", "remaining_time": "2h 43m 16s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.782018} {"loss": 0.85810971, "grad_norm": 2.00391722, "learning_rate": 9.788e-05, "token_acc": 0.75377468, "epoch": 1.38357705, "global_step/max_steps": "1230/8890", "percentage": "13.84%", "elapsed_time": "26m 12s", "remaining_time": "2h 43m 13s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.782133} {"loss": 0.72013879, "grad_norm": 1.57946348, "learning_rate": 9.788e-05, "token_acc": 0.79771784, "epoch": 1.38470191, "global_step/max_steps": "1231/8890", "percentage": "13.85%", "elapsed_time": "26m 13s", "remaining_time": "2h 43m 12s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.782107} {"loss": 0.75226676, "grad_norm": 1.81551945, "learning_rate": 9.787e-05, "token_acc": 0.77250608, "epoch": 1.38582677, "global_step/max_steps": "1232/8890", "percentage": "13.86%", "elapsed_time": "26m 14s", "remaining_time": "2h 43m 9s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.782231} {"loss": 0.7015236, "grad_norm": 2.19617844, "learning_rate": 9.787e-05, "token_acc": 0.78778135, "epoch": 1.38695163, "global_step/max_steps": "1233/8890", "percentage": "13.87%", "elapsed_time": "26m 16s", "remaining_time": "2h 43m 7s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.782312} {"loss": 0.90881115, "grad_norm": 1.95393717, "learning_rate": 9.786e-05, "token_acc": 0.73574561, "epoch": 1.38807649, "global_step/max_steps": "1234/8890", "percentage": "13.88%", "elapsed_time": "26m 17s", "remaining_time": "2h 43m 5s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.782376} {"loss": 0.57966226, "grad_norm": 1.64032185, "learning_rate": 9.786e-05, "token_acc": 0.84185493, "epoch": 1.38920135, "global_step/max_steps": "1235/8890", "percentage": "13.89%", "elapsed_time": "26m 18s", "remaining_time": "2h 43m 3s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.78248} {"loss": 0.7190603, "grad_norm": 1.7642529, "learning_rate": 9.785e-05, "token_acc": 0.76676677, "epoch": 1.39032621, "global_step/max_steps": "1236/8890", "percentage": "13.90%", "elapsed_time": "26m 19s", "remaining_time": "2h 43m 1s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.782485} {"loss": 0.71721494, "grad_norm": 1.73216522, "learning_rate": 9.785e-05, "token_acc": 0.7981756, "epoch": 1.39145107, "global_step/max_steps": "1237/8890", "percentage": "13.91%", "elapsed_time": "26m 20s", "remaining_time": "2h 42m 58s", "memory(GiB)": 22.94, "train_speed(iter/s)": 0.782604} {"loss": 0.78267789, "grad_norm": 1.933339, "learning_rate": 9.784e-05, "token_acc": 0.76190476, "epoch": 1.39257593, "global_step/max_steps": "1238/8890", "percentage": "13.93%", "elapsed_time": "26m 21s", "remaining_time": "2h 42m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782657} {"loss": 0.80092901, "grad_norm": 1.8266716, "learning_rate": 9.783e-05, "token_acc": 0.77376033, "epoch": 1.39370079, "global_step/max_steps": "1239/8890", "percentage": "13.94%", "elapsed_time": "26m 22s", "remaining_time": "2h 42m 54s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782761} {"loss": 0.77502632, "grad_norm": 1.8634634, "learning_rate": 9.783e-05, "token_acc": 0.76416185, "epoch": 1.39482565, "global_step/max_steps": "1240/8890", "percentage": "13.95%", "elapsed_time": "26m 23s", "remaining_time": "2h 42m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782869} {"loss": 0.87407243, "grad_norm": 1.93227708, "learning_rate": 9.782e-05, "token_acc": 0.76781857, "epoch": 1.39595051, "global_step/max_steps": "1241/8890", "percentage": "13.96%", "elapsed_time": "26m 24s", "remaining_time": "2h 42m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782999} {"loss": 0.74091268, "grad_norm": 1.74767256, "learning_rate": 9.782e-05, "token_acc": 0.77113867, "epoch": 1.39707537, "global_step/max_steps": "1242/8890", "percentage": "13.97%", "elapsed_time": "26m 26s", "remaining_time": "2h 42m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783063} {"loss": 0.89291656, "grad_norm": 1.77485049, "learning_rate": 9.781e-05, "token_acc": 0.745164, "epoch": 1.39820022, "global_step/max_steps": "1243/8890", "percentage": "13.98%", "elapsed_time": "26m 27s", "remaining_time": "2h 42m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783109} {"loss": 0.78346896, "grad_norm": 2.14975762, "learning_rate": 9.781e-05, "token_acc": 0.7728, "epoch": 1.39932508, "global_step/max_steps": "1244/8890", "percentage": "13.99%", "elapsed_time": "26m 28s", "remaining_time": "2h 42m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783224} {"loss": 0.8027246, "grad_norm": 1.7344166, "learning_rate": 9.78e-05, "token_acc": 0.76070252, "epoch": 1.40044994, "global_step/max_steps": "1245/8890", "percentage": "14.00%", "elapsed_time": "26m 29s", "remaining_time": "2h 42m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783293} {"loss": 0.60590506, "grad_norm": 1.3980608, "learning_rate": 9.78e-05, "token_acc": 0.82451499, "epoch": 1.4015748, "global_step/max_steps": "1246/8890", "percentage": "14.02%", "elapsed_time": "26m 30s", "remaining_time": "2h 42m 37s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783373} {"loss": 0.64461112, "grad_norm": 1.73054504, "learning_rate": 9.779e-05, "token_acc": 0.79689367, "epoch": 1.40269966, "global_step/max_steps": "1247/8890", "percentage": "14.03%", "elapsed_time": "26m 31s", "remaining_time": "2h 42m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783523} {"loss": 0.75431597, "grad_norm": 1.72271764, "learning_rate": 9.779e-05, "token_acc": 0.79298246, "epoch": 1.40382452, "global_step/max_steps": "1248/8890", "percentage": "14.04%", "elapsed_time": "26m 32s", "remaining_time": "2h 42m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783559} {"loss": 0.83806551, "grad_norm": 1.53850079, "learning_rate": 9.778e-05, "token_acc": 0.76525822, "epoch": 1.40494938, "global_step/max_steps": "1249/8890", "percentage": "14.05%", "elapsed_time": "26m 34s", "remaining_time": "2h 42m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783539} {"loss": 0.78135663, "grad_norm": 1.73478937, "learning_rate": 9.777e-05, "token_acc": 0.76591154, "epoch": 1.40607424, "global_step/max_steps": "1250/8890", "percentage": "14.06%", "elapsed_time": "26m 35s", "remaining_time": "2h 42m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783598} {"loss": 0.59303135, "grad_norm": 1.73627305, "learning_rate": 9.777e-05, "token_acc": 0.81564987, "epoch": 1.4071991, "global_step/max_steps": "1251/8890", "percentage": "14.07%", "elapsed_time": "26m 36s", "remaining_time": "2h 42m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783705} {"loss": 0.69859213, "grad_norm": 1.76132774, "learning_rate": 9.776e-05, "token_acc": 0.80248447, "epoch": 1.40832396, "global_step/max_steps": "1252/8890", "percentage": "14.08%", "elapsed_time": "26m 37s", "remaining_time": "2h 42m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783869} {"loss": 0.67965162, "grad_norm": 1.91997814, "learning_rate": 9.776e-05, "token_acc": 0.79300292, "epoch": 1.40944882, "global_step/max_steps": "1253/8890", "percentage": "14.09%", "elapsed_time": "26m 38s", "remaining_time": "2h 42m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783981} {"loss": 0.6765461, "grad_norm": 2.09627557, "learning_rate": 9.775e-05, "token_acc": 0.79146141, "epoch": 1.41057368, "global_step/max_steps": "1254/8890", "percentage": "14.11%", "elapsed_time": "26m 39s", "remaining_time": "2h 42m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784096} {"loss": 1.0019275, "grad_norm": 1.88286912, "learning_rate": 9.775e-05, "token_acc": 0.72617854, "epoch": 1.41169854, "global_step/max_steps": "1255/8890", "percentage": "14.12%", "elapsed_time": "26m 40s", "remaining_time": "2h 42m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784043} {"loss": 0.78651214, "grad_norm": 1.8091749, "learning_rate": 9.774e-05, "token_acc": 0.76964478, "epoch": 1.4128234, "global_step/max_steps": "1256/8890", "percentage": "14.13%", "elapsed_time": "26m 41s", "remaining_time": "2h 42m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784153} {"loss": 0.85581875, "grad_norm": 1.77880192, "learning_rate": 9.774e-05, "token_acc": 0.74920969, "epoch": 1.41394826, "global_step/max_steps": "1257/8890", "percentage": "14.14%", "elapsed_time": "26m 42s", "remaining_time": "2h 42m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784205} {"loss": 0.88546103, "grad_norm": 1.95131361, "learning_rate": 9.773e-05, "token_acc": 0.74056029, "epoch": 1.41507312, "global_step/max_steps": "1258/8890", "percentage": "14.15%", "elapsed_time": "26m 43s", "remaining_time": "2h 42m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784324} {"loss": 0.61924446, "grad_norm": 1.84092391, "learning_rate": 9.773e-05, "token_acc": 0.80801105, "epoch": 1.41619798, "global_step/max_steps": "1259/8890", "percentage": "14.16%", "elapsed_time": "26m 45s", "remaining_time": "2h 42m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78439} {"loss": 0.70621067, "grad_norm": 1.98579025, "learning_rate": 9.772e-05, "token_acc": 0.78540773, "epoch": 1.41732283, "global_step/max_steps": "1260/8890", "percentage": "14.17%", "elapsed_time": "26m 46s", "remaining_time": "2h 42m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.7845} {"loss": 0.67540812, "grad_norm": 1.53050101, "learning_rate": 9.771e-05, "token_acc": 0.80016584, "epoch": 1.41844769, "global_step/max_steps": "1261/8890", "percentage": "14.18%", "elapsed_time": "26m 47s", "remaining_time": "2h 42m 4s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784484} {"loss": 0.66114777, "grad_norm": 1.91321099, "learning_rate": 9.771e-05, "token_acc": 0.79736842, "epoch": 1.41957255, "global_step/max_steps": "1262/8890", "percentage": "14.20%", "elapsed_time": "26m 48s", "remaining_time": "2h 42m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784541} {"loss": 0.62608039, "grad_norm": 1.74644244, "learning_rate": 9.77e-05, "token_acc": 0.80784314, "epoch": 1.42069741, "global_step/max_steps": "1263/8890", "percentage": "14.21%", "elapsed_time": "26m 49s", "remaining_time": "2h 42m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784621} {"loss": 0.80680364, "grad_norm": 1.95382535, "learning_rate": 9.77e-05, "token_acc": 0.75652174, "epoch": 1.42182227, "global_step/max_steps": "1264/8890", "percentage": "14.22%", "elapsed_time": "26m 50s", "remaining_time": "2h 41m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784733} {"loss": 0.76667637, "grad_norm": 1.83821952, "learning_rate": 9.769e-05, "token_acc": 0.76548673, "epoch": 1.42294713, "global_step/max_steps": "1265/8890", "percentage": "14.23%", "elapsed_time": "26m 51s", "remaining_time": "2h 41m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784842} {"loss": 0.77008098, "grad_norm": 1.83366203, "learning_rate": 9.769e-05, "token_acc": 0.77353993, "epoch": 1.42407199, "global_step/max_steps": "1266/8890", "percentage": "14.24%", "elapsed_time": "26m 52s", "remaining_time": "2h 41m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784911} {"loss": 0.82855791, "grad_norm": 1.83335102, "learning_rate": 9.768e-05, "token_acc": 0.75, "epoch": 1.42519685, "global_step/max_steps": "1267/8890", "percentage": "14.25%", "elapsed_time": "26m 54s", "remaining_time": "2h 41m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784983} {"loss": 0.87752926, "grad_norm": 1.90154696, "learning_rate": 9.767e-05, "token_acc": 0.74234424, "epoch": 1.42632171, "global_step/max_steps": "1268/8890", "percentage": "14.26%", "elapsed_time": "26m 55s", "remaining_time": "2h 41m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785089} {"loss": 0.93426377, "grad_norm": 2.2239275, "learning_rate": 9.767e-05, "token_acc": 0.76530612, "epoch": 1.42744657, "global_step/max_steps": "1269/8890", "percentage": "14.27%", "elapsed_time": "26m 56s", "remaining_time": "2h 41m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78521} {"loss": 0.81831038, "grad_norm": 1.88979411, "learning_rate": 9.766e-05, "token_acc": 0.76201117, "epoch": 1.42857143, "global_step/max_steps": "1270/8890", "percentage": "14.29%", "elapsed_time": "26m 57s", "remaining_time": "2h 41m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785332} {"loss": 0.79126668, "grad_norm": 1.73031688, "learning_rate": 9.766e-05, "token_acc": 0.75715696, "epoch": 1.42969629, "global_step/max_steps": "1271/8890", "percentage": "14.30%", "elapsed_time": "26m 58s", "remaining_time": "2h 41m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785434} {"loss": 0.68360484, "grad_norm": 1.5996418, "learning_rate": 9.765e-05, "token_acc": 0.79738562, "epoch": 1.43082115, "global_step/max_steps": "1272/8890", "percentage": "14.31%", "elapsed_time": "26m 59s", "remaining_time": "2h 41m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78548} {"loss": 0.77490985, "grad_norm": 2.20756865, "learning_rate": 9.765e-05, "token_acc": 0.7661406, "epoch": 1.43194601, "global_step/max_steps": "1273/8890", "percentage": "14.32%", "elapsed_time": "27m 0s", "remaining_time": "2h 41m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785572} {"loss": 0.88436162, "grad_norm": 1.9885323, "learning_rate": 9.764e-05, "token_acc": 0.74083439, "epoch": 1.43307087, "global_step/max_steps": "1274/8890", "percentage": "14.33%", "elapsed_time": "27m 1s", "remaining_time": "2h 41m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785706} {"loss": 0.7723552, "grad_norm": 1.60536003, "learning_rate": 9.764e-05, "token_acc": 0.79384615, "epoch": 1.43419573, "global_step/max_steps": "1275/8890", "percentage": "14.34%", "elapsed_time": "27m 2s", "remaining_time": "2h 41m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78576} {"loss": 0.57728398, "grad_norm": 1.68437827, "learning_rate": 9.763e-05, "token_acc": 0.82166446, "epoch": 1.43532058, "global_step/max_steps": "1276/8890", "percentage": "14.35%", "elapsed_time": "27m 3s", "remaining_time": "2h 41m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785836} {"loss": 0.62511981, "grad_norm": 1.55884755, "learning_rate": 9.762e-05, "token_acc": 0.79466119, "epoch": 1.43644544, "global_step/max_steps": "1277/8890", "percentage": "14.36%", "elapsed_time": "27m 4s", "remaining_time": "2h 41m 26s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785918} {"loss": 0.73602802, "grad_norm": 1.76922417, "learning_rate": 9.762e-05, "token_acc": 0.79, "epoch": 1.4375703, "global_step/max_steps": "1278/8890", "percentage": "14.38%", "elapsed_time": "27m 5s", "remaining_time": "2h 41m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785987} {"loss": 0.72532564, "grad_norm": 1.5717485, "learning_rate": 9.761e-05, "token_acc": 0.79360165, "epoch": 1.43869516, "global_step/max_steps": "1279/8890", "percentage": "14.39%", "elapsed_time": "27m 7s", "remaining_time": "2h 41m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786043} {"loss": 0.67396224, "grad_norm": 1.83752966, "learning_rate": 9.761e-05, "token_acc": 0.78329298, "epoch": 1.43982002, "global_step/max_steps": "1280/8890", "percentage": "14.40%", "elapsed_time": "27m 8s", "remaining_time": "2h 41m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786173} {"loss": 0.65996897, "grad_norm": 1.81370389, "learning_rate": 9.76e-05, "token_acc": 0.78385417, "epoch": 1.44094488, "global_step/max_steps": "1281/8890", "percentage": "14.41%", "elapsed_time": "27m 9s", "remaining_time": "2h 41m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786349} {"loss": 0.79430389, "grad_norm": 1.84114397, "learning_rate": 9.76e-05, "token_acc": 0.75617615, "epoch": 1.44206974, "global_step/max_steps": "1282/8890", "percentage": "14.42%", "elapsed_time": "27m 10s", "remaining_time": "2h 41m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786344} {"loss": 0.800596, "grad_norm": 1.74581468, "learning_rate": 9.759e-05, "token_acc": 0.7597137, "epoch": 1.4431946, "global_step/max_steps": "1283/8890", "percentage": "14.43%", "elapsed_time": "27m 11s", "remaining_time": "2h 41m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786455} {"loss": 0.65519392, "grad_norm": 1.69950974, "learning_rate": 9.758e-05, "token_acc": 0.80952381, "epoch": 1.44431946, "global_step/max_steps": "1284/8890", "percentage": "14.44%", "elapsed_time": "27m 12s", "remaining_time": "2h 41m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786508} {"loss": 0.71594208, "grad_norm": 1.82328165, "learning_rate": 9.758e-05, "token_acc": 0.78025852, "epoch": 1.44544432, "global_step/max_steps": "1285/8890", "percentage": "14.45%", "elapsed_time": "27m 13s", "remaining_time": "2h 41m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786581} {"loss": 0.79077882, "grad_norm": 1.86406684, "learning_rate": 9.757e-05, "token_acc": 0.76045198, "epoch": 1.44656918, "global_step/max_steps": "1286/8890", "percentage": "14.47%", "elapsed_time": "27m 14s", "remaining_time": "2h 41m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786636} {"loss": 0.57439935, "grad_norm": 1.62299585, "learning_rate": 9.757e-05, "token_acc": 0.82566586, "epoch": 1.44769404, "global_step/max_steps": "1287/8890", "percentage": "14.48%", "elapsed_time": "27m 15s", "remaining_time": "2h 41m 4s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78669} {"loss": 0.81797504, "grad_norm": 2.05115676, "learning_rate": 9.756e-05, "token_acc": 0.77232704, "epoch": 1.4488189, "global_step/max_steps": "1288/8890", "percentage": "14.49%", "elapsed_time": "27m 17s", "remaining_time": "2h 41m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78674} {"loss": 0.67085946, "grad_norm": 2.09959674, "learning_rate": 9.756e-05, "token_acc": 0.8006135, "epoch": 1.44994376, "global_step/max_steps": "1289/8890", "percentage": "14.50%", "elapsed_time": "27m 18s", "remaining_time": "2h 41m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786804} {"loss": 0.90247524, "grad_norm": 1.63390958, "learning_rate": 9.755e-05, "token_acc": 0.75972927, "epoch": 1.45106862, "global_step/max_steps": "1290/8890", "percentage": "14.51%", "elapsed_time": "27m 19s", "remaining_time": "2h 40m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786849} {"loss": 0.63352263, "grad_norm": 1.73408544, "learning_rate": 9.754e-05, "token_acc": 0.81468111, "epoch": 1.45219348, "global_step/max_steps": "1291/8890", "percentage": "14.52%", "elapsed_time": "27m 20s", "remaining_time": "2h 40m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786913} {"loss": 0.9262284, "grad_norm": 1.77538919, "learning_rate": 9.754e-05, "token_acc": 0.74087933, "epoch": 1.45331834, "global_step/max_steps": "1292/8890", "percentage": "14.53%", "elapsed_time": "27m 21s", "remaining_time": "2h 40m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786913} {"loss": 0.90483284, "grad_norm": 1.74045074, "learning_rate": 9.753e-05, "token_acc": 0.73759542, "epoch": 1.45444319, "global_step/max_steps": "1293/8890", "percentage": "14.54%", "elapsed_time": "27m 23s", "remaining_time": "2h 40m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786968} {"loss": 0.74503231, "grad_norm": 2.07046819, "learning_rate": 9.753e-05, "token_acc": 0.79829891, "epoch": 1.45556805, "global_step/max_steps": "1294/8890", "percentage": "14.56%", "elapsed_time": "27m 24s", "remaining_time": "2h 40m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787076} {"loss": 0.78918135, "grad_norm": 1.70274115, "learning_rate": 9.752e-05, "token_acc": 0.77517803, "epoch": 1.45669291, "global_step/max_steps": "1295/8890", "percentage": "14.57%", "elapsed_time": "27m 25s", "remaining_time": "2h 40m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787115} {"loss": 0.74911267, "grad_norm": 1.9491533, "learning_rate": 9.752e-05, "token_acc": 0.76645161, "epoch": 1.45781777, "global_step/max_steps": "1296/8890", "percentage": "14.58%", "elapsed_time": "27m 26s", "remaining_time": "2h 40m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787288} {"loss": 0.79566896, "grad_norm": 1.97670877, "learning_rate": 9.751e-05, "token_acc": 0.76607387, "epoch": 1.45894263, "global_step/max_steps": "1297/8890", "percentage": "14.59%", "elapsed_time": "27m 27s", "remaining_time": "2h 40m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787329} {"loss": 0.52452463, "grad_norm": 1.71915221, "learning_rate": 9.75e-05, "token_acc": 0.82478632, "epoch": 1.46006749, "global_step/max_steps": "1298/8890", "percentage": "14.60%", "elapsed_time": "27m 28s", "remaining_time": "2h 40m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787397} {"loss": 0.69395214, "grad_norm": 1.64877284, "learning_rate": 9.75e-05, "token_acc": 0.79148472, "epoch": 1.46119235, "global_step/max_steps": "1299/8890", "percentage": "14.61%", "elapsed_time": "27m 29s", "remaining_time": "2h 40m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787499} {"loss": 0.75186461, "grad_norm": 1.70746589, "learning_rate": 9.749e-05, "token_acc": 0.77268094, "epoch": 1.46231721, "global_step/max_steps": "1300/8890", "percentage": "14.62%", "elapsed_time": "27m 30s", "remaining_time": "2h 40m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787463} {"loss": 0.70140183, "grad_norm": 1.69524848, "learning_rate": 9.749e-05, "token_acc": 0.79383634, "epoch": 1.46344207, "global_step/max_steps": "1301/8890", "percentage": "14.63%", "elapsed_time": "27m 32s", "remaining_time": "2h 40m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787522} {"loss": 0.57172251, "grad_norm": 1.73898613, "learning_rate": 9.748e-05, "token_acc": 0.80914286, "epoch": 1.46456693, "global_step/max_steps": "1302/8890", "percentage": "14.65%", "elapsed_time": "27m 33s", "remaining_time": "2h 40m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787574} {"loss": 0.64742386, "grad_norm": 1.66150653, "learning_rate": 9.747e-05, "token_acc": 0.80699774, "epoch": 1.46569179, "global_step/max_steps": "1303/8890", "percentage": "14.66%", "elapsed_time": "27m 34s", "remaining_time": "2h 40m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787678} {"loss": 0.92378592, "grad_norm": 1.81300652, "learning_rate": 9.747e-05, "token_acc": 0.73491929, "epoch": 1.46681665, "global_step/max_steps": "1304/8890", "percentage": "14.67%", "elapsed_time": "27m 35s", "remaining_time": "2h 40m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787737} {"loss": 0.73098779, "grad_norm": 1.92194879, "learning_rate": 9.746e-05, "token_acc": 0.78128951, "epoch": 1.46794151, "global_step/max_steps": "1305/8890", "percentage": "14.68%", "elapsed_time": "27m 36s", "remaining_time": "2h 40m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787785} {"loss": 0.94693047, "grad_norm": 2.22459602, "learning_rate": 9.746e-05, "token_acc": 0.73961841, "epoch": 1.46906637, "global_step/max_steps": "1306/8890", "percentage": "14.69%", "elapsed_time": "27m 37s", "remaining_time": "2h 40m 26s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787847} {"loss": 0.6086942, "grad_norm": 1.66489041, "learning_rate": 9.745e-05, "token_acc": 0.81313703, "epoch": 1.47019123, "global_step/max_steps": "1307/8890", "percentage": "14.70%", "elapsed_time": "27m 38s", "remaining_time": "2h 40m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788015} {"loss": 0.75380814, "grad_norm": 1.90860307, "learning_rate": 9.745e-05, "token_acc": 0.77305825, "epoch": 1.47131609, "global_step/max_steps": "1308/8890", "percentage": "14.71%", "elapsed_time": "27m 39s", "remaining_time": "2h 40m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788076} {"loss": 0.56345797, "grad_norm": 1.55619431, "learning_rate": 9.744e-05, "token_acc": 0.82883939, "epoch": 1.47244094, "global_step/max_steps": "1309/8890", "percentage": "14.72%", "elapsed_time": "27m 40s", "remaining_time": "2h 40m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788143} {"loss": 0.7846359, "grad_norm": 1.8888092, "learning_rate": 9.743e-05, "token_acc": 0.76046025, "epoch": 1.4735658, "global_step/max_steps": "1310/8890", "percentage": "14.74%", "elapsed_time": "27m 41s", "remaining_time": "2h 40m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788209} {"loss": 0.65150893, "grad_norm": 1.78713858, "learning_rate": 9.743e-05, "token_acc": 0.80392157, "epoch": 1.47469066, "global_step/max_steps": "1311/8890", "percentage": "14.75%", "elapsed_time": "27m 43s", "remaining_time": "2h 40m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788275} {"loss": 0.7014302, "grad_norm": 1.74832499, "learning_rate": 9.742e-05, "token_acc": 0.78423645, "epoch": 1.47581552, "global_step/max_steps": "1312/8890", "percentage": "14.76%", "elapsed_time": "27m 44s", "remaining_time": "2h 40m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788269} {"loss": 0.80170941, "grad_norm": 1.6675328, "learning_rate": 9.742e-05, "token_acc": 0.76186579, "epoch": 1.47694038, "global_step/max_steps": "1313/8890", "percentage": "14.77%", "elapsed_time": "27m 45s", "remaining_time": "2h 40m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788323} {"loss": 0.66213155, "grad_norm": 1.84850729, "learning_rate": 9.741e-05, "token_acc": 0.79696616, "epoch": 1.47806524, "global_step/max_steps": "1314/8890", "percentage": "14.78%", "elapsed_time": "27m 46s", "remaining_time": "2h 40m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788385} {"loss": 0.76753664, "grad_norm": 1.99000275, "learning_rate": 9.74e-05, "token_acc": 0.78304239, "epoch": 1.4791901, "global_step/max_steps": "1315/8890", "percentage": "14.79%", "elapsed_time": "27m 47s", "remaining_time": "2h 40m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788491} {"loss": 0.65062726, "grad_norm": 2.0351758, "learning_rate": 9.74e-05, "token_acc": 0.78603269, "epoch": 1.48031496, "global_step/max_steps": "1316/8890", "percentage": "14.80%", "elapsed_time": "27m 48s", "remaining_time": "2h 40m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788541} {"loss": 0.76216716, "grad_norm": 1.65200162, "learning_rate": 9.739e-05, "token_acc": 0.77408056, "epoch": 1.48143982, "global_step/max_steps": "1317/8890", "percentage": "14.81%", "elapsed_time": "27m 49s", "remaining_time": "2h 40m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788649} {"loss": 0.78855181, "grad_norm": 1.68721795, "learning_rate": 9.739e-05, "token_acc": 0.76470588, "epoch": 1.48256468, "global_step/max_steps": "1318/8890", "percentage": "14.83%", "elapsed_time": "27m 50s", "remaining_time": "2h 39m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788774} {"loss": 0.61868966, "grad_norm": 1.85901916, "learning_rate": 9.738e-05, "token_acc": 0.81242079, "epoch": 1.48368954, "global_step/max_steps": "1319/8890", "percentage": "14.84%", "elapsed_time": "27m 52s", "remaining_time": "2h 39m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788837} {"loss": 0.72322965, "grad_norm": 1.82788503, "learning_rate": 9.737e-05, "token_acc": 0.7642369, "epoch": 1.4848144, "global_step/max_steps": "1320/8890", "percentage": "14.85%", "elapsed_time": "27m 53s", "remaining_time": "2h 39m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788902} {"loss": 0.60585958, "grad_norm": 2.03284192, "learning_rate": 9.737e-05, "token_acc": 0.79773692, "epoch": 1.48593926, "global_step/max_steps": "1321/8890", "percentage": "14.86%", "elapsed_time": "27m 54s", "remaining_time": "2h 39m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788964} {"loss": 0.69594157, "grad_norm": 1.93519664, "learning_rate": 9.736e-05, "token_acc": 0.78716578, "epoch": 1.48706412, "global_step/max_steps": "1322/8890", "percentage": "14.87%", "elapsed_time": "27m 55s", "remaining_time": "2h 39m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789022} {"loss": 0.80562687, "grad_norm": 1.79877305, "learning_rate": 9.736e-05, "token_acc": 0.77172775, "epoch": 1.48818898, "global_step/max_steps": "1323/8890", "percentage": "14.88%", "elapsed_time": "27m 56s", "remaining_time": "2h 39m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789082} {"loss": 0.81327891, "grad_norm": 1.74009979, "learning_rate": 9.735e-05, "token_acc": 0.77419355, "epoch": 1.48931384, "global_step/max_steps": "1324/8890", "percentage": "14.89%", "elapsed_time": "27m 57s", "remaining_time": "2h 39m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789128} {"loss": 0.73535764, "grad_norm": 1.7300899, "learning_rate": 9.734e-05, "token_acc": 0.78014184, "epoch": 1.4904387, "global_step/max_steps": "1325/8890", "percentage": "14.90%", "elapsed_time": "27m 58s", "remaining_time": "2h 39m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789195} {"loss": 0.89415187, "grad_norm": 1.88912559, "learning_rate": 9.734e-05, "token_acc": 0.75253924, "epoch": 1.49156355, "global_step/max_steps": "1326/8890", "percentage": "14.92%", "elapsed_time": "28m 0s", "remaining_time": "2h 39m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789267} {"loss": 0.67130721, "grad_norm": 1.9761436, "learning_rate": 9.733e-05, "token_acc": 0.79614325, "epoch": 1.49268841, "global_step/max_steps": "1327/8890", "percentage": "14.93%", "elapsed_time": "28m 1s", "remaining_time": "2h 39m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789365} {"loss": 0.82268131, "grad_norm": 1.87338698, "learning_rate": 9.733e-05, "token_acc": 0.75662414, "epoch": 1.49381327, "global_step/max_steps": "1328/8890", "percentage": "14.94%", "elapsed_time": "28m 2s", "remaining_time": "2h 39m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789362} {"loss": 0.72742069, "grad_norm": 1.87011361, "learning_rate": 9.732e-05, "token_acc": 0.78095238, "epoch": 1.49493813, "global_step/max_steps": "1329/8890", "percentage": "14.95%", "elapsed_time": "28m 3s", "remaining_time": "2h 39m 37s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789414} {"loss": 0.8187294, "grad_norm": 1.93196917, "learning_rate": 9.731e-05, "token_acc": 0.76084538, "epoch": 1.49606299, "global_step/max_steps": "1330/8890", "percentage": "14.96%", "elapsed_time": "28m 4s", "remaining_time": "2h 39m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789512} {"loss": 0.6779778, "grad_norm": 2.01928353, "learning_rate": 9.731e-05, "token_acc": 0.77453581, "epoch": 1.49718785, "global_step/max_steps": "1331/8890", "percentage": "14.97%", "elapsed_time": "28m 5s", "remaining_time": "2h 39m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789598} {"loss": 0.86159825, "grad_norm": 1.88372767, "learning_rate": 9.73e-05, "token_acc": 0.73371429, "epoch": 1.49831271, "global_step/max_steps": "1332/8890", "percentage": "14.98%", "elapsed_time": "28m 6s", "remaining_time": "2h 39m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789744} {"loss": 0.61293375, "grad_norm": 1.5223217, "learning_rate": 9.73e-05, "token_acc": 0.81452514, "epoch": 1.49943757, "global_step/max_steps": "1333/8890", "percentage": "14.99%", "elapsed_time": "28m 7s", "remaining_time": "2h 39m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789746} {"loss": 0.66950631, "grad_norm": 1.82988727, "learning_rate": 9.729e-05, "token_acc": 0.79816514, "epoch": 1.50056243, "global_step/max_steps": "1334/8890", "percentage": "15.01%", "elapsed_time": "28m 9s", "remaining_time": "2h 39m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789797} {"loss": 0.69666135, "grad_norm": 1.55306566, "learning_rate": 9.728e-05, "token_acc": 0.78857143, "epoch": 1.50168729, "global_step/max_steps": "1335/8890", "percentage": "15.02%", "elapsed_time": "28m 10s", "remaining_time": "2h 39m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78985} {"loss": 0.87280637, "grad_norm": 1.8138721, "learning_rate": 9.728e-05, "token_acc": 0.73917526, "epoch": 1.50281215, "global_step/max_steps": "1336/8890", "percentage": "15.03%", "elapsed_time": "28m 11s", "remaining_time": "2h 39m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789898} {"loss": 0.71230656, "grad_norm": 1.79587436, "learning_rate": 9.727e-05, "token_acc": 0.78466742, "epoch": 1.50393701, "global_step/max_steps": "1337/8890", "percentage": "15.04%", "elapsed_time": "28m 12s", "remaining_time": "2h 39m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789957} {"loss": 0.74387389, "grad_norm": 1.67687869, "learning_rate": 9.727e-05, "token_acc": 0.78088803, "epoch": 1.50506187, "global_step/max_steps": "1338/8890", "percentage": "15.05%", "elapsed_time": "28m 13s", "remaining_time": "2h 39m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79} {"loss": 0.62295628, "grad_norm": 1.75222468, "learning_rate": 9.726e-05, "token_acc": 0.80569948, "epoch": 1.50618673, "global_step/max_steps": "1339/8890", "percentage": "15.06%", "elapsed_time": "28m 14s", "remaining_time": "2h 39m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790055} {"loss": 0.71272486, "grad_norm": 1.8854903, "learning_rate": 9.725e-05, "token_acc": 0.79343365, "epoch": 1.50731159, "global_step/max_steps": "1340/8890", "percentage": "15.07%", "elapsed_time": "28m 15s", "remaining_time": "2h 39m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79015} {"loss": 0.82403129, "grad_norm": 1.83207262, "learning_rate": 9.725e-05, "token_acc": 0.77153921, "epoch": 1.50843645, "global_step/max_steps": "1341/8890", "percentage": "15.08%", "elapsed_time": "28m 17s", "remaining_time": "2h 39m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790218} {"loss": 0.62266576, "grad_norm": 1.51189685, "learning_rate": 9.724e-05, "token_acc": 0.80289532, "epoch": 1.5095613, "global_step/max_steps": "1342/8890", "percentage": "15.10%", "elapsed_time": "28m 17s", "remaining_time": "2h 39m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790383} {"loss": 0.83457255, "grad_norm": 1.98331559, "learning_rate": 9.724e-05, "token_acc": 0.76849642, "epoch": 1.51068616, "global_step/max_steps": "1343/8890", "percentage": "15.11%", "elapsed_time": "28m 19s", "remaining_time": "2h 39m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790433} {"loss": 0.76867336, "grad_norm": 1.85009956, "learning_rate": 9.723e-05, "token_acc": 0.76238625, "epoch": 1.51181102, "global_step/max_steps": "1344/8890", "percentage": "15.12%", "elapsed_time": "28m 20s", "remaining_time": "2h 39m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790485} {"loss": 0.78742325, "grad_norm": 1.80848312, "learning_rate": 9.722e-05, "token_acc": 0.76600698, "epoch": 1.51293588, "global_step/max_steps": "1345/8890", "percentage": "15.13%", "elapsed_time": "28m 21s", "remaining_time": "2h 39m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790583} {"loss": 0.8956812, "grad_norm": 1.88575935, "learning_rate": 9.722e-05, "token_acc": 0.75049702, "epoch": 1.51406074, "global_step/max_steps": "1346/8890", "percentage": "15.14%", "elapsed_time": "28m 22s", "remaining_time": "2h 39m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790691} {"loss": 0.79971242, "grad_norm": 1.59797823, "learning_rate": 9.721e-05, "token_acc": 0.75, "epoch": 1.5151856, "global_step/max_steps": "1347/8890", "percentage": "15.15%", "elapsed_time": "28m 23s", "remaining_time": "2h 38m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790684} {"loss": 0.56312442, "grad_norm": 1.64100242, "learning_rate": 9.721e-05, "token_acc": 0.81217617, "epoch": 1.51631046, "global_step/max_steps": "1348/8890", "percentage": "15.16%", "elapsed_time": "28m 24s", "remaining_time": "2h 38m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790785} {"loss": 0.78972745, "grad_norm": 1.85959601, "learning_rate": 9.72e-05, "token_acc": 0.77324841, "epoch": 1.51743532, "global_step/max_steps": "1349/8890", "percentage": "15.17%", "elapsed_time": "28m 25s", "remaining_time": "2h 38m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790847} {"loss": 0.65171891, "grad_norm": 1.70386207, "learning_rate": 9.719e-05, "token_acc": 0.78935447, "epoch": 1.51856018, "global_step/max_steps": "1350/8890", "percentage": "15.19%", "elapsed_time": "28m 26s", "remaining_time": "2h 38m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790909} {"loss": 0.78114033, "grad_norm": 1.88788891, "learning_rate": 9.719e-05, "token_acc": 0.76732673, "epoch": 1.51968504, "global_step/max_steps": "1351/8890", "percentage": "15.20%", "elapsed_time": "28m 27s", "remaining_time": "2h 38m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790996} {"loss": 0.69568312, "grad_norm": 1.67045844, "learning_rate": 9.718e-05, "token_acc": 0.80182927, "epoch": 1.5208099, "global_step/max_steps": "1352/8890", "percentage": "15.21%", "elapsed_time": "28m 29s", "remaining_time": "2h 38m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79108} {"loss": 0.70348001, "grad_norm": 1.80370986, "learning_rate": 9.717e-05, "token_acc": 0.77135981, "epoch": 1.52193476, "global_step/max_steps": "1353/8890", "percentage": "15.22%", "elapsed_time": "28m 30s", "remaining_time": "2h 38m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791126} {"loss": 0.69877589, "grad_norm": 1.97044587, "learning_rate": 9.717e-05, "token_acc": 0.78992806, "epoch": 1.52305962, "global_step/max_steps": "1354/8890", "percentage": "15.23%", "elapsed_time": "28m 31s", "remaining_time": "2h 38m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791193} {"loss": 0.73850203, "grad_norm": 1.91949904, "learning_rate": 9.716e-05, "token_acc": 0.78507079, "epoch": 1.52418448, "global_step/max_steps": "1355/8890", "percentage": "15.24%", "elapsed_time": "28m 32s", "remaining_time": "2h 38m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791281} {"loss": 0.75447392, "grad_norm": 2.08081555, "learning_rate": 9.716e-05, "token_acc": 0.76866764, "epoch": 1.52530934, "global_step/max_steps": "1356/8890", "percentage": "15.25%", "elapsed_time": "28m 33s", "remaining_time": "2h 38m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791387} {"loss": 0.70002818, "grad_norm": 1.78758645, "learning_rate": 9.715e-05, "token_acc": 0.77625, "epoch": 1.5264342, "global_step/max_steps": "1357/8890", "percentage": "15.26%", "elapsed_time": "28m 34s", "remaining_time": "2h 38m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791437} {"loss": 0.5367105, "grad_norm": 1.41547787, "learning_rate": 9.714e-05, "token_acc": 0.84298332, "epoch": 1.52755906, "global_step/max_steps": "1358/8890", "percentage": "15.28%", "elapsed_time": "28m 35s", "remaining_time": "2h 38m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791544} {"loss": 0.77132583, "grad_norm": 1.6498692, "learning_rate": 9.714e-05, "token_acc": 0.786268, "epoch": 1.52868391, "global_step/max_steps": "1359/8890", "percentage": "15.29%", "elapsed_time": "28m 36s", "remaining_time": "2h 38m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791597} {"loss": 0.69983077, "grad_norm": 1.83430684, "learning_rate": 9.713e-05, "token_acc": 0.80024814, "epoch": 1.52980877, "global_step/max_steps": "1360/8890", "percentage": "15.30%", "elapsed_time": "28m 37s", "remaining_time": "2h 38m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791663} {"loss": 0.7695514, "grad_norm": 1.84910131, "learning_rate": 9.713e-05, "token_acc": 0.77245509, "epoch": 1.53093363, "global_step/max_steps": "1361/8890", "percentage": "15.31%", "elapsed_time": "28m 38s", "remaining_time": "2h 38m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791753} {"loss": 0.65876758, "grad_norm": 1.61415052, "learning_rate": 9.712e-05, "token_acc": 0.79633028, "epoch": 1.53205849, "global_step/max_steps": "1362/8890", "percentage": "15.32%", "elapsed_time": "28m 40s", "remaining_time": "2h 38m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791747} {"loss": 0.82390416, "grad_norm": 1.67150331, "learning_rate": 9.711e-05, "token_acc": 0.76424189, "epoch": 1.53318335, "global_step/max_steps": "1363/8890", "percentage": "15.33%", "elapsed_time": "28m 41s", "remaining_time": "2h 38m 26s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791801} {"loss": 0.60182536, "grad_norm": 1.81213689, "learning_rate": 9.711e-05, "token_acc": 0.82543978, "epoch": 1.53430821, "global_step/max_steps": "1364/8890", "percentage": "15.34%", "elapsed_time": "28m 42s", "remaining_time": "2h 38m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791858} {"loss": 0.79307723, "grad_norm": 2.00225663, "learning_rate": 9.71e-05, "token_acc": 0.75384615, "epoch": 1.53543307, "global_step/max_steps": "1365/8890", "percentage": "15.35%", "elapsed_time": "28m 43s", "remaining_time": "2h 38m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791975} {"loss": 0.71048093, "grad_norm": 1.62178981, "learning_rate": 9.709e-05, "token_acc": 0.77663551, "epoch": 1.53655793, "global_step/max_steps": "1366/8890", "percentage": "15.37%", "elapsed_time": "28m 44s", "remaining_time": "2h 38m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792021} {"loss": 0.61526227, "grad_norm": 1.90766931, "learning_rate": 9.709e-05, "token_acc": 0.82995951, "epoch": 1.53768279, "global_step/max_steps": "1367/8890", "percentage": "15.38%", "elapsed_time": "28m 45s", "remaining_time": "2h 38m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792134} {"loss": 0.85801929, "grad_norm": 1.70179951, "learning_rate": 9.708e-05, "token_acc": 0.75053996, "epoch": 1.53880765, "global_step/max_steps": "1368/8890", "percentage": "15.39%", "elapsed_time": "28m 46s", "remaining_time": "2h 38m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792197} {"loss": 0.83751822, "grad_norm": 1.78784013, "learning_rate": 9.708e-05, "token_acc": 0.75273088, "epoch": 1.53993251, "global_step/max_steps": "1369/8890", "percentage": "15.40%", "elapsed_time": "28m 47s", "remaining_time": "2h 38m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792258} {"loss": 0.86023021, "grad_norm": 1.70709896, "learning_rate": 9.707e-05, "token_acc": 0.74409821, "epoch": 1.54105737, "global_step/max_steps": "1370/8890", "percentage": "15.41%", "elapsed_time": "28m 49s", "remaining_time": "2h 38m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792173} {"loss": 0.91631764, "grad_norm": 1.78732443, "learning_rate": 9.706e-05, "token_acc": 0.73230088, "epoch": 1.54218223, "global_step/max_steps": "1371/8890", "percentage": "15.42%", "elapsed_time": "28m 50s", "remaining_time": "2h 38m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792167} {"loss": 0.89078462, "grad_norm": 1.86403179, "learning_rate": 9.706e-05, "token_acc": 0.75317797, "epoch": 1.54330709, "global_step/max_steps": "1372/8890", "percentage": "15.43%", "elapsed_time": "28m 51s", "remaining_time": "2h 38m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792217} {"loss": 0.81279129, "grad_norm": 1.85134172, "learning_rate": 9.705e-05, "token_acc": 0.74092616, "epoch": 1.54443195, "global_step/max_steps": "1373/8890", "percentage": "15.44%", "elapsed_time": "28m 53s", "remaining_time": "2h 38m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792185} {"loss": 0.87944806, "grad_norm": 1.94924784, "learning_rate": 9.704e-05, "token_acc": 0.73642534, "epoch": 1.54555681, "global_step/max_steps": "1374/8890", "percentage": "15.46%", "elapsed_time": "28m 54s", "remaining_time": "2h 38m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79228} {"loss": 0.76256579, "grad_norm": 1.74091458, "learning_rate": 9.704e-05, "token_acc": 0.77705628, "epoch": 1.54668166, "global_step/max_steps": "1375/8890", "percentage": "15.47%", "elapsed_time": "28m 55s", "remaining_time": "2h 38m 4s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792343} {"loss": 0.62266254, "grad_norm": 1.5248419, "learning_rate": 9.703e-05, "token_acc": 0.80089989, "epoch": 1.54780652, "global_step/max_steps": "1376/8890", "percentage": "15.48%", "elapsed_time": "28m 56s", "remaining_time": "2h 38m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792396} {"loss": 0.66352105, "grad_norm": 1.91841674, "learning_rate": 9.702e-05, "token_acc": 0.7896679, "epoch": 1.54893138, "global_step/max_steps": "1377/8890", "percentage": "15.49%", "elapsed_time": "28m 57s", "remaining_time": "2h 38m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792442} {"loss": 0.69075263, "grad_norm": 1.50831211, "learning_rate": 9.702e-05, "token_acc": 0.78952381, "epoch": 1.55005624, "global_step/max_steps": "1378/8890", "percentage": "15.50%", "elapsed_time": "28m 58s", "remaining_time": "2h 37m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792432} {"loss": 0.82618046, "grad_norm": 1.93913054, "learning_rate": 9.701e-05, "token_acc": 0.7537961, "epoch": 1.5511811, "global_step/max_steps": "1379/8890", "percentage": "15.51%", "elapsed_time": "29m 0s", "remaining_time": "2h 37m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792524} {"loss": 0.84718621, "grad_norm": 1.81366265, "learning_rate": 9.701e-05, "token_acc": 0.75767544, "epoch": 1.55230596, "global_step/max_steps": "1380/8890", "percentage": "15.52%", "elapsed_time": "29m 1s", "remaining_time": "2h 37m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792488} {"loss": 0.80431354, "grad_norm": 1.8484894, "learning_rate": 9.7e-05, "token_acc": 0.76387249, "epoch": 1.55343082, "global_step/max_steps": "1381/8890", "percentage": "15.53%", "elapsed_time": "29m 2s", "remaining_time": "2h 37m 54s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79254} {"loss": 0.69662946, "grad_norm": 2.15910316, "learning_rate": 9.699e-05, "token_acc": 0.80327869, "epoch": 1.55455568, "global_step/max_steps": "1382/8890", "percentage": "15.55%", "elapsed_time": "29m 3s", "remaining_time": "2h 37m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792588} {"loss": 0.53688812, "grad_norm": 1.59419298, "learning_rate": 9.699e-05, "token_acc": 0.83312732, "epoch": 1.55568054, "global_step/max_steps": "1383/8890", "percentage": "15.56%", "elapsed_time": "29m 4s", "remaining_time": "2h 37m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792644} {"loss": 0.65902078, "grad_norm": 1.76103163, "learning_rate": 9.698e-05, "token_acc": 0.78275476, "epoch": 1.5568054, "global_step/max_steps": "1384/8890", "percentage": "15.57%", "elapsed_time": "29m 5s", "remaining_time": "2h 37m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792698} {"loss": 0.77655935, "grad_norm": 1.79675949, "learning_rate": 9.697e-05, "token_acc": 0.77383863, "epoch": 1.55793026, "global_step/max_steps": "1385/8890", "percentage": "15.58%", "elapsed_time": "29m 7s", "remaining_time": "2h 37m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79278} {"loss": 0.72052032, "grad_norm": 1.51532543, "learning_rate": 9.697e-05, "token_acc": 0.79504132, "epoch": 1.55905512, "global_step/max_steps": "1386/8890", "percentage": "15.59%", "elapsed_time": "29m 8s", "remaining_time": "2h 37m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792852} {"loss": 0.75787747, "grad_norm": 1.72366655, "learning_rate": 9.696e-05, "token_acc": 0.77308448, "epoch": 1.56017998, "global_step/max_steps": "1387/8890", "percentage": "15.60%", "elapsed_time": "29m 9s", "remaining_time": "2h 37m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792811} {"loss": 0.92855549, "grad_norm": 1.76081288, "learning_rate": 9.695e-05, "token_acc": 0.72410714, "epoch": 1.56130484, "global_step/max_steps": "1388/8890", "percentage": "15.61%", "elapsed_time": "29m 10s", "remaining_time": "2h 37m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792866} {"loss": 0.80369794, "grad_norm": 1.82286775, "learning_rate": 9.695e-05, "token_acc": 0.77035491, "epoch": 1.5624297, "global_step/max_steps": "1389/8890", "percentage": "15.62%", "elapsed_time": "29m 11s", "remaining_time": "2h 37m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792921} {"loss": 0.85277557, "grad_norm": 2.02931905, "learning_rate": 9.694e-05, "token_acc": 0.73246753, "epoch": 1.56355456, "global_step/max_steps": "1390/8890", "percentage": "15.64%", "elapsed_time": "29m 12s", "remaining_time": "2h 37m 37s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79298} {"loss": 0.76439953, "grad_norm": 1.48384249, "learning_rate": 9.694e-05, "token_acc": 0.78410042, "epoch": 1.56467942, "global_step/max_steps": "1391/8890", "percentage": "15.65%", "elapsed_time": "29m 14s", "remaining_time": "2h 37m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.793026} {"loss": 0.79752135, "grad_norm": 1.80521357, "learning_rate": 9.693e-05, "token_acc": 0.75256674, "epoch": 1.56580427, "global_step/max_steps": "1392/8890", "percentage": "15.66%", "elapsed_time": "29m 15s", "remaining_time": "2h 37m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.793127} {"loss": 0.70171142, "grad_norm": 1.97265267, "learning_rate": 9.692e-05, "token_acc": 0.7962963, "epoch": 1.56692913, "global_step/max_steps": "1393/8890", "percentage": "15.67%", "elapsed_time": "29m 16s", "remaining_time": "2h 37m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.793172} {"loss": 0.68863595, "grad_norm": 1.67639053, "learning_rate": 9.692e-05, "token_acc": 0.789801, "epoch": 1.56805399, "global_step/max_steps": "1394/8890", "percentage": "15.68%", "elapsed_time": "29m 17s", "remaining_time": "2h 37m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.793262} {"loss": 0.6115973, "grad_norm": 1.75220418, "learning_rate": 9.691e-05, "token_acc": 0.80274657, "epoch": 1.56917885, "global_step/max_steps": "1395/8890", "percentage": "15.69%", "elapsed_time": "29m 18s", "remaining_time": "2h 37m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.793307} {"loss": 0.7994945, "grad_norm": 1.68574226, "learning_rate": 9.69e-05, "token_acc": 0.75568182, "epoch": 1.57030371, "global_step/max_steps": "1396/8890", "percentage": "15.70%", "elapsed_time": "29m 19s", "remaining_time": "2h 37m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.793357} {"loss": 0.70559007, "grad_norm": 1.75180578, "learning_rate": 9.69e-05, "token_acc": 0.78194044, "epoch": 1.57142857, "global_step/max_steps": "1397/8890", "percentage": "15.71%", "elapsed_time": "29m 20s", "remaining_time": "2h 37m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.793482} {"loss": 0.86066377, "grad_norm": 1.72649169, "learning_rate": 9.689e-05, "token_acc": 0.75244618, "epoch": 1.57255343, "global_step/max_steps": "1398/8890", "percentage": "15.73%", "elapsed_time": "29m 21s", "remaining_time": "2h 37m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.793601} {"loss": 0.72075003, "grad_norm": 1.78066289, "learning_rate": 9.688e-05, "token_acc": 0.7688378, "epoch": 1.57367829, "global_step/max_steps": "1399/8890", "percentage": "15.74%", "elapsed_time": "29m 22s", "remaining_time": "2h 37m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.793647} {"loss": 0.7492187, "grad_norm": 1.8418355, "learning_rate": 9.688e-05, "token_acc": 0.75857843, "epoch": 1.57480315, "global_step/max_steps": "1400/8890", "percentage": "15.75%", "elapsed_time": "29m 23s", "remaining_time": "2h 37m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.793752} {"loss": 0.57968771, "grad_norm": 1.58130097, "learning_rate": 9.687e-05, "token_acc": 0.81188119, "epoch": 1.57592801, "global_step/max_steps": "1401/8890", "percentage": "15.76%", "elapsed_time": "29m 24s", "remaining_time": "2h 37m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.793871} {"loss": 0.84127903, "grad_norm": 1.78910935, "learning_rate": 9.686e-05, "token_acc": 0.74493063, "epoch": 1.57705287, "global_step/max_steps": "1402/8890", "percentage": "15.77%", "elapsed_time": "29m 25s", "remaining_time": "2h 37m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.793928} {"loss": 0.80597556, "grad_norm": 2.27688742, "learning_rate": 9.686e-05, "token_acc": 0.74516129, "epoch": 1.57817773, "global_step/max_steps": "1403/8890", "percentage": "15.78%", "elapsed_time": "29m 26s", "remaining_time": "2h 37m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794032} {"loss": 0.73039806, "grad_norm": 1.69868493, "learning_rate": 9.685e-05, "token_acc": 0.78635347, "epoch": 1.57930259, "global_step/max_steps": "1404/8890", "percentage": "15.79%", "elapsed_time": "29m 28s", "remaining_time": "2h 37m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794069} {"loss": 0.74503905, "grad_norm": 1.79599726, "learning_rate": 9.685e-05, "token_acc": 0.76636569, "epoch": 1.58042745, "global_step/max_steps": "1405/8890", "percentage": "15.80%", "elapsed_time": "29m 29s", "remaining_time": "2h 37m 4s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794204} {"loss": 0.8170746, "grad_norm": 1.6581341, "learning_rate": 9.684e-05, "token_acc": 0.75909091, "epoch": 1.58155231, "global_step/max_steps": "1406/8890", "percentage": "15.82%", "elapsed_time": "29m 30s", "remaining_time": "2h 37m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794256} {"loss": 0.80537885, "grad_norm": 1.72463894, "learning_rate": 9.683e-05, "token_acc": 0.748, "epoch": 1.58267717, "global_step/max_steps": "1407/8890", "percentage": "15.83%", "elapsed_time": "29m 31s", "remaining_time": "2h 37m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794255} {"loss": 0.5944618, "grad_norm": 1.54323971, "learning_rate": 9.683e-05, "token_acc": 0.81690141, "epoch": 1.58380202, "global_step/max_steps": "1408/8890", "percentage": "15.84%", "elapsed_time": "29m 32s", "remaining_time": "2h 37m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794244} {"loss": 0.7268194, "grad_norm": 1.76299787, "learning_rate": 9.682e-05, "token_acc": 0.79009901, "epoch": 1.58492688, "global_step/max_steps": "1409/8890", "percentage": "15.85%", "elapsed_time": "29m 33s", "remaining_time": "2h 36m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794331} {"loss": 0.76083708, "grad_norm": 1.76704109, "learning_rate": 9.681e-05, "token_acc": 0.77679558, "epoch": 1.58605174, "global_step/max_steps": "1410/8890", "percentage": "15.86%", "elapsed_time": "29m 34s", "remaining_time": "2h 36m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794467} {"loss": 0.70839596, "grad_norm": 1.65306652, "learning_rate": 9.681e-05, "token_acc": 0.78697001, "epoch": 1.5871766, "global_step/max_steps": "1411/8890", "percentage": "15.87%", "elapsed_time": "29m 35s", "remaining_time": "2h 36m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794566} {"loss": 0.77349412, "grad_norm": 1.91132224, "learning_rate": 9.68e-05, "token_acc": 0.77407848, "epoch": 1.58830146, "global_step/max_steps": "1412/8890", "percentage": "15.88%", "elapsed_time": "29m 37s", "remaining_time": "2h 36m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794543} {"loss": 0.73417431, "grad_norm": 1.98335874, "learning_rate": 9.679e-05, "token_acc": 0.78846154, "epoch": 1.58942632, "global_step/max_steps": "1413/8890", "percentage": "15.89%", "elapsed_time": "29m 38s", "remaining_time": "2h 36m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794594} {"loss": 0.8459332, "grad_norm": 1.83988142, "learning_rate": 9.679e-05, "token_acc": 0.75473579, "epoch": 1.59055118, "global_step/max_steps": "1414/8890", "percentage": "15.91%", "elapsed_time": "29m 39s", "remaining_time": "2h 36m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794634} {"loss": 0.80310392, "grad_norm": 2.08985639, "learning_rate": 9.678e-05, "token_acc": 0.76410835, "epoch": 1.59167604, "global_step/max_steps": "1415/8890", "percentage": "15.92%", "elapsed_time": "29m 40s", "remaining_time": "2h 36m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794675} {"loss": 0.76340532, "grad_norm": 1.9246794, "learning_rate": 9.677e-05, "token_acc": 0.77560976, "epoch": 1.5928009, "global_step/max_steps": "1416/8890", "percentage": "15.93%", "elapsed_time": "29m 41s", "remaining_time": "2h 36m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794769} {"loss": 0.72996819, "grad_norm": 1.8288132, "learning_rate": 9.677e-05, "token_acc": 0.77053571, "epoch": 1.59392576, "global_step/max_steps": "1417/8890", "percentage": "15.94%", "elapsed_time": "29m 42s", "remaining_time": "2h 36m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794812} {"loss": 0.76128626, "grad_norm": 1.9538449, "learning_rate": 9.676e-05, "token_acc": 0.78129496, "epoch": 1.59505062, "global_step/max_steps": "1418/8890", "percentage": "15.95%", "elapsed_time": "29m 43s", "remaining_time": "2h 36m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794866} {"loss": 0.720276, "grad_norm": 1.8645165, "learning_rate": 9.675e-05, "token_acc": 0.77364865, "epoch": 1.59617548, "global_step/max_steps": "1419/8890", "percentage": "15.96%", "elapsed_time": "29m 45s", "remaining_time": "2h 36m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794918} {"loss": 0.85074556, "grad_norm": 1.73180616, "learning_rate": 9.675e-05, "token_acc": 0.75722543, "epoch": 1.59730034, "global_step/max_steps": "1420/8890", "percentage": "15.97%", "elapsed_time": "29m 46s", "remaining_time": "2h 36m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794962} {"loss": 0.64204609, "grad_norm": 1.7112807, "learning_rate": 9.674e-05, "token_acc": 0.79268293, "epoch": 1.5984252, "global_step/max_steps": "1421/8890", "percentage": "15.98%", "elapsed_time": "29m 47s", "remaining_time": "2h 36m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.795005} {"loss": 0.6489011, "grad_norm": 1.86062217, "learning_rate": 9.673e-05, "token_acc": 0.79581152, "epoch": 1.59955006, "global_step/max_steps": "1422/8890", "percentage": "16.00%", "elapsed_time": "29m 48s", "remaining_time": "2h 36m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.795066} {"loss": 0.77876949, "grad_norm": 1.89653671, "learning_rate": 9.673e-05, "token_acc": 0.78015075, "epoch": 1.60067492, "global_step/max_steps": "1423/8890", "percentage": "16.01%", "elapsed_time": "29m 49s", "remaining_time": "2h 36m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.795119} {"loss": 0.71284294, "grad_norm": 1.77368999, "learning_rate": 9.672e-05, "token_acc": 0.76931691, "epoch": 1.60179978, "global_step/max_steps": "1424/8890", "percentage": "16.02%", "elapsed_time": "29m 50s", "remaining_time": "2h 36m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.795198} {"loss": 0.80732322, "grad_norm": 1.71637619, "learning_rate": 9.671e-05, "token_acc": 0.76699029, "epoch": 1.60292463, "global_step/max_steps": "1425/8890", "percentage": "16.03%", "elapsed_time": "29m 51s", "remaining_time": "2h 36m 26s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.795252} {"loss": 0.80352235, "grad_norm": 1.94414151, "learning_rate": 9.671e-05, "token_acc": 0.76510989, "epoch": 1.60404949, "global_step/max_steps": "1426/8890", "percentage": "16.04%", "elapsed_time": "29m 52s", "remaining_time": "2h 36m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.795345} {"loss": 0.74660134, "grad_norm": 2.25653315, "learning_rate": 9.67e-05, "token_acc": 0.76298701, "epoch": 1.60517435, "global_step/max_steps": "1427/8890", "percentage": "16.05%", "elapsed_time": "29m 53s", "remaining_time": "2h 36m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.795442} {"loss": 0.85330904, "grad_norm": 1.98516726, "learning_rate": 9.669e-05, "token_acc": 0.76959064, "epoch": 1.60629921, "global_step/max_steps": "1428/8890", "percentage": "16.06%", "elapsed_time": "29m 55s", "remaining_time": "2h 36m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79548} {"loss": 0.90809351, "grad_norm": 2.21553254, "learning_rate": 9.669e-05, "token_acc": 0.73254282, "epoch": 1.60742407, "global_step/max_steps": "1429/8890", "percentage": "16.07%", "elapsed_time": "29m 56s", "remaining_time": "2h 36m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.795562} {"loss": 0.79839963, "grad_norm": 1.71814156, "learning_rate": 9.668e-05, "token_acc": 0.75478927, "epoch": 1.60854893, "global_step/max_steps": "1430/8890", "percentage": "16.09%", "elapsed_time": "29m 57s", "remaining_time": "2h 36m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.795672} {"loss": 0.81751269, "grad_norm": 1.90691793, "learning_rate": 9.667e-05, "token_acc": 0.76245655, "epoch": 1.60967379, "global_step/max_steps": "1431/8890", "percentage": "16.10%", "elapsed_time": "29m 58s", "remaining_time": "2h 36m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79572} {"loss": 0.83249354, "grad_norm": 1.51200747, "learning_rate": 9.667e-05, "token_acc": 0.74614198, "epoch": 1.61079865, "global_step/max_steps": "1432/8890", "percentage": "16.11%", "elapsed_time": "29m 59s", "remaining_time": "2h 36m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.795768} {"loss": 0.75662899, "grad_norm": 1.96031535, "learning_rate": 9.666e-05, "token_acc": 0.77777778, "epoch": 1.61192351, "global_step/max_steps": "1433/8890", "percentage": "16.12%", "elapsed_time": "30m 0s", "remaining_time": "2h 36m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.795843} {"loss": 0.79547304, "grad_norm": 1.86402583, "learning_rate": 9.665e-05, "token_acc": 0.76402116, "epoch": 1.61304837, "global_step/max_steps": "1434/8890", "percentage": "16.13%", "elapsed_time": "30m 1s", "remaining_time": "2h 36m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.795896} {"loss": 0.76053602, "grad_norm": 1.65308547, "learning_rate": 9.665e-05, "token_acc": 0.77521614, "epoch": 1.61417323, "global_step/max_steps": "1435/8890", "percentage": "16.14%", "elapsed_time": "30m 2s", "remaining_time": "2h 36m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.795949} {"loss": 0.92010653, "grad_norm": 1.85608411, "learning_rate": 9.664e-05, "token_acc": 0.72869023, "epoch": 1.61529809, "global_step/max_steps": "1436/8890", "percentage": "16.15%", "elapsed_time": "30m 3s", "remaining_time": "2h 36m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.796043} {"loss": 0.73534012, "grad_norm": 1.80985403, "learning_rate": 9.663e-05, "token_acc": 0.77976953, "epoch": 1.61642295, "global_step/max_steps": "1437/8890", "percentage": "16.16%", "elapsed_time": "30m 5s", "remaining_time": "2h 36m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.796078} {"loss": 0.81635666, "grad_norm": 1.62086153, "learning_rate": 9.663e-05, "token_acc": 0.76524113, "epoch": 1.61754781, "global_step/max_steps": "1438/8890", "percentage": "16.18%", "elapsed_time": "30m 6s", "remaining_time": "2h 36m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.796055} {"loss": 0.80367106, "grad_norm": 1.95256174, "learning_rate": 9.662e-05, "token_acc": 0.7505423, "epoch": 1.61867267, "global_step/max_steps": "1439/8890", "percentage": "16.19%", "elapsed_time": "30m 7s", "remaining_time": "2h 35m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.796106} {"loss": 0.78976882, "grad_norm": 2.040627, "learning_rate": 9.661e-05, "token_acc": 0.76212833, "epoch": 1.61979753, "global_step/max_steps": "1440/8890", "percentage": "16.20%", "elapsed_time": "30m 8s", "remaining_time": "2h 35m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.796157} {"loss": 0.89517713, "grad_norm": 1.73228431, "learning_rate": 9.661e-05, "token_acc": 0.75461255, "epoch": 1.62092238, "global_step/max_steps": "1441/8890", "percentage": "16.21%", "elapsed_time": "30m 9s", "remaining_time": "2h 35m 54s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.796261} {"loss": 0.68333691, "grad_norm": 1.75828636, "learning_rate": 9.66e-05, "token_acc": 0.79708223, "epoch": 1.62204724, "global_step/max_steps": "1442/8890", "percentage": "16.22%", "elapsed_time": "30m 10s", "remaining_time": "2h 35m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.796308} {"loss": 0.80518079, "grad_norm": 1.911273, "learning_rate": 9.659e-05, "token_acc": 0.76298269, "epoch": 1.6231721, "global_step/max_steps": "1443/8890", "percentage": "16.23%", "elapsed_time": "30m 11s", "remaining_time": "2h 35m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.796382} {"loss": 0.70391786, "grad_norm": 1.93550467, "learning_rate": 9.659e-05, "token_acc": 0.78666667, "epoch": 1.62429696, "global_step/max_steps": "1444/8890", "percentage": "16.24%", "elapsed_time": "30m 12s", "remaining_time": "2h 35m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.796478} {"loss": 0.67329341, "grad_norm": 1.70417392, "learning_rate": 9.658e-05, "token_acc": 0.79340029, "epoch": 1.62542182, "global_step/max_steps": "1445/8890", "percentage": "16.25%", "elapsed_time": "30m 14s", "remaining_time": "2h 35m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.796515} {"loss": 0.68897885, "grad_norm": 1.9272244, "learning_rate": 9.657e-05, "token_acc": 0.79340029, "epoch": 1.62654668, "global_step/max_steps": "1446/8890", "percentage": "16.27%", "elapsed_time": "30m 15s", "remaining_time": "2h 35m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.796572} {"loss": 0.82774282, "grad_norm": 1.70275486, "learning_rate": 9.657e-05, "token_acc": 0.76670574, "epoch": 1.62767154, "global_step/max_steps": "1447/8890", "percentage": "16.28%", "elapsed_time": "30m 16s", "remaining_time": "2h 35m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.796663} {"loss": 0.86933661, "grad_norm": 1.74895048, "learning_rate": 9.656e-05, "token_acc": 0.75574365, "epoch": 1.6287964, "global_step/max_steps": "1448/8890", "percentage": "16.29%", "elapsed_time": "30m 17s", "remaining_time": "2h 35m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.796762} {"loss": 0.72829866, "grad_norm": 1.55871511, "learning_rate": 9.655e-05, "token_acc": 0.77476538, "epoch": 1.62992126, "global_step/max_steps": "1449/8890", "percentage": "16.30%", "elapsed_time": "30m 18s", "remaining_time": "2h 35m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.796817} {"loss": 0.73437822, "grad_norm": 1.8327235, "learning_rate": 9.655e-05, "token_acc": 0.77025527, "epoch": 1.63104612, "global_step/max_steps": "1450/8890", "percentage": "16.31%", "elapsed_time": "30m 19s", "remaining_time": "2h 35m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.796914} {"loss": 0.62787467, "grad_norm": 1.72050977, "learning_rate": 9.654e-05, "token_acc": 0.78409091, "epoch": 1.63217098, "global_step/max_steps": "1451/8890", "percentage": "16.32%", "elapsed_time": "30m 20s", "remaining_time": "2h 35m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.797007} {"loss": 0.79321337, "grad_norm": 1.63912332, "learning_rate": 9.653e-05, "token_acc": 0.779, "epoch": 1.63329584, "global_step/max_steps": "1452/8890", "percentage": "16.33%", "elapsed_time": "30m 21s", "remaining_time": "2h 35m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79704} {"loss": 1.02686548, "grad_norm": 1.85586345, "learning_rate": 9.653e-05, "token_acc": 0.71646341, "epoch": 1.6344207, "global_step/max_steps": "1453/8890", "percentage": "16.34%", "elapsed_time": "30m 23s", "remaining_time": "2h 35m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.797015} {"loss": 0.81459999, "grad_norm": 1.67392421, "learning_rate": 9.652e-05, "token_acc": 0.76895307, "epoch": 1.63554556, "global_step/max_steps": "1454/8890", "percentage": "16.36%", "elapsed_time": "30m 24s", "remaining_time": "2h 35m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.797066} {"loss": 0.77123106, "grad_norm": 1.79843926, "learning_rate": 9.651e-05, "token_acc": 0.79120879, "epoch": 1.63667042, "global_step/max_steps": "1455/8890", "percentage": "16.37%", "elapsed_time": "30m 25s", "remaining_time": "2h 35m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.797121} {"loss": 0.62385011, "grad_norm": 1.61400104, "learning_rate": 9.651e-05, "token_acc": 0.808948, "epoch": 1.63779528, "global_step/max_steps": "1456/8890", "percentage": "16.38%", "elapsed_time": "30m 26s", "remaining_time": "2h 35m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.797171} {"loss": 0.89682108, "grad_norm": 1.7844727, "learning_rate": 9.65e-05, "token_acc": 0.7311609, "epoch": 1.63892013, "global_step/max_steps": "1457/8890", "percentage": "16.39%", "elapsed_time": "30m 27s", "remaining_time": "2h 35m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79723} {"loss": 0.70583475, "grad_norm": 1.76752901, "learning_rate": 9.649e-05, "token_acc": 0.79262087, "epoch": 1.64004499, "global_step/max_steps": "1458/8890", "percentage": "16.40%", "elapsed_time": "30m 28s", "remaining_time": "2h 35m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.797287} {"loss": 0.77398437, "grad_norm": 1.67166805, "learning_rate": 9.648e-05, "token_acc": 0.76946108, "epoch": 1.64116985, "global_step/max_steps": "1459/8890", "percentage": "16.41%", "elapsed_time": "30m 29s", "remaining_time": "2h 35m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79732} {"loss": 0.8961246, "grad_norm": 2.00472474, "learning_rate": 9.648e-05, "token_acc": 0.74683544, "epoch": 1.64229471, "global_step/max_steps": "1460/8890", "percentage": "16.42%", "elapsed_time": "30m 31s", "remaining_time": "2h 35m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.797355} {"loss": 0.75891453, "grad_norm": 1.65704429, "learning_rate": 9.647e-05, "token_acc": 0.75760286, "epoch": 1.64341957, "global_step/max_steps": "1461/8890", "percentage": "16.43%", "elapsed_time": "30m 32s", "remaining_time": "2h 35m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.797332} {"loss": 0.87815535, "grad_norm": 2.01362848, "learning_rate": 9.646e-05, "token_acc": 0.73954116, "epoch": 1.64454443, "global_step/max_steps": "1462/8890", "percentage": "16.45%", "elapsed_time": "30m 33s", "remaining_time": "2h 35m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.797387} {"loss": 0.82892907, "grad_norm": 1.59387887, "learning_rate": 9.646e-05, "token_acc": 0.76274165, "epoch": 1.64566929, "global_step/max_steps": "1463/8890", "percentage": "16.46%", "elapsed_time": "30m 34s", "remaining_time": "2h 35m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.797461} {"loss": 0.79474247, "grad_norm": 1.62951243, "learning_rate": 9.645e-05, "token_acc": 0.77682403, "epoch": 1.64679415, "global_step/max_steps": "1464/8890", "percentage": "16.47%", "elapsed_time": "30m 35s", "remaining_time": "2h 35m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.797516} {"loss": 0.91076362, "grad_norm": 2.19306135, "learning_rate": 9.644e-05, "token_acc": 0.73826458, "epoch": 1.64791901, "global_step/max_steps": "1465/8890", "percentage": "16.48%", "elapsed_time": "30m 36s", "remaining_time": "2h 35m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.797556} {"loss": 0.7404511, "grad_norm": 1.56971049, "learning_rate": 9.644e-05, "token_acc": 0.76244131, "epoch": 1.64904387, "global_step/max_steps": "1466/8890", "percentage": "16.49%", "elapsed_time": "30m 38s", "remaining_time": "2h 35m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.797543} {"loss": 0.89550102, "grad_norm": 1.90631151, "learning_rate": 9.643e-05, "token_acc": 0.74124294, "epoch": 1.65016873, "global_step/max_steps": "1467/8890", "percentage": "16.50%", "elapsed_time": "30m 39s", "remaining_time": "2h 35m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.797638} {"loss": 0.67930579, "grad_norm": 1.5212599, "learning_rate": 9.642e-05, "token_acc": 0.79370953, "epoch": 1.65129359, "global_step/max_steps": "1468/8890", "percentage": "16.51%", "elapsed_time": "30m 40s", "remaining_time": "2h 35m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.797631} {"loss": 0.7024557, "grad_norm": 1.65567029, "learning_rate": 9.642e-05, "token_acc": 0.7795874, "epoch": 1.65241845, "global_step/max_steps": "1469/8890", "percentage": "16.52%", "elapsed_time": "30m 41s", "remaining_time": "2h 35m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.797708} {"loss": 0.80829573, "grad_norm": 1.76167202, "learning_rate": 9.641e-05, "token_acc": 0.76597938, "epoch": 1.65354331, "global_step/max_steps": "1470/8890", "percentage": "16.54%", "elapsed_time": "30m 42s", "remaining_time": "2h 35m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79775} {"loss": 0.8118763, "grad_norm": 1.73967123, "learning_rate": 9.64e-05, "token_acc": 0.77514231, "epoch": 1.65466817, "global_step/max_steps": "1471/8890", "percentage": "16.55%", "elapsed_time": "30m 43s", "remaining_time": "2h 34m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.797799} {"loss": 0.65932286, "grad_norm": 1.54523718, "learning_rate": 9.64e-05, "token_acc": 0.79711538, "epoch": 1.65579303, "global_step/max_steps": "1472/8890", "percentage": "16.56%", "elapsed_time": "30m 45s", "remaining_time": "2h 34m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.797786} {"loss": 0.6944828, "grad_norm": 1.50821388, "learning_rate": 9.639e-05, "token_acc": 0.78996283, "epoch": 1.65691789, "global_step/max_steps": "1473/8890", "percentage": "16.57%", "elapsed_time": "30m 46s", "remaining_time": "2h 34m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.797874} {"loss": 0.6846807, "grad_norm": 1.92561495, "learning_rate": 9.638e-05, "token_acc": 0.76978417, "epoch": 1.65804274, "global_step/max_steps": "1474/8890", "percentage": "16.58%", "elapsed_time": "30m 47s", "remaining_time": "2h 34m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.797961} {"loss": 0.72297645, "grad_norm": 1.62958169, "learning_rate": 9.637e-05, "token_acc": 0.80853392, "epoch": 1.6591676, "global_step/max_steps": "1475/8890", "percentage": "16.59%", "elapsed_time": "30m 48s", "remaining_time": "2h 34m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.797992} {"loss": 0.94123304, "grad_norm": 2.0572238, "learning_rate": 9.637e-05, "token_acc": 0.73348783, "epoch": 1.66029246, "global_step/max_steps": "1476/8890", "percentage": "16.60%", "elapsed_time": "30m 49s", "remaining_time": "2h 34m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.797984} {"loss": 0.73928744, "grad_norm": 1.80457759, "learning_rate": 9.636e-05, "token_acc": 0.78770302, "epoch": 1.66141732, "global_step/max_steps": "1477/8890", "percentage": "16.61%", "elapsed_time": "30m 50s", "remaining_time": "2h 34m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.798028} {"loss": 0.66661537, "grad_norm": 1.92264056, "learning_rate": 9.635e-05, "token_acc": 0.79178082, "epoch": 1.66254218, "global_step/max_steps": "1478/8890", "percentage": "16.63%", "elapsed_time": "30m 51s", "remaining_time": "2h 34m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.798065} {"loss": 0.78793061, "grad_norm": 1.65141284, "learning_rate": 9.635e-05, "token_acc": 0.77189409, "epoch": 1.66366704, "global_step/max_steps": "1479/8890", "percentage": "16.64%", "elapsed_time": "30m 53s", "remaining_time": "2h 34m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.798105} {"loss": 0.71092653, "grad_norm": 1.44878447, "learning_rate": 9.634e-05, "token_acc": 0.78249097, "epoch": 1.6647919, "global_step/max_steps": "1480/8890", "percentage": "16.65%", "elapsed_time": "30m 54s", "remaining_time": "2h 34m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.798176} {"loss": 0.71235847, "grad_norm": 1.57737172, "learning_rate": 9.633e-05, "token_acc": 0.78162055, "epoch": 1.66591676, "global_step/max_steps": "1481/8890", "percentage": "16.66%", "elapsed_time": "30m 55s", "remaining_time": "2h 34m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.798228} {"loss": 0.68604803, "grad_norm": 1.60095489, "learning_rate": 9.633e-05, "token_acc": 0.78056112, "epoch": 1.66704162, "global_step/max_steps": "1482/8890", "percentage": "16.67%", "elapsed_time": "30m 56s", "remaining_time": "2h 34m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.798215} {"loss": 0.61141604, "grad_norm": 1.69793952, "learning_rate": 9.632e-05, "token_acc": 0.81794538, "epoch": 1.66816648, "global_step/max_steps": "1483/8890", "percentage": "16.68%", "elapsed_time": "30m 57s", "remaining_time": "2h 34m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.798252} {"loss": 0.8422727, "grad_norm": 1.66625369, "learning_rate": 9.631e-05, "token_acc": 0.74624226, "epoch": 1.66929134, "global_step/max_steps": "1484/8890", "percentage": "16.69%", "elapsed_time": "30m 58s", "remaining_time": "2h 34m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79833} {"loss": 0.73745638, "grad_norm": 1.77992058, "learning_rate": 9.63e-05, "token_acc": 0.79368421, "epoch": 1.6704162, "global_step/max_steps": "1485/8890", "percentage": "16.70%", "elapsed_time": "31m 0s", "remaining_time": "2h 34m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.798374} {"loss": 0.6913085, "grad_norm": 1.58654833, "learning_rate": 9.63e-05, "token_acc": 0.79532814, "epoch": 1.67154106, "global_step/max_steps": "1486/8890", "percentage": "16.72%", "elapsed_time": "31m 1s", "remaining_time": "2h 34m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.798369} {"loss": 0.64548075, "grad_norm": 2.00928521, "learning_rate": 9.629e-05, "token_acc": 0.80398671, "epoch": 1.67266592, "global_step/max_steps": "1487/8890", "percentage": "16.73%", "elapsed_time": "31m 3s", "remaining_time": "2h 34m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79817} {"loss": 0.82046688, "grad_norm": 1.67154396, "learning_rate": 9.628e-05, "token_acc": 0.76307996, "epoch": 1.67379078, "global_step/max_steps": "1488/8890", "percentage": "16.74%", "elapsed_time": "31m 4s", "remaining_time": "2h 34m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.798204} {"loss": 0.70104235, "grad_norm": 1.96159565, "learning_rate": 9.628e-05, "token_acc": 0.76856436, "epoch": 1.67491564, "global_step/max_steps": "1489/8890", "percentage": "16.75%", "elapsed_time": "31m 5s", "remaining_time": "2h 34m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.798278} {"loss": 0.80532753, "grad_norm": 1.78332937, "learning_rate": 9.627e-05, "token_acc": 0.76497175, "epoch": 1.67604049, "global_step/max_steps": "1490/8890", "percentage": "16.76%", "elapsed_time": "31m 6s", "remaining_time": "2h 34m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.798325} {"loss": 0.64187622, "grad_norm": 2.09989834, "learning_rate": 9.626e-05, "token_acc": 0.79397781, "epoch": 1.67716535, "global_step/max_steps": "1491/8890", "percentage": "16.77%", "elapsed_time": "31m 7s", "remaining_time": "2h 34m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.798365} {"loss": 0.51848084, "grad_norm": 1.94936025, "learning_rate": 9.626e-05, "token_acc": 0.83923304, "epoch": 1.67829021, "global_step/max_steps": "1492/8890", "percentage": "16.78%", "elapsed_time": "31m 8s", "remaining_time": "2h 34m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.798455} {"loss": 0.75372589, "grad_norm": 1.77916849, "learning_rate": 9.625e-05, "token_acc": 0.76334107, "epoch": 1.67941507, "global_step/max_steps": "1493/8890", "percentage": "16.79%", "elapsed_time": "31m 9s", "remaining_time": "2h 34m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.798528} {"loss": 0.78625077, "grad_norm": 1.57633889, "learning_rate": 9.624e-05, "token_acc": 0.76984925, "epoch": 1.68053993, "global_step/max_steps": "1494/8890", "percentage": "16.81%", "elapsed_time": "31m 10s", "remaining_time": "2h 34m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.798559} {"loss": 0.71508312, "grad_norm": 1.88181555, "learning_rate": 9.623e-05, "token_acc": 0.78346457, "epoch": 1.68166479, "global_step/max_steps": "1495/8890", "percentage": "16.82%", "elapsed_time": "31m 12s", "remaining_time": "2h 34m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.798594} {"loss": 0.74030817, "grad_norm": 1.90889168, "learning_rate": 9.623e-05, "token_acc": 0.77668539, "epoch": 1.68278965, "global_step/max_steps": "1496/8890", "percentage": "16.83%", "elapsed_time": "31m 13s", "remaining_time": "2h 34m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.798643} {"loss": 0.96872997, "grad_norm": 1.56706345, "learning_rate": 9.622e-05, "token_acc": 0.72628305, "epoch": 1.68391451, "global_step/max_steps": "1497/8890", "percentage": "16.84%", "elapsed_time": "31m 14s", "remaining_time": "2h 34m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.798619} {"loss": 0.738644, "grad_norm": 1.89610529, "learning_rate": 9.621e-05, "token_acc": 0.78331258, "epoch": 1.68503937, "global_step/max_steps": "1498/8890", "percentage": "16.85%", "elapsed_time": "31m 15s", "remaining_time": "2h 34m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.798672} {"loss": 0.58379996, "grad_norm": 1.70324457, "learning_rate": 9.621e-05, "token_acc": 0.82469136, "epoch": 1.68616423, "global_step/max_steps": "1499/8890", "percentage": "16.86%", "elapsed_time": "31m 16s", "remaining_time": "2h 34m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.798756} {"loss": 0.81250435, "grad_norm": 1.8239913, "learning_rate": 9.62e-05, "token_acc": 0.77926829, "epoch": 1.68728909, "global_step/max_steps": "1500/8890", "percentage": "16.87%", "elapsed_time": "31m 17s", "remaining_time": "2h 34m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.798813} {"eval_loss": 0.88732374, "eval_runtime": 31.6384, "eval_samples_per_second": 25.381, "eval_steps_per_second": 3.192, "eval_token_acc": 0.74143718, "epoch": 1.68728909, "global_step/max_steps": "1500/8890", "percentage": "16.87%", "elapsed_time": "31m 49s", "remaining_time": "2h 36m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785574} {"loss": 0.63755566, "grad_norm": 1.6004498, "learning_rate": 9.619e-05, "token_acc": 0.79228487, "epoch": 1.68841395, "global_step/max_steps": "1501/8890", "percentage": "16.88%", "elapsed_time": "32m 4s", "remaining_time": "2h 37m 54s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779923} {"loss": 0.50468063, "grad_norm": 1.78955209, "learning_rate": 9.618e-05, "token_acc": 0.82251656, "epoch": 1.68953881, "global_step/max_steps": "1502/8890", "percentage": "16.90%", "elapsed_time": "32m 5s", "remaining_time": "2h 37m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779973} {"loss": 0.82972348, "grad_norm": 1.58704996, "learning_rate": 9.618e-05, "token_acc": 0.7619477, "epoch": 1.69066367, "global_step/max_steps": "1503/8890", "percentage": "16.91%", "elapsed_time": "32m 6s", "remaining_time": "2h 37m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780027} {"loss": 0.88254499, "grad_norm": 1.74546754, "learning_rate": 9.617e-05, "token_acc": 0.73259669, "epoch": 1.69178853, "global_step/max_steps": "1504/8890", "percentage": "16.92%", "elapsed_time": "32m 7s", "remaining_time": "2h 37m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78009} {"loss": 0.67822313, "grad_norm": 1.83580959, "learning_rate": 9.616e-05, "token_acc": 0.77953715, "epoch": 1.69291339, "global_step/max_steps": "1505/8890", "percentage": "16.93%", "elapsed_time": "32m 9s", "remaining_time": "2h 37m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780153} {"loss": 0.75684345, "grad_norm": 1.75084925, "learning_rate": 9.616e-05, "token_acc": 0.76271186, "epoch": 1.69403825, "global_step/max_steps": "1506/8890", "percentage": "16.94%", "elapsed_time": "32m 10s", "remaining_time": "2h 37m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780201} {"loss": 0.72506785, "grad_norm": 1.81134939, "learning_rate": 9.615e-05, "token_acc": 0.78002245, "epoch": 1.6951631, "global_step/max_steps": "1507/8890", "percentage": "16.95%", "elapsed_time": "32m 11s", "remaining_time": "2h 37m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780265} {"loss": 0.64480311, "grad_norm": 1.73810816, "learning_rate": 9.614e-05, "token_acc": 0.80795455, "epoch": 1.69628796, "global_step/max_steps": "1508/8890", "percentage": "16.96%", "elapsed_time": "32m 12s", "remaining_time": "2h 37m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780361} {"loss": 0.8022958, "grad_norm": 1.74684107, "learning_rate": 9.613e-05, "token_acc": 0.76008065, "epoch": 1.69741282, "global_step/max_steps": "1509/8890", "percentage": "16.97%", "elapsed_time": "32m 13s", "remaining_time": "2h 37m 37s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780404} {"loss": 0.74173969, "grad_norm": 1.68916595, "learning_rate": 9.613e-05, "token_acc": 0.78155873, "epoch": 1.69853768, "global_step/max_steps": "1510/8890", "percentage": "16.99%", "elapsed_time": "32m 14s", "remaining_time": "2h 37m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780459} {"loss": 0.72250879, "grad_norm": 2.02612138, "learning_rate": 9.612e-05, "token_acc": 0.79388084, "epoch": 1.69966254, "global_step/max_steps": "1511/8890", "percentage": "17.00%", "elapsed_time": "32m 15s", "remaining_time": "2h 37m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780571} {"loss": 0.86135739, "grad_norm": 1.84290648, "learning_rate": 9.611e-05, "token_acc": 0.75156055, "epoch": 1.7007874, "global_step/max_steps": "1512/8890", "percentage": "17.01%", "elapsed_time": "32m 16s", "remaining_time": "2h 37m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780623} {"loss": 0.76200807, "grad_norm": 1.74978209, "learning_rate": 9.611e-05, "token_acc": 0.78232759, "epoch": 1.70191226, "global_step/max_steps": "1513/8890", "percentage": "17.02%", "elapsed_time": "32m 18s", "remaining_time": "2h 37m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780675} {"loss": 0.88938153, "grad_norm": 1.89346075, "learning_rate": 9.61e-05, "token_acc": 0.73745998, "epoch": 1.70303712, "global_step/max_steps": "1514/8890", "percentage": "17.03%", "elapsed_time": "32m 19s", "remaining_time": "2h 37m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780735} {"loss": 0.60989356, "grad_norm": 1.5621525, "learning_rate": 9.609e-05, "token_acc": 0.81613892, "epoch": 1.70416198, "global_step/max_steps": "1515/8890", "percentage": "17.04%", "elapsed_time": "32m 20s", "remaining_time": "2h 37m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780802} {"loss": 0.61731917, "grad_norm": 1.83543873, "learning_rate": 9.608e-05, "token_acc": 0.77988338, "epoch": 1.70528684, "global_step/max_steps": "1516/8890", "percentage": "17.05%", "elapsed_time": "32m 21s", "remaining_time": "2h 37m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780859} {"loss": 0.93740338, "grad_norm": 1.90366447, "learning_rate": 9.608e-05, "token_acc": 0.738, "epoch": 1.7064117, "global_step/max_steps": "1517/8890", "percentage": "17.06%", "elapsed_time": "32m 22s", "remaining_time": "2h 37m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780839} {"loss": 0.67101479, "grad_norm": 1.54686844, "learning_rate": 9.607e-05, "token_acc": 0.78178539, "epoch": 1.70753656, "global_step/max_steps": "1518/8890", "percentage": "17.08%", "elapsed_time": "32m 23s", "remaining_time": "2h 37m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780886} {"loss": 0.65303981, "grad_norm": 1.48535967, "learning_rate": 9.606e-05, "token_acc": 0.80738074, "epoch": 1.70866142, "global_step/max_steps": "1519/8890", "percentage": "17.09%", "elapsed_time": "32m 25s", "remaining_time": "2h 37m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780951} {"loss": 0.77939266, "grad_norm": 1.65818095, "learning_rate": 9.605e-05, "token_acc": 0.76428571, "epoch": 1.70978628, "global_step/max_steps": "1520/8890", "percentage": "17.10%", "elapsed_time": "32m 26s", "remaining_time": "2h 37m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780923} {"loss": 0.66176951, "grad_norm": 1.88826346, "learning_rate": 9.605e-05, "token_acc": 0.80025773, "epoch": 1.71091114, "global_step/max_steps": "1521/8890", "percentage": "17.11%", "elapsed_time": "32m 27s", "remaining_time": "2h 37m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781054} {"loss": 0.82079929, "grad_norm": 1.95300639, "learning_rate": 9.604e-05, "token_acc": 0.75386445, "epoch": 1.712036, "global_step/max_steps": "1522/8890", "percentage": "17.12%", "elapsed_time": "32m 28s", "remaining_time": "2h 37m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781166} {"loss": 0.8284148, "grad_norm": 1.84535444, "learning_rate": 9.603e-05, "token_acc": 0.73796193, "epoch": 1.71316085, "global_step/max_steps": "1523/8890", "percentage": "17.13%", "elapsed_time": "32m 29s", "remaining_time": "2h 37m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78122} {"loss": 0.90240788, "grad_norm": 1.84813011, "learning_rate": 9.603e-05, "token_acc": 0.74288518, "epoch": 1.71428571, "global_step/max_steps": "1524/8890", "percentage": "17.14%", "elapsed_time": "32m 30s", "remaining_time": "2h 37m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78128} {"loss": 0.70955563, "grad_norm": 2.0029974, "learning_rate": 9.602e-05, "token_acc": 0.77956204, "epoch": 1.71541057, "global_step/max_steps": "1525/8890", "percentage": "17.15%", "elapsed_time": "32m 31s", "remaining_time": "2h 37m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781383} {"loss": 0.62705588, "grad_norm": 1.63621521, "learning_rate": 9.601e-05, "token_acc": 0.81807648, "epoch": 1.71653543, "global_step/max_steps": "1526/8890", "percentage": "17.17%", "elapsed_time": "32m 32s", "remaining_time": "2h 37m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781432} {"loss": 1.02744985, "grad_norm": 2.16691375, "learning_rate": 9.6e-05, "token_acc": 0.73489519, "epoch": 1.71766029, "global_step/max_steps": "1527/8890", "percentage": "17.18%", "elapsed_time": "32m 33s", "remaining_time": "2h 37m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781495} {"loss": 0.72326756, "grad_norm": 1.8442229, "learning_rate": 9.6e-05, "token_acc": 0.78817734, "epoch": 1.71878515, "global_step/max_steps": "1528/8890", "percentage": "17.19%", "elapsed_time": "32m 35s", "remaining_time": "2h 36m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781548} {"loss": 0.7590518, "grad_norm": 1.47966218, "learning_rate": 9.599e-05, "token_acc": 0.7822374, "epoch": 1.71991001, "global_step/max_steps": "1529/8890", "percentage": "17.20%", "elapsed_time": "32m 36s", "remaining_time": "2h 36m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781613} {"loss": 0.72679329, "grad_norm": 1.7882061, "learning_rate": 9.598e-05, "token_acc": 0.77433155, "epoch": 1.72103487, "global_step/max_steps": "1530/8890", "percentage": "17.21%", "elapsed_time": "32m 37s", "remaining_time": "2h 36m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781679} {"loss": 0.87117857, "grad_norm": 1.79768884, "learning_rate": 9.597e-05, "token_acc": 0.76074766, "epoch": 1.72215973, "global_step/max_steps": "1531/8890", "percentage": "17.22%", "elapsed_time": "32m 38s", "remaining_time": "2h 36m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781726} {"loss": 0.79012787, "grad_norm": 1.8306421, "learning_rate": 9.597e-05, "token_acc": 0.76352705, "epoch": 1.72328459, "global_step/max_steps": "1532/8890", "percentage": "17.23%", "elapsed_time": "32m 39s", "remaining_time": "2h 36m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781837} {"loss": 0.86061811, "grad_norm": 1.86877024, "learning_rate": 9.596e-05, "token_acc": 0.75086906, "epoch": 1.72440945, "global_step/max_steps": "1533/8890", "percentage": "17.24%", "elapsed_time": "32m 41s", "remaining_time": "2h 36m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781674} {"loss": 0.67103046, "grad_norm": 1.70703566, "learning_rate": 9.595e-05, "token_acc": 0.77879133, "epoch": 1.72553431, "global_step/max_steps": "1534/8890", "percentage": "17.26%", "elapsed_time": "32m 42s", "remaining_time": "2h 36m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781731} {"loss": 0.79824078, "grad_norm": 1.93295145, "learning_rate": 9.595e-05, "token_acc": 0.75652174, "epoch": 1.72665917, "global_step/max_steps": "1535/8890", "percentage": "17.27%", "elapsed_time": "32m 43s", "remaining_time": "2h 36m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781832} {"loss": 0.75146401, "grad_norm": 1.89949846, "learning_rate": 9.594e-05, "token_acc": 0.77042802, "epoch": 1.72778403, "global_step/max_steps": "1536/8890", "percentage": "17.28%", "elapsed_time": "32m 44s", "remaining_time": "2h 36m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781867} {"loss": 0.64404172, "grad_norm": 1.67380905, "learning_rate": 9.593e-05, "token_acc": 0.78650442, "epoch": 1.72890889, "global_step/max_steps": "1537/8890", "percentage": "17.29%", "elapsed_time": "32m 45s", "remaining_time": "2h 36m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781938} {"loss": 0.74510062, "grad_norm": 1.66031086, "learning_rate": 9.592e-05, "token_acc": 0.76947705, "epoch": 1.73003375, "global_step/max_steps": "1538/8890", "percentage": "17.30%", "elapsed_time": "32m 46s", "remaining_time": "2h 36m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781991} {"loss": 0.70750952, "grad_norm": 1.78120899, "learning_rate": 9.592e-05, "token_acc": 0.76365663, "epoch": 1.73115861, "global_step/max_steps": "1539/8890", "percentage": "17.31%", "elapsed_time": "32m 47s", "remaining_time": "2h 36m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782039} {"loss": 0.71058917, "grad_norm": 1.48577607, "learning_rate": 9.591e-05, "token_acc": 0.80093458, "epoch": 1.73228346, "global_step/max_steps": "1540/8890", "percentage": "17.32%", "elapsed_time": "32m 48s", "remaining_time": "2h 36m 37s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782128} {"loss": 0.78637904, "grad_norm": 1.6951133, "learning_rate": 9.59e-05, "token_acc": 0.76576577, "epoch": 1.73340832, "global_step/max_steps": "1541/8890", "percentage": "17.33%", "elapsed_time": "32m 50s", "remaining_time": "2h 36m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782191} {"loss": 0.75006515, "grad_norm": 1.64000332, "learning_rate": 9.589e-05, "token_acc": 0.77469136, "epoch": 1.73453318, "global_step/max_steps": "1542/8890", "percentage": "17.35%", "elapsed_time": "32m 51s", "remaining_time": "2h 36m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782247} {"loss": 0.8622908, "grad_norm": 1.97454166, "learning_rate": 9.589e-05, "token_acc": 0.74394464, "epoch": 1.73565804, "global_step/max_steps": "1543/8890", "percentage": "17.36%", "elapsed_time": "32m 52s", "remaining_time": "2h 36m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782249} {"loss": 0.79283237, "grad_norm": 1.88169241, "learning_rate": 9.588e-05, "token_acc": 0.76158192, "epoch": 1.7367829, "global_step/max_steps": "1544/8890", "percentage": "17.37%", "elapsed_time": "32m 53s", "remaining_time": "2h 36m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782298} {"loss": 0.79551196, "grad_norm": 1.70886827, "learning_rate": 9.587e-05, "token_acc": 0.7626753, "epoch": 1.73790776, "global_step/max_steps": "1545/8890", "percentage": "17.38%", "elapsed_time": "32m 54s", "remaining_time": "2h 36m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782343} {"loss": 0.70090353, "grad_norm": 1.70105946, "learning_rate": 9.586e-05, "token_acc": 0.77981651, "epoch": 1.73903262, "global_step/max_steps": "1546/8890", "percentage": "17.39%", "elapsed_time": "32m 56s", "remaining_time": "2h 36m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782334} {"loss": 0.702048, "grad_norm": 1.42886913, "learning_rate": 9.586e-05, "token_acc": 0.78280961, "epoch": 1.74015748, "global_step/max_steps": "1547/8890", "percentage": "17.40%", "elapsed_time": "32m 57s", "remaining_time": "2h 36m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782373} {"loss": 0.74264234, "grad_norm": 1.9797374, "learning_rate": 9.585e-05, "token_acc": 0.76861702, "epoch": 1.74128234, "global_step/max_steps": "1548/8890", "percentage": "17.41%", "elapsed_time": "32m 58s", "remaining_time": "2h 36m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782461} {"loss": 0.70298684, "grad_norm": 1.49624395, "learning_rate": 9.584e-05, "token_acc": 0.77876106, "epoch": 1.7424072, "global_step/max_steps": "1549/8890", "percentage": "17.42%", "elapsed_time": "32m 59s", "remaining_time": "2h 36m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78242} {"loss": 0.7361111, "grad_norm": 1.66050959, "learning_rate": 9.583e-05, "token_acc": 0.76734258, "epoch": 1.74353206, "global_step/max_steps": "1550/8890", "percentage": "17.44%", "elapsed_time": "33m 0s", "remaining_time": "2h 36m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782529} {"loss": 0.73709834, "grad_norm": 1.93901074, "learning_rate": 9.583e-05, "token_acc": 0.78242075, "epoch": 1.74465692, "global_step/max_steps": "1551/8890", "percentage": "17.45%", "elapsed_time": "33m 1s", "remaining_time": "2h 36m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782632} {"loss": 0.7538417, "grad_norm": 1.6369828, "learning_rate": 9.582e-05, "token_acc": 0.78725825, "epoch": 1.74578178, "global_step/max_steps": "1552/8890", "percentage": "17.46%", "elapsed_time": "33m 2s", "remaining_time": "2h 36m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782657} {"loss": 0.93135953, "grad_norm": 1.77489448, "learning_rate": 9.581e-05, "token_acc": 0.72513369, "epoch": 1.74690664, "global_step/max_steps": "1553/8890", "percentage": "17.47%", "elapsed_time": "33m 4s", "remaining_time": "2h 36m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78271} {"loss": 0.60882187, "grad_norm": 1.65290558, "learning_rate": 9.58e-05, "token_acc": 0.80166271, "epoch": 1.7480315, "global_step/max_steps": "1554/8890", "percentage": "17.48%", "elapsed_time": "33m 5s", "remaining_time": "2h 36m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782758} {"loss": 0.64500308, "grad_norm": 1.84814322, "learning_rate": 9.58e-05, "token_acc": 0.80726698, "epoch": 1.74915636, "global_step/max_steps": "1555/8890", "percentage": "17.49%", "elapsed_time": "33m 6s", "remaining_time": "2h 36m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78284} {"loss": 0.66209215, "grad_norm": 1.90532041, "learning_rate": 9.579e-05, "token_acc": 0.79753086, "epoch": 1.75028121, "global_step/max_steps": "1556/8890", "percentage": "17.50%", "elapsed_time": "33m 7s", "remaining_time": "2h 36m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782908} {"loss": 0.84123755, "grad_norm": 1.79544842, "learning_rate": 9.578e-05, "token_acc": 0.75026233, "epoch": 1.75140607, "global_step/max_steps": "1557/8890", "percentage": "17.51%", "elapsed_time": "33m 8s", "remaining_time": "2h 36m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782957} {"loss": 0.53993165, "grad_norm": 1.67024863, "learning_rate": 9.578e-05, "token_acc": 0.83592645, "epoch": 1.75253093, "global_step/max_steps": "1558/8890", "percentage": "17.53%", "elapsed_time": "33m 9s", "remaining_time": "2h 36m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783006} {"loss": 0.69680655, "grad_norm": 1.65048814, "learning_rate": 9.577e-05, "token_acc": 0.78067633, "epoch": 1.75365579, "global_step/max_steps": "1559/8890", "percentage": "17.54%", "elapsed_time": "33m 10s", "remaining_time": "2h 36m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783072} {"loss": 0.65516329, "grad_norm": 1.64266682, "learning_rate": 9.576e-05, "token_acc": 0.80757727, "epoch": 1.75478065, "global_step/max_steps": "1560/8890", "percentage": "17.55%", "elapsed_time": "33m 11s", "remaining_time": "2h 35m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783173} {"loss": 0.58560717, "grad_norm": 1.56240773, "learning_rate": 9.575e-05, "token_acc": 0.81622177, "epoch": 1.75590551, "global_step/max_steps": "1561/8890", "percentage": "17.56%", "elapsed_time": "33m 13s", "remaining_time": "2h 35m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783216} {"loss": 0.73323393, "grad_norm": 1.91987646, "learning_rate": 9.575e-05, "token_acc": 0.78401122, "epoch": 1.75703037, "global_step/max_steps": "1562/8890", "percentage": "17.57%", "elapsed_time": "33m 14s", "remaining_time": "2h 35m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783271} {"loss": 0.75781453, "grad_norm": 1.91351962, "learning_rate": 9.574e-05, "token_acc": 0.76434676, "epoch": 1.75815523, "global_step/max_steps": "1563/8890", "percentage": "17.58%", "elapsed_time": "33m 15s", "remaining_time": "2h 35m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783363} {"loss": 0.80309403, "grad_norm": 1.94861734, "learning_rate": 9.573e-05, "token_acc": 0.7626322, "epoch": 1.75928009, "global_step/max_steps": "1564/8890", "percentage": "17.59%", "elapsed_time": "33m 16s", "remaining_time": "2h 35m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783416} {"loss": 0.71774423, "grad_norm": 1.70624804, "learning_rate": 9.572e-05, "token_acc": 0.78224974, "epoch": 1.76040495, "global_step/max_steps": "1565/8890", "percentage": "17.60%", "elapsed_time": "33m 17s", "remaining_time": "2h 35m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783469} {"loss": 0.62512696, "grad_norm": 1.81217742, "learning_rate": 9.572e-05, "token_acc": 0.8185907, "epoch": 1.76152981, "global_step/max_steps": "1566/8890", "percentage": "17.62%", "elapsed_time": "33m 18s", "remaining_time": "2h 35m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783515} {"loss": 0.7694512, "grad_norm": 1.99381626, "learning_rate": 9.571e-05, "token_acc": 0.7699005, "epoch": 1.76265467, "global_step/max_steps": "1567/8890", "percentage": "17.63%", "elapsed_time": "33m 19s", "remaining_time": "2h 35m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783608} {"loss": 0.71148372, "grad_norm": 1.91260636, "learning_rate": 9.57e-05, "token_acc": 0.78266033, "epoch": 1.76377953, "global_step/max_steps": "1568/8890", "percentage": "17.64%", "elapsed_time": "33m 20s", "remaining_time": "2h 35m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783682} {"loss": 0.79770589, "grad_norm": 1.59382606, "learning_rate": 9.569e-05, "token_acc": 0.76350093, "epoch": 1.76490439, "global_step/max_steps": "1569/8890", "percentage": "17.65%", "elapsed_time": "33m 22s", "remaining_time": "2h 35m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783685} {"loss": 0.77216423, "grad_norm": 1.91148186, "learning_rate": 9.568e-05, "token_acc": 0.77506775, "epoch": 1.76602925, "global_step/max_steps": "1570/8890", "percentage": "17.66%", "elapsed_time": "33m 23s", "remaining_time": "2h 35m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783735} {"loss": 0.65390742, "grad_norm": 1.35441422, "learning_rate": 9.568e-05, "token_acc": 0.80541872, "epoch": 1.76715411, "global_step/max_steps": "1571/8890", "percentage": "17.67%", "elapsed_time": "33m 24s", "remaining_time": "2h 35m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783722} {"loss": 0.72943699, "grad_norm": 1.86869991, "learning_rate": 9.567e-05, "token_acc": 0.77520436, "epoch": 1.76827897, "global_step/max_steps": "1572/8890", "percentage": "17.68%", "elapsed_time": "33m 25s", "remaining_time": "2h 35m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783823} {"loss": 0.65313244, "grad_norm": 1.72164524, "learning_rate": 9.566e-05, "token_acc": 0.7965368, "epoch": 1.76940382, "global_step/max_steps": "1573/8890", "percentage": "17.69%", "elapsed_time": "33m 26s", "remaining_time": "2h 35m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783953} {"loss": 0.6955719, "grad_norm": 1.81534088, "learning_rate": 9.565e-05, "token_acc": 0.79085714, "epoch": 1.77052868, "global_step/max_steps": "1574/8890", "percentage": "17.71%", "elapsed_time": "33m 27s", "remaining_time": "2h 35m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784018} {"loss": 0.85099375, "grad_norm": 1.87339377, "learning_rate": 9.565e-05, "token_acc": 0.72850123, "epoch": 1.77165354, "global_step/max_steps": "1575/8890", "percentage": "17.72%", "elapsed_time": "33m 28s", "remaining_time": "2h 35m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784069} {"loss": 0.6228056, "grad_norm": 1.75739837, "learning_rate": 9.564e-05, "token_acc": 0.79828851, "epoch": 1.7727784, "global_step/max_steps": "1576/8890", "percentage": "17.73%", "elapsed_time": "33m 29s", "remaining_time": "2h 35m 26s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784184} {"loss": 0.85995889, "grad_norm": 1.91053152, "learning_rate": 9.563e-05, "token_acc": 0.74216867, "epoch": 1.77390326, "global_step/max_steps": "1577/8890", "percentage": "17.74%", "elapsed_time": "33m 30s", "remaining_time": "2h 35m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784237} {"loss": 0.71341944, "grad_norm": 1.71653914, "learning_rate": 9.562e-05, "token_acc": 0.79526462, "epoch": 1.77502812, "global_step/max_steps": "1578/8890", "percentage": "17.75%", "elapsed_time": "33m 32s", "remaining_time": "2h 35m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784292} {"loss": 0.69171464, "grad_norm": 1.85617638, "learning_rate": 9.562e-05, "token_acc": 0.78090767, "epoch": 1.77615298, "global_step/max_steps": "1579/8890", "percentage": "17.76%", "elapsed_time": "33m 33s", "remaining_time": "2h 35m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784374} {"loss": 0.94214064, "grad_norm": 1.67072129, "learning_rate": 9.561e-05, "token_acc": 0.72065514, "epoch": 1.77727784, "global_step/max_steps": "1580/8890", "percentage": "17.77%", "elapsed_time": "33m 34s", "remaining_time": "2h 35m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784432} {"loss": 0.77988482, "grad_norm": 1.75003386, "learning_rate": 9.56e-05, "token_acc": 0.7675737, "epoch": 1.7784027, "global_step/max_steps": "1581/8890", "percentage": "17.78%", "elapsed_time": "33m 35s", "remaining_time": "2h 35m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784573} {"loss": 0.71928704, "grad_norm": 1.73683643, "learning_rate": 9.559e-05, "token_acc": 0.80721393, "epoch": 1.77952756, "global_step/max_steps": "1582/8890", "percentage": "17.80%", "elapsed_time": "33m 36s", "remaining_time": "2h 35m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78472} {"loss": 0.77684593, "grad_norm": 1.72944164, "learning_rate": 9.559e-05, "token_acc": 0.78023134, "epoch": 1.78065242, "global_step/max_steps": "1583/8890", "percentage": "17.81%", "elapsed_time": "33m 37s", "remaining_time": "2h 35m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784768} {"loss": 0.92836785, "grad_norm": 2.02492976, "learning_rate": 9.558e-05, "token_acc": 0.73544304, "epoch": 1.78177728, "global_step/max_steps": "1584/8890", "percentage": "17.82%", "elapsed_time": "33m 38s", "remaining_time": "2h 35m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784809} {"loss": 0.78808188, "grad_norm": 1.68961704, "learning_rate": 9.557e-05, "token_acc": 0.77455357, "epoch": 1.78290214, "global_step/max_steps": "1585/8890", "percentage": "17.83%", "elapsed_time": "33m 39s", "remaining_time": "2h 35m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784847} {"loss": 0.79668921, "grad_norm": 1.68083858, "learning_rate": 9.556e-05, "token_acc": 0.77322075, "epoch": 1.784027, "global_step/max_steps": "1586/8890", "percentage": "17.84%", "elapsed_time": "33m 40s", "remaining_time": "2h 35m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784883} {"loss": 0.59251797, "grad_norm": 1.4618715, "learning_rate": 9.556e-05, "token_acc": 0.825, "epoch": 1.78515186, "global_step/max_steps": "1587/8890", "percentage": "17.85%", "elapsed_time": "33m 41s", "remaining_time": "2h 35m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784932} {"loss": 0.79644954, "grad_norm": 1.70308137, "learning_rate": 9.555e-05, "token_acc": 0.74468085, "epoch": 1.78627672, "global_step/max_steps": "1588/8890", "percentage": "17.86%", "elapsed_time": "33m 42s", "remaining_time": "2h 35m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785026} {"loss": 0.82025981, "grad_norm": 1.74674487, "learning_rate": 9.554e-05, "token_acc": 0.76614987, "epoch": 1.78740157, "global_step/max_steps": "1589/8890", "percentage": "17.87%", "elapsed_time": "33m 43s", "remaining_time": "2h 34m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78508} {"loss": 0.64870697, "grad_norm": 1.62727177, "learning_rate": 9.553e-05, "token_acc": 0.80673317, "epoch": 1.78852643, "global_step/max_steps": "1590/8890", "percentage": "17.89%", "elapsed_time": "33m 45s", "remaining_time": "2h 34m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785162} {"loss": 0.71457565, "grad_norm": 1.79592991, "learning_rate": 9.552e-05, "token_acc": 0.78630137, "epoch": 1.78965129, "global_step/max_steps": "1591/8890", "percentage": "17.90%", "elapsed_time": "33m 46s", "remaining_time": "2h 34m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785208} {"loss": 0.74246454, "grad_norm": 1.76481533, "learning_rate": 9.552e-05, "token_acc": 0.75903614, "epoch": 1.79077615, "global_step/max_steps": "1592/8890", "percentage": "17.91%", "elapsed_time": "33m 47s", "remaining_time": "2h 34m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785286} {"loss": 0.63988703, "grad_norm": 1.47683668, "learning_rate": 9.551e-05, "token_acc": 0.79181495, "epoch": 1.79190101, "global_step/max_steps": "1593/8890", "percentage": "17.92%", "elapsed_time": "33m 48s", "remaining_time": "2h 34m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785334} {"loss": 0.89086652, "grad_norm": 2.01859951, "learning_rate": 9.55e-05, "token_acc": 0.74418605, "epoch": 1.79302587, "global_step/max_steps": "1594/8890", "percentage": "17.93%", "elapsed_time": "33m 49s", "remaining_time": "2h 34m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78538} {"loss": 0.83171535, "grad_norm": 1.82370913, "learning_rate": 9.549e-05, "token_acc": 0.74528302, "epoch": 1.79415073, "global_step/max_steps": "1595/8890", "percentage": "17.94%", "elapsed_time": "33m 50s", "remaining_time": "2h 34m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785423} {"loss": 0.66807723, "grad_norm": 1.98093009, "learning_rate": 9.549e-05, "token_acc": 0.78330658, "epoch": 1.79527559, "global_step/max_steps": "1596/8890", "percentage": "17.95%", "elapsed_time": "33m 51s", "remaining_time": "2h 34m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785524} {"loss": 0.76219511, "grad_norm": 1.66226614, "learning_rate": 9.548e-05, "token_acc": 0.77281553, "epoch": 1.79640045, "global_step/max_steps": "1597/8890", "percentage": "17.96%", "elapsed_time": "33m 52s", "remaining_time": "2h 34m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785572} {"loss": 0.55026555, "grad_norm": 1.34891725, "learning_rate": 9.547e-05, "token_acc": 0.82771194, "epoch": 1.79752531, "global_step/max_steps": "1598/8890", "percentage": "17.98%", "elapsed_time": "33m 54s", "remaining_time": "2h 34m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785608} {"loss": 0.69953358, "grad_norm": 1.51263642, "learning_rate": 9.546e-05, "token_acc": 0.78822412, "epoch": 1.79865017, "global_step/max_steps": "1599/8890", "percentage": "17.99%", "elapsed_time": "33m 55s", "remaining_time": "2h 34m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785654} {"loss": 0.85196322, "grad_norm": 1.84294379, "learning_rate": 9.546e-05, "token_acc": 0.75334821, "epoch": 1.79977503, "global_step/max_steps": "1600/8890", "percentage": "18.00%", "elapsed_time": "33m 56s", "remaining_time": "2h 34m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785636} {"loss": 0.82544327, "grad_norm": 1.9030087, "learning_rate": 9.545e-05, "token_acc": 0.75112108, "epoch": 1.80089989, "global_step/max_steps": "1601/8890", "percentage": "18.01%", "elapsed_time": "33m 57s", "remaining_time": "2h 34m 37s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785691} {"loss": 0.83915424, "grad_norm": 1.67952991, "learning_rate": 9.544e-05, "token_acc": 0.74422735, "epoch": 1.80202475, "global_step/max_steps": "1602/8890", "percentage": "18.02%", "elapsed_time": "33m 59s", "remaining_time": "2h 34m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785641} {"loss": 0.72539711, "grad_norm": 1.81679618, "learning_rate": 9.543e-05, "token_acc": 0.77139508, "epoch": 1.80314961, "global_step/max_steps": "1603/8890", "percentage": "18.03%", "elapsed_time": "34m 0s", "remaining_time": "2h 34m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785694} {"loss": 0.81195438, "grad_norm": 1.79828846, "learning_rate": 9.542e-05, "token_acc": 0.76162098, "epoch": 1.80427447, "global_step/max_steps": "1604/8890", "percentage": "18.04%", "elapsed_time": "34m 1s", "remaining_time": "2h 34m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785757} {"loss": 0.79003119, "grad_norm": 1.67001426, "learning_rate": 9.542e-05, "token_acc": 0.756, "epoch": 1.80539933, "global_step/max_steps": "1605/8890", "percentage": "18.05%", "elapsed_time": "34m 2s", "remaining_time": "2h 34m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785813} {"loss": 0.68612361, "grad_norm": 1.89000452, "learning_rate": 9.541e-05, "token_acc": 0.78375149, "epoch": 1.80652418, "global_step/max_steps": "1606/8890", "percentage": "18.07%", "elapsed_time": "34m 3s", "remaining_time": "2h 34m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785812} {"loss": 0.74637127, "grad_norm": 2.03539252, "learning_rate": 9.54e-05, "token_acc": 0.77581522, "epoch": 1.80764904, "global_step/max_steps": "1607/8890", "percentage": "18.08%", "elapsed_time": "34m 4s", "remaining_time": "2h 34m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785893} {"loss": 0.65795904, "grad_norm": 1.77607071, "learning_rate": 9.539e-05, "token_acc": 0.79014085, "epoch": 1.8087739, "global_step/max_steps": "1608/8890", "percentage": "18.09%", "elapsed_time": "34m 5s", "remaining_time": "2h 34m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785968} {"loss": 0.97328913, "grad_norm": 1.85797215, "learning_rate": 9.539e-05, "token_acc": 0.72605561, "epoch": 1.80989876, "global_step/max_steps": "1609/8890", "percentage": "18.10%", "elapsed_time": "34m 7s", "remaining_time": "2h 34m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786004} {"loss": 0.75234985, "grad_norm": 1.8913455, "learning_rate": 9.538e-05, "token_acc": 0.76960784, "epoch": 1.81102362, "global_step/max_steps": "1610/8890", "percentage": "18.11%", "elapsed_time": "34m 8s", "remaining_time": "2h 34m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785797} {"loss": 0.63810402, "grad_norm": 1.63092363, "learning_rate": 9.537e-05, "token_acc": 0.7970852, "epoch": 1.81214848, "global_step/max_steps": "1611/8890", "percentage": "18.12%", "elapsed_time": "34m 10s", "remaining_time": "2h 34m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78585} {"loss": 0.76771796, "grad_norm": 1.74846923, "learning_rate": 9.536e-05, "token_acc": 0.76906318, "epoch": 1.81327334, "global_step/max_steps": "1612/8890", "percentage": "18.13%", "elapsed_time": "34m 11s", "remaining_time": "2h 34m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.7859} {"loss": 0.58679658, "grad_norm": 1.79289722, "learning_rate": 9.535e-05, "token_acc": 0.82634731, "epoch": 1.8143982, "global_step/max_steps": "1613/8890", "percentage": "18.14%", "elapsed_time": "34m 12s", "remaining_time": "2h 34m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785953} {"loss": 0.73054862, "grad_norm": 2.22560334, "learning_rate": 9.535e-05, "token_acc": 0.80108499, "epoch": 1.81552306, "global_step/max_steps": "1614/8890", "percentage": "18.16%", "elapsed_time": "34m 13s", "remaining_time": "2h 34m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78605} {"loss": 0.87266552, "grad_norm": 1.83657956, "learning_rate": 9.534e-05, "token_acc": 0.76947368, "epoch": 1.81664792, "global_step/max_steps": "1615/8890", "percentage": "18.17%", "elapsed_time": "34m 14s", "remaining_time": "2h 34m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786091} {"loss": 0.774854, "grad_norm": 1.73988664, "learning_rate": 9.533e-05, "token_acc": 0.75088132, "epoch": 1.81777278, "global_step/max_steps": "1616/8890", "percentage": "18.18%", "elapsed_time": "34m 15s", "remaining_time": "2h 34m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786184} {"loss": 0.65639943, "grad_norm": 1.56774294, "learning_rate": 9.532e-05, "token_acc": 0.7926009, "epoch": 1.81889764, "global_step/max_steps": "1617/8890", "percentage": "18.19%", "elapsed_time": "34m 16s", "remaining_time": "2h 34m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786188} {"loss": 0.6943481, "grad_norm": 1.72466028, "learning_rate": 9.531e-05, "token_acc": 0.78914141, "epoch": 1.8200225, "global_step/max_steps": "1618/8890", "percentage": "18.20%", "elapsed_time": "34m 17s", "remaining_time": "2h 34m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786314} {"loss": 0.76809049, "grad_norm": 1.55934894, "learning_rate": 9.531e-05, "token_acc": 0.77338476, "epoch": 1.82114736, "global_step/max_steps": "1619/8890", "percentage": "18.21%", "elapsed_time": "34m 18s", "remaining_time": "2h 34m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786365} {"loss": 0.93695724, "grad_norm": 1.7727505, "learning_rate": 9.53e-05, "token_acc": 0.73025641, "epoch": 1.82227222, "global_step/max_steps": "1620/8890", "percentage": "18.22%", "elapsed_time": "34m 19s", "remaining_time": "2h 34m 4s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786416} {"loss": 0.62512565, "grad_norm": 1.70375896, "learning_rate": 9.529e-05, "token_acc": 0.80269608, "epoch": 1.82339708, "global_step/max_steps": "1621/8890", "percentage": "18.23%", "elapsed_time": "34m 21s", "remaining_time": "2h 34m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786468} {"loss": 0.86899877, "grad_norm": 1.57988966, "learning_rate": 9.528e-05, "token_acc": 0.74097473, "epoch": 1.82452193, "global_step/max_steps": "1622/8890", "percentage": "18.25%", "elapsed_time": "34m 22s", "remaining_time": "2h 34m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786531} {"loss": 0.64125073, "grad_norm": 1.59972274, "learning_rate": 9.528e-05, "token_acc": 0.79632063, "epoch": 1.82564679, "global_step/max_steps": "1623/8890", "percentage": "18.26%", "elapsed_time": "34m 23s", "remaining_time": "2h 33m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786583} {"loss": 0.74724305, "grad_norm": 1.8895247, "learning_rate": 9.527e-05, "token_acc": 0.78977273, "epoch": 1.82677165, "global_step/max_steps": "1624/8890", "percentage": "18.27%", "elapsed_time": "34m 24s", "remaining_time": "2h 33m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786675} {"loss": 0.81529516, "grad_norm": 1.80761373, "learning_rate": 9.526e-05, "token_acc": 0.7473275, "epoch": 1.82789651, "global_step/max_steps": "1625/8890", "percentage": "18.28%", "elapsed_time": "34m 25s", "remaining_time": "2h 33m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786673} {"loss": 0.79487538, "grad_norm": 1.88835001, "learning_rate": 9.525e-05, "token_acc": 0.77198068, "epoch": 1.82902137, "global_step/max_steps": "1626/8890", "percentage": "18.29%", "elapsed_time": "34m 26s", "remaining_time": "2h 33m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78671} {"loss": 1.01737702, "grad_norm": 1.84493172, "learning_rate": 9.524e-05, "token_acc": 0.72736733, "epoch": 1.83014623, "global_step/max_steps": "1627/8890", "percentage": "18.30%", "elapsed_time": "34m 27s", "remaining_time": "2h 33m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78676} {"loss": 0.88114119, "grad_norm": 1.76535356, "learning_rate": 9.524e-05, "token_acc": 0.75715696, "epoch": 1.83127109, "global_step/max_steps": "1628/8890", "percentage": "18.31%", "elapsed_time": "34m 29s", "remaining_time": "2h 33m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78657} {"loss": 0.91714799, "grad_norm": 1.99280298, "learning_rate": 9.523e-05, "token_acc": 0.74064516, "epoch": 1.83239595, "global_step/max_steps": "1629/8890", "percentage": "18.32%", "elapsed_time": "34m 30s", "remaining_time": "2h 33m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786658} {"loss": 0.79287606, "grad_norm": 1.55488765, "learning_rate": 9.522e-05, "token_acc": 0.76769691, "epoch": 1.83352081, "global_step/max_steps": "1630/8890", "percentage": "18.34%", "elapsed_time": "34m 31s", "remaining_time": "2h 33m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786696} {"loss": 0.81167626, "grad_norm": 1.6917634, "learning_rate": 9.521e-05, "token_acc": 0.74975845, "epoch": 1.83464567, "global_step/max_steps": "1631/8890", "percentage": "18.35%", "elapsed_time": "34m 33s", "remaining_time": "2h 33m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786742} {"loss": 0.83453614, "grad_norm": 1.56379437, "learning_rate": 9.52e-05, "token_acc": 0.75254237, "epoch": 1.83577053, "global_step/max_steps": "1632/8890", "percentage": "18.36%", "elapsed_time": "34m 34s", "remaining_time": "2h 33m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786714} {"loss": 0.81487453, "grad_norm": 1.96767223, "learning_rate": 9.52e-05, "token_acc": 0.74638633, "epoch": 1.83689539, "global_step/max_steps": "1633/8890", "percentage": "18.37%", "elapsed_time": "34m 35s", "remaining_time": "2h 33m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786814} {"loss": 0.85800594, "grad_norm": 1.81684685, "learning_rate": 9.519e-05, "token_acc": 0.75345912, "epoch": 1.83802025, "global_step/max_steps": "1634/8890", "percentage": "18.38%", "elapsed_time": "34m 36s", "remaining_time": "2h 33m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786886} {"loss": 0.82035267, "grad_norm": 1.52479029, "learning_rate": 9.518e-05, "token_acc": 0.76383764, "epoch": 1.83914511, "global_step/max_steps": "1635/8890", "percentage": "18.39%", "elapsed_time": "34m 37s", "remaining_time": "2h 33m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786936} {"loss": 0.88902307, "grad_norm": 1.5556525, "learning_rate": 9.517e-05, "token_acc": 0.75021758, "epoch": 1.84026997, "global_step/max_steps": "1636/8890", "percentage": "18.40%", "elapsed_time": "34m 38s", "remaining_time": "2h 33m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786921} {"loss": 0.72602737, "grad_norm": 1.56650758, "learning_rate": 9.516e-05, "token_acc": 0.78876404, "epoch": 1.84139483, "global_step/max_steps": "1637/8890", "percentage": "18.41%", "elapsed_time": "34m 40s", "remaining_time": "2h 33m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786968} {"loss": 0.93353224, "grad_norm": 1.85957658, "learning_rate": 9.516e-05, "token_acc": 0.73014019, "epoch": 1.84251969, "global_step/max_steps": "1638/8890", "percentage": "18.43%", "elapsed_time": "34m 41s", "remaining_time": "2h 33m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787016} {"loss": 0.61317754, "grad_norm": 1.53542531, "learning_rate": 9.515e-05, "token_acc": 0.80897583, "epoch": 1.84364454, "global_step/max_steps": "1639/8890", "percentage": "18.44%", "elapsed_time": "34m 42s", "remaining_time": "2h 33m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787018} {"loss": 0.59525979, "grad_norm": 1.25806105, "learning_rate": 9.514e-05, "token_acc": 0.81952921, "epoch": 1.8447694, "global_step/max_steps": "1640/8890", "percentage": "18.45%", "elapsed_time": "34m 43s", "remaining_time": "2h 33m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787076} {"loss": 0.9038986, "grad_norm": 1.71359491, "learning_rate": 9.513e-05, "token_acc": 0.73248408, "epoch": 1.84589426, "global_step/max_steps": "1641/8890", "percentage": "18.46%", "elapsed_time": "34m 44s", "remaining_time": "2h 33m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78707} {"loss": 0.74688292, "grad_norm": 1.69375372, "learning_rate": 9.512e-05, "token_acc": 0.78080569, "epoch": 1.84701912, "global_step/max_steps": "1642/8890", "percentage": "18.47%", "elapsed_time": "34m 46s", "remaining_time": "2h 33m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787108} {"loss": 0.78380072, "grad_norm": 1.56954205, "learning_rate": 9.512e-05, "token_acc": 0.77908218, "epoch": 1.84814398, "global_step/max_steps": "1643/8890", "percentage": "18.48%", "elapsed_time": "34m 47s", "remaining_time": "2h 33m 26s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787169} {"loss": 0.7074995, "grad_norm": 1.52863443, "learning_rate": 9.511e-05, "token_acc": 0.77166276, "epoch": 1.84926884, "global_step/max_steps": "1644/8890", "percentage": "18.49%", "elapsed_time": "34m 48s", "remaining_time": "2h 33m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787217} {"loss": 0.77830207, "grad_norm": 1.4468286, "learning_rate": 9.51e-05, "token_acc": 0.76554622, "epoch": 1.8503937, "global_step/max_steps": "1645/8890", "percentage": "18.50%", "elapsed_time": "34m 49s", "remaining_time": "2h 33m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787251} {"loss": 0.7212342, "grad_norm": 1.49839616, "learning_rate": 9.509e-05, "token_acc": 0.78571429, "epoch": 1.85151856, "global_step/max_steps": "1646/8890", "percentage": "18.52%", "elapsed_time": "34m 50s", "remaining_time": "2h 33m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787289} {"loss": 0.93850946, "grad_norm": 1.85727251, "learning_rate": 9.508e-05, "token_acc": 0.73711882, "epoch": 1.85264342, "global_step/max_steps": "1647/8890", "percentage": "18.53%", "elapsed_time": "34m 52s", "remaining_time": "2h 33m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787283} {"loss": 0.7024408, "grad_norm": 1.69054186, "learning_rate": 9.508e-05, "token_acc": 0.78969072, "epoch": 1.85376828, "global_step/max_steps": "1648/8890", "percentage": "18.54%", "elapsed_time": "34m 53s", "remaining_time": "2h 33m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787279} {"loss": 0.7816366, "grad_norm": 1.4162761, "learning_rate": 9.507e-05, "token_acc": 0.7607362, "epoch": 1.85489314, "global_step/max_steps": "1649/8890", "percentage": "18.55%", "elapsed_time": "34m 54s", "remaining_time": "2h 33m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787337} {"loss": 0.78034997, "grad_norm": 1.75123918, "learning_rate": 9.506e-05, "token_acc": 0.77964427, "epoch": 1.856018, "global_step/max_steps": "1650/8890", "percentage": "18.56%", "elapsed_time": "34m 55s", "remaining_time": "2h 33m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787414} {"loss": 0.71318197, "grad_norm": 1.76463008, "learning_rate": 9.505e-05, "token_acc": 0.78454333, "epoch": 1.85714286, "global_step/max_steps": "1651/8890", "percentage": "18.57%", "elapsed_time": "34m 56s", "remaining_time": "2h 33m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787495} {"loss": 0.86297125, "grad_norm": 1.93645418, "learning_rate": 9.504e-05, "token_acc": 0.74851013, "epoch": 1.85826772, "global_step/max_steps": "1652/8890", "percentage": "18.58%", "elapsed_time": "34m 57s", "remaining_time": "2h 33m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787582} {"loss": 0.69548881, "grad_norm": 2.19571185, "learning_rate": 9.504e-05, "token_acc": 0.76843911, "epoch": 1.85939258, "global_step/max_steps": "1653/8890", "percentage": "18.59%", "elapsed_time": "34m 58s", "remaining_time": "2h 33m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787634} {"loss": 0.71295029, "grad_norm": 1.79033709, "learning_rate": 9.503e-05, "token_acc": 0.78791946, "epoch": 1.86051744, "global_step/max_steps": "1654/8890", "percentage": "18.61%", "elapsed_time": "34m 59s", "remaining_time": "2h 33m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787684} {"loss": 0.56540751, "grad_norm": 1.68825102, "learning_rate": 9.502e-05, "token_acc": 0.82225237, "epoch": 1.86164229, "global_step/max_steps": "1655/8890", "percentage": "18.62%", "elapsed_time": "35m 0s", "remaining_time": "2h 33m 4s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787732} {"loss": 0.86399251, "grad_norm": 1.65273118, "learning_rate": 9.501e-05, "token_acc": 0.74718526, "epoch": 1.86276715, "global_step/max_steps": "1656/8890", "percentage": "18.63%", "elapsed_time": "35m 2s", "remaining_time": "2h 33m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787726} {"loss": 0.83072674, "grad_norm": 1.86701226, "learning_rate": 9.5e-05, "token_acc": 0.76670442, "epoch": 1.86389201, "global_step/max_steps": "1657/8890", "percentage": "18.64%", "elapsed_time": "35m 3s", "remaining_time": "2h 33m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787769} {"loss": 0.74552608, "grad_norm": 1.94417286, "learning_rate": 9.5e-05, "token_acc": 0.77696793, "epoch": 1.86501687, "global_step/max_steps": "1658/8890", "percentage": "18.65%", "elapsed_time": "35m 4s", "remaining_time": "2h 32m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787822} {"loss": 0.73010284, "grad_norm": 1.86348748, "learning_rate": 9.499e-05, "token_acc": 0.78272251, "epoch": 1.86614173, "global_step/max_steps": "1659/8890", "percentage": "18.66%", "elapsed_time": "35m 5s", "remaining_time": "2h 32m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787961} {"loss": 0.86916322, "grad_norm": 2.19827461, "learning_rate": 9.498e-05, "token_acc": 0.75857339, "epoch": 1.86726659, "global_step/max_steps": "1660/8890", "percentage": "18.67%", "elapsed_time": "35m 6s", "remaining_time": "2h 32m 54s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788042} {"loss": 0.83822912, "grad_norm": 1.92714846, "learning_rate": 9.497e-05, "token_acc": 0.75857687, "epoch": 1.86839145, "global_step/max_steps": "1661/8890", "percentage": "18.68%", "elapsed_time": "35m 7s", "remaining_time": "2h 32m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788112} {"loss": 0.65001023, "grad_norm": 1.59984267, "learning_rate": 9.496e-05, "token_acc": 0.8115345, "epoch": 1.86951631, "global_step/max_steps": "1662/8890", "percentage": "18.70%", "elapsed_time": "35m 8s", "remaining_time": "2h 32m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788162} {"loss": 0.6638068, "grad_norm": 1.75119841, "learning_rate": 9.495e-05, "token_acc": 0.79396985, "epoch": 1.87064117, "global_step/max_steps": "1663/8890", "percentage": "18.71%", "elapsed_time": "35m 9s", "remaining_time": "2h 32m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788198} {"loss": 0.7248832, "grad_norm": 1.90192974, "learning_rate": 9.495e-05, "token_acc": 0.8047619, "epoch": 1.87176603, "global_step/max_steps": "1664/8890", "percentage": "18.72%", "elapsed_time": "35m 10s", "remaining_time": "2h 32m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788283} {"loss": 0.87185466, "grad_norm": 1.77400231, "learning_rate": 9.494e-05, "token_acc": 0.74497354, "epoch": 1.87289089, "global_step/max_steps": "1665/8890", "percentage": "18.73%", "elapsed_time": "35m 12s", "remaining_time": "2h 32m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788333} {"loss": 0.79027128, "grad_norm": 1.55821157, "learning_rate": 9.493e-05, "token_acc": 0.77032136, "epoch": 1.87401575, "global_step/max_steps": "1666/8890", "percentage": "18.74%", "elapsed_time": "35m 13s", "remaining_time": "2h 32m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788384} {"loss": 0.87747186, "grad_norm": 1.844993, "learning_rate": 9.492e-05, "token_acc": 0.74145299, "epoch": 1.87514061, "global_step/max_steps": "1667/8890", "percentage": "18.75%", "elapsed_time": "35m 14s", "remaining_time": "2h 32m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788436} {"loss": 0.83591676, "grad_norm": 1.90366471, "learning_rate": 9.491e-05, "token_acc": 0.75478927, "epoch": 1.87626547, "global_step/max_steps": "1668/8890", "percentage": "18.76%", "elapsed_time": "35m 15s", "remaining_time": "2h 32m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788489} {"loss": 0.68980289, "grad_norm": 1.66459715, "learning_rate": 9.491e-05, "token_acc": 0.77925532, "epoch": 1.87739033, "global_step/max_steps": "1669/8890", "percentage": "18.77%", "elapsed_time": "35m 16s", "remaining_time": "2h 32m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788484} {"loss": 0.70084393, "grad_norm": 1.53814268, "learning_rate": 9.49e-05, "token_acc": 0.78405316, "epoch": 1.87851519, "global_step/max_steps": "1670/8890", "percentage": "18.79%", "elapsed_time": "35m 17s", "remaining_time": "2h 32m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788534} {"loss": 0.80243576, "grad_norm": 1.69632232, "learning_rate": 9.489e-05, "token_acc": 0.76484284, "epoch": 1.87964004, "global_step/max_steps": "1671/8890", "percentage": "18.80%", "elapsed_time": "35m 19s", "remaining_time": "2h 32m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78857} {"loss": 0.88150108, "grad_norm": 1.85799932, "learning_rate": 9.488e-05, "token_acc": 0.7471555, "epoch": 1.8807649, "global_step/max_steps": "1672/8890", "percentage": "18.81%", "elapsed_time": "35m 20s", "remaining_time": "2h 32m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788614} {"loss": 0.82355964, "grad_norm": 1.65510046, "learning_rate": 9.487e-05, "token_acc": 0.75365141, "epoch": 1.88188976, "global_step/max_steps": "1673/8890", "percentage": "18.82%", "elapsed_time": "35m 21s", "remaining_time": "2h 32m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788705} {"loss": 0.86164731, "grad_norm": 1.89789963, "learning_rate": 9.486e-05, "token_acc": 0.73893303, "epoch": 1.88301462, "global_step/max_steps": "1674/8890", "percentage": "18.83%", "elapsed_time": "35m 22s", "remaining_time": "2h 32m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78874} {"loss": 0.71105444, "grad_norm": 1.79870248, "learning_rate": 9.486e-05, "token_acc": 0.80285344, "epoch": 1.88413948, "global_step/max_steps": "1675/8890", "percentage": "18.84%", "elapsed_time": "35m 23s", "remaining_time": "2h 32m 26s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78881} {"loss": 0.79467428, "grad_norm": 1.77842081, "learning_rate": 9.485e-05, "token_acc": 0.75386445, "epoch": 1.88526434, "global_step/max_steps": "1676/8890", "percentage": "18.85%", "elapsed_time": "35m 24s", "remaining_time": "2h 32m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788889} {"loss": 0.79265571, "grad_norm": 1.73619819, "learning_rate": 9.484e-05, "token_acc": 0.753125, "epoch": 1.8863892, "global_step/max_steps": "1677/8890", "percentage": "18.86%", "elapsed_time": "35m 25s", "remaining_time": "2h 32m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788938} {"loss": 0.75644469, "grad_norm": 1.87767601, "learning_rate": 9.483e-05, "token_acc": 0.74698795, "epoch": 1.88751406, "global_step/max_steps": "1678/8890", "percentage": "18.88%", "elapsed_time": "35m 26s", "remaining_time": "2h 32m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789057} {"loss": 0.82918876, "grad_norm": 1.56828356, "learning_rate": 9.482e-05, "token_acc": 0.75590551, "epoch": 1.88863892, "global_step/max_steps": "1679/8890", "percentage": "18.89%", "elapsed_time": "35m 27s", "remaining_time": "2h 32m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789041} {"loss": 0.65004361, "grad_norm": 1.74189174, "learning_rate": 9.482e-05, "token_acc": 0.79466667, "epoch": 1.88976378, "global_step/max_steps": "1680/8890", "percentage": "18.90%", "elapsed_time": "35m 28s", "remaining_time": "2h 32m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789142} {"loss": 0.53467286, "grad_norm": 1.63226557, "learning_rate": 9.481e-05, "token_acc": 0.82629108, "epoch": 1.89088864, "global_step/max_steps": "1681/8890", "percentage": "18.91%", "elapsed_time": "35m 29s", "remaining_time": "2h 32m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789228} {"loss": 0.66611528, "grad_norm": 1.52162743, "learning_rate": 9.48e-05, "token_acc": 0.78763709, "epoch": 1.8920135, "global_step/max_steps": "1682/8890", "percentage": "18.92%", "elapsed_time": "35m 31s", "remaining_time": "2h 32m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789283} {"loss": 0.78094614, "grad_norm": 1.72578096, "learning_rate": 9.479e-05, "token_acc": 0.76348039, "epoch": 1.89313836, "global_step/max_steps": "1683/8890", "percentage": "18.93%", "elapsed_time": "35m 32s", "remaining_time": "2h 32m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789329} {"loss": 0.76300049, "grad_norm": 1.87919295, "learning_rate": 9.478e-05, "token_acc": 0.75668449, "epoch": 1.89426322, "global_step/max_steps": "1684/8890", "percentage": "18.94%", "elapsed_time": "35m 33s", "remaining_time": "2h 32m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789423} {"loss": 0.88470316, "grad_norm": 1.70514894, "learning_rate": 9.477e-05, "token_acc": 0.73578947, "epoch": 1.89538808, "global_step/max_steps": "1685/8890", "percentage": "18.95%", "elapsed_time": "35m 34s", "remaining_time": "2h 32m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789462} {"loss": 0.77132869, "grad_norm": 1.8094703, "learning_rate": 9.477e-05, "token_acc": 0.77377892, "epoch": 1.89651294, "global_step/max_steps": "1686/8890", "percentage": "18.97%", "elapsed_time": "35m 35s", "remaining_time": "2h 32m 4s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789537} {"loss": 0.58894408, "grad_norm": 1.29688084, "learning_rate": 9.476e-05, "token_acc": 0.82822086, "epoch": 1.8976378, "global_step/max_steps": "1687/8890", "percentage": "18.98%", "elapsed_time": "35m 36s", "remaining_time": "2h 32m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78954} {"loss": 0.53268641, "grad_norm": 1.55921578, "learning_rate": 9.475e-05, "token_acc": 0.82541899, "epoch": 1.89876265, "global_step/max_steps": "1688/8890", "percentage": "18.99%", "elapsed_time": "35m 37s", "remaining_time": "2h 32m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789593} {"loss": 0.79799616, "grad_norm": 1.75265849, "learning_rate": 9.474e-05, "token_acc": 0.75866667, "epoch": 1.89988751, "global_step/max_steps": "1689/8890", "percentage": "19.00%", "elapsed_time": "35m 38s", "remaining_time": "2h 31m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789631} {"loss": 0.74730366, "grad_norm": 1.51148391, "learning_rate": 9.473e-05, "token_acc": 0.76702509, "epoch": 1.90101237, "global_step/max_steps": "1690/8890", "percentage": "19.01%", "elapsed_time": "35m 40s", "remaining_time": "2h 31m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789673} {"loss": 0.63565886, "grad_norm": 1.66153753, "learning_rate": 9.472e-05, "token_acc": 0.79647218, "epoch": 1.90213723, "global_step/max_steps": "1691/8890", "percentage": "19.02%", "elapsed_time": "35m 41s", "remaining_time": "2h 31m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789716} {"loss": 0.83034813, "grad_norm": 1.95629644, "learning_rate": 9.472e-05, "token_acc": 0.74794521, "epoch": 1.90326209, "global_step/max_steps": "1692/8890", "percentage": "19.03%", "elapsed_time": "35m 42s", "remaining_time": "2h 31m 54s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789759} {"loss": 0.75630176, "grad_norm": 1.82151759, "learning_rate": 9.471e-05, "token_acc": 0.74637681, "epoch": 1.90438695, "global_step/max_steps": "1693/8890", "percentage": "19.04%", "elapsed_time": "35m 43s", "remaining_time": "2h 31m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789794} {"loss": 0.73891306, "grad_norm": 1.63465881, "learning_rate": 9.47e-05, "token_acc": 0.77006508, "epoch": 1.90551181, "global_step/max_steps": "1694/8890", "percentage": "19.06%", "elapsed_time": "35m 44s", "remaining_time": "2h 31m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789827} {"loss": 0.74925828, "grad_norm": 1.46567893, "learning_rate": 9.469e-05, "token_acc": 0.78442545, "epoch": 1.90663667, "global_step/max_steps": "1695/8890", "percentage": "19.07%", "elapsed_time": "35m 45s", "remaining_time": "2h 31m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789884} {"loss": 0.7483691, "grad_norm": 2.17972898, "learning_rate": 9.468e-05, "token_acc": 0.75275591, "epoch": 1.90776153, "global_step/max_steps": "1696/8890", "percentage": "19.08%", "elapsed_time": "35m 46s", "remaining_time": "2h 31m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789959} {"loss": 0.81536478, "grad_norm": 1.701033, "learning_rate": 9.467e-05, "token_acc": 0.76196319, "epoch": 1.90888639, "global_step/max_steps": "1697/8890", "percentage": "19.09%", "elapsed_time": "35m 47s", "remaining_time": "2h 31m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790072} {"loss": 0.56708735, "grad_norm": 1.80087388, "learning_rate": 9.467e-05, "token_acc": 0.81868132, "epoch": 1.91001125, "global_step/max_steps": "1698/8890", "percentage": "19.10%", "elapsed_time": "35m 48s", "remaining_time": "2h 31m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790142} {"loss": 0.69303763, "grad_norm": 1.79593551, "learning_rate": 9.466e-05, "token_acc": 0.79719388, "epoch": 1.91113611, "global_step/max_steps": "1699/8890", "percentage": "19.11%", "elapsed_time": "35m 50s", "remaining_time": "2h 31m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790195} {"loss": 0.98523706, "grad_norm": 1.63547432, "learning_rate": 9.465e-05, "token_acc": 0.72206304, "epoch": 1.91226097, "global_step/max_steps": "1700/8890", "percentage": "19.12%", "elapsed_time": "35m 51s", "remaining_time": "2h 31m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79024} {"loss": 0.66138554, "grad_norm": 1.75602889, "learning_rate": 9.464e-05, "token_acc": 0.79637378, "epoch": 1.91338583, "global_step/max_steps": "1701/8890", "percentage": "19.13%", "elapsed_time": "35m 52s", "remaining_time": "2h 31m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790285} {"loss": 0.74805045, "grad_norm": 1.53482819, "learning_rate": 9.463e-05, "token_acc": 0.776028, "epoch": 1.91451069, "global_step/max_steps": "1702/8890", "percentage": "19.15%", "elapsed_time": "35m 53s", "remaining_time": "2h 31m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790336} {"loss": 0.81103361, "grad_norm": 1.81031024, "learning_rate": 9.462e-05, "token_acc": 0.75563464, "epoch": 1.91563555, "global_step/max_steps": "1703/8890", "percentage": "19.16%", "elapsed_time": "35m 54s", "remaining_time": "2h 31m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790453} {"loss": 0.90453947, "grad_norm": 2.28985572, "learning_rate": 9.462e-05, "token_acc": 0.72141707, "epoch": 1.9167604, "global_step/max_steps": "1704/8890", "percentage": "19.17%", "elapsed_time": "35m 55s", "remaining_time": "2h 31m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790502} {"loss": 0.54144686, "grad_norm": 1.53094375, "learning_rate": 9.461e-05, "token_acc": 0.83576642, "epoch": 1.91788526, "global_step/max_steps": "1705/8890", "percentage": "19.18%", "elapsed_time": "35m 56s", "remaining_time": "2h 31m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790576} {"loss": 0.7865628, "grad_norm": 1.75064921, "learning_rate": 9.46e-05, "token_acc": 0.7708082, "epoch": 1.91901012, "global_step/max_steps": "1706/8890", "percentage": "19.19%", "elapsed_time": "35m 57s", "remaining_time": "2h 31m 26s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790663} {"loss": 0.71458536, "grad_norm": 1.76284325, "learning_rate": 9.459e-05, "token_acc": 0.78701299, "epoch": 1.92013498, "global_step/max_steps": "1707/8890", "percentage": "19.20%", "elapsed_time": "35m 58s", "remaining_time": "2h 31m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790707} {"loss": 0.86223972, "grad_norm": 1.70491242, "learning_rate": 9.458e-05, "token_acc": 0.75951904, "epoch": 1.92125984, "global_step/max_steps": "1708/8890", "percentage": "19.21%", "elapsed_time": "35m 59s", "remaining_time": "2h 31m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790753} {"loss": 0.74384868, "grad_norm": 1.89743209, "learning_rate": 9.457e-05, "token_acc": 0.78318584, "epoch": 1.9223847, "global_step/max_steps": "1709/8890", "percentage": "19.22%", "elapsed_time": "36m 1s", "remaining_time": "2h 31m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.7908} {"loss": 0.75427598, "grad_norm": 1.69340277, "learning_rate": 9.457e-05, "token_acc": 0.77087576, "epoch": 1.92350956, "global_step/max_steps": "1710/8890", "percentage": "19.24%", "elapsed_time": "36m 2s", "remaining_time": "2h 31m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790884} {"loss": 0.68113524, "grad_norm": 1.541116, "learning_rate": 9.456e-05, "token_acc": 0.81543274, "epoch": 1.92463442, "global_step/max_steps": "1711/8890", "percentage": "19.25%", "elapsed_time": "36m 3s", "remaining_time": "2h 31m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790931} {"loss": 0.66809005, "grad_norm": 1.86354816, "learning_rate": 9.455e-05, "token_acc": 0.79083969, "epoch": 1.92575928, "global_step/max_steps": "1712/8890", "percentage": "19.26%", "elapsed_time": "36m 4s", "remaining_time": "2h 31m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79101} {"loss": 0.72613388, "grad_norm": 1.9179678, "learning_rate": 9.454e-05, "token_acc": 0.77733333, "epoch": 1.92688414, "global_step/max_steps": "1713/8890", "percentage": "19.27%", "elapsed_time": "36m 5s", "remaining_time": "2h 31m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79106} {"loss": 0.68741411, "grad_norm": 1.75386345, "learning_rate": 9.453e-05, "token_acc": 0.78187919, "epoch": 1.928009, "global_step/max_steps": "1714/8890", "percentage": "19.28%", "elapsed_time": "36m 6s", "remaining_time": "2h 31m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791107} {"loss": 0.84875339, "grad_norm": 1.7702204, "learning_rate": 9.452e-05, "token_acc": 0.73469388, "epoch": 1.92913386, "global_step/max_steps": "1715/8890", "percentage": "19.29%", "elapsed_time": "36m 7s", "remaining_time": "2h 31m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79116} {"loss": 0.76985162, "grad_norm": 1.81991327, "learning_rate": 9.451e-05, "token_acc": 0.77424023, "epoch": 1.93025872, "global_step/max_steps": "1716/8890", "percentage": "19.30%", "elapsed_time": "36m 8s", "remaining_time": "2h 31m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791236} {"loss": 0.76116705, "grad_norm": 1.73494387, "learning_rate": 9.451e-05, "token_acc": 0.78438662, "epoch": 1.93138358, "global_step/max_steps": "1717/8890", "percentage": "19.31%", "elapsed_time": "36m 9s", "remaining_time": "2h 31m 4s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791287} {"loss": 0.89598811, "grad_norm": 2.17711401, "learning_rate": 9.45e-05, "token_acc": 0.74489796, "epoch": 1.93250844, "global_step/max_steps": "1718/8890", "percentage": "19.33%", "elapsed_time": "36m 10s", "remaining_time": "2h 31m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791353} {"loss": 0.45067155, "grad_norm": 1.34540427, "learning_rate": 9.449e-05, "token_acc": 0.85310734, "epoch": 1.9336333, "global_step/max_steps": "1719/8890", "percentage": "19.34%", "elapsed_time": "36m 12s", "remaining_time": "2h 31m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791385} {"loss": 0.55474693, "grad_norm": 1.46427023, "learning_rate": 9.448e-05, "token_acc": 0.8157129, "epoch": 1.93475816, "global_step/max_steps": "1720/8890", "percentage": "19.35%", "elapsed_time": "36m 13s", "remaining_time": "2h 30m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791452} {"loss": 0.85748756, "grad_norm": 1.75333834, "learning_rate": 9.447e-05, "token_acc": 0.74705252, "epoch": 1.93588301, "global_step/max_steps": "1721/8890", "percentage": "19.36%", "elapsed_time": "36m 14s", "remaining_time": "2h 30m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791496} {"loss": 0.90926898, "grad_norm": 1.89325273, "learning_rate": 9.446e-05, "token_acc": 0.74871134, "epoch": 1.93700787, "global_step/max_steps": "1722/8890", "percentage": "19.37%", "elapsed_time": "36m 15s", "remaining_time": "2h 30m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791543} {"loss": 0.74948817, "grad_norm": 1.65746081, "learning_rate": 9.445e-05, "token_acc": 0.77198697, "epoch": 1.93813273, "global_step/max_steps": "1723/8890", "percentage": "19.38%", "elapsed_time": "36m 16s", "remaining_time": "2h 30m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791592} {"loss": 0.75827807, "grad_norm": 1.68783116, "learning_rate": 9.445e-05, "token_acc": 0.76932367, "epoch": 1.93925759, "global_step/max_steps": "1724/8890", "percentage": "19.39%", "elapsed_time": "36m 17s", "remaining_time": "2h 30m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791627} {"loss": 0.81062591, "grad_norm": 1.49004412, "learning_rate": 9.444e-05, "token_acc": 0.7685113, "epoch": 1.94038245, "global_step/max_steps": "1725/8890", "percentage": "19.40%", "elapsed_time": "36m 18s", "remaining_time": "2h 30m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791677} {"loss": 0.63135123, "grad_norm": 1.56639302, "learning_rate": 9.443e-05, "token_acc": 0.80756014, "epoch": 1.94150731, "global_step/max_steps": "1726/8890", "percentage": "19.42%", "elapsed_time": "36m 19s", "remaining_time": "2h 30m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791767} {"loss": 0.67294097, "grad_norm": 1.60844421, "learning_rate": 9.442e-05, "token_acc": 0.80268595, "epoch": 1.94263217, "global_step/max_steps": "1727/8890", "percentage": "19.43%", "elapsed_time": "36m 20s", "remaining_time": "2h 30m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791847} {"loss": 0.69221455, "grad_norm": 1.5574522, "learning_rate": 9.441e-05, "token_acc": 0.80482204, "epoch": 1.94375703, "global_step/max_steps": "1728/8890", "percentage": "19.44%", "elapsed_time": "36m 22s", "remaining_time": "2h 30m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791914} {"loss": 0.80944729, "grad_norm": 1.93405962, "learning_rate": 9.44e-05, "token_acc": 0.75766423, "epoch": 1.94488189, "global_step/max_steps": "1729/8890", "percentage": "19.45%", "elapsed_time": "36m 23s", "remaining_time": "2h 30m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791962} {"loss": 0.69606453, "grad_norm": 2.33121204, "learning_rate": 9.44e-05, "token_acc": 0.78341794, "epoch": 1.94600675, "global_step/max_steps": "1730/8890", "percentage": "19.46%", "elapsed_time": "36m 24s", "remaining_time": "2h 30m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791998} {"loss": 0.74757528, "grad_norm": 1.7614646, "learning_rate": 9.439e-05, "token_acc": 0.76678043, "epoch": 1.94713161, "global_step/max_steps": "1731/8890", "percentage": "19.47%", "elapsed_time": "36m 25s", "remaining_time": "2h 30m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792051} {"loss": 0.67830145, "grad_norm": 1.62175047, "learning_rate": 9.438e-05, "token_acc": 0.80434783, "epoch": 1.94825647, "global_step/max_steps": "1732/8890", "percentage": "19.48%", "elapsed_time": "36m 26s", "remaining_time": "2h 30m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792104} {"loss": 0.757761, "grad_norm": 2.04360771, "learning_rate": 9.437e-05, "token_acc": 0.77326969, "epoch": 1.94938133, "global_step/max_steps": "1733/8890", "percentage": "19.49%", "elapsed_time": "36m 27s", "remaining_time": "2h 30m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792157} {"loss": 0.80390143, "grad_norm": 1.82884967, "learning_rate": 9.436e-05, "token_acc": 0.77017937, "epoch": 1.95050619, "global_step/max_steps": "1734/8890", "percentage": "19.51%", "elapsed_time": "36m 28s", "remaining_time": "2h 30m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792242} {"loss": 0.74117637, "grad_norm": 1.83372831, "learning_rate": 9.435e-05, "token_acc": 0.788, "epoch": 1.95163105, "global_step/max_steps": "1735/8890", "percentage": "19.52%", "elapsed_time": "36m 29s", "remaining_time": "2h 30m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792283} {"loss": 0.74358118, "grad_norm": 1.75236893, "learning_rate": 9.434e-05, "token_acc": 0.78271309, "epoch": 1.95275591, "global_step/max_steps": "1736/8890", "percentage": "19.53%", "elapsed_time": "36m 31s", "remaining_time": "2h 30m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792313} {"loss": 0.72388136, "grad_norm": 1.72443712, "learning_rate": 9.434e-05, "token_acc": 0.78675645, "epoch": 1.95388076, "global_step/max_steps": "1737/8890", "percentage": "19.54%", "elapsed_time": "36m 32s", "remaining_time": "2h 30m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792352} {"loss": 0.6673649, "grad_norm": 1.8315661, "learning_rate": 9.433e-05, "token_acc": 0.78254211, "epoch": 1.95500562, "global_step/max_steps": "1738/8890", "percentage": "19.55%", "elapsed_time": "36m 33s", "remaining_time": "2h 30m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79242} {"loss": 0.87632728, "grad_norm": 2.17473459, "learning_rate": 9.432e-05, "token_acc": 0.7433291, "epoch": 1.95613048, "global_step/max_steps": "1739/8890", "percentage": "19.56%", "elapsed_time": "36m 34s", "remaining_time": "2h 30m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792522} {"loss": 0.77561069, "grad_norm": 1.81263113, "learning_rate": 9.431e-05, "token_acc": 0.77463194, "epoch": 1.95725534, "global_step/max_steps": "1740/8890", "percentage": "19.57%", "elapsed_time": "36m 35s", "remaining_time": "2h 30m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792603} {"loss": 0.79577923, "grad_norm": 1.5752635, "learning_rate": 9.43e-05, "token_acc": 0.75140788, "epoch": 1.9583802, "global_step/max_steps": "1741/8890", "percentage": "19.58%", "elapsed_time": "36m 36s", "remaining_time": "2h 30m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792632} {"loss": 0.77757221, "grad_norm": 1.72525179, "learning_rate": 9.429e-05, "token_acc": 0.7788671, "epoch": 1.95950506, "global_step/max_steps": "1742/8890", "percentage": "19.60%", "elapsed_time": "36m 37s", "remaining_time": "2h 30m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792672} {"loss": 0.72908014, "grad_norm": 1.63398457, "learning_rate": 9.428e-05, "token_acc": 0.79405738, "epoch": 1.96062992, "global_step/max_steps": "1743/8890", "percentage": "19.61%", "elapsed_time": "36m 38s", "remaining_time": "2h 30m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792712} {"loss": 0.74626088, "grad_norm": 1.6852026, "learning_rate": 9.427e-05, "token_acc": 0.78571429, "epoch": 1.96175478, "global_step/max_steps": "1744/8890", "percentage": "19.62%", "elapsed_time": "36m 39s", "remaining_time": "2h 30m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792761} {"loss": 0.59405702, "grad_norm": 1.99584842, "learning_rate": 9.427e-05, "token_acc": 0.82621951, "epoch": 1.96287964, "global_step/max_steps": "1745/8890", "percentage": "19.63%", "elapsed_time": "36m 41s", "remaining_time": "2h 30m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792809} {"loss": 0.82529366, "grad_norm": 1.95641625, "learning_rate": 9.426e-05, "token_acc": 0.75348189, "epoch": 1.9640045, "global_step/max_steps": "1746/8890", "percentage": "19.64%", "elapsed_time": "36m 42s", "remaining_time": "2h 30m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792893} {"loss": 0.69708264, "grad_norm": 1.57560647, "learning_rate": 9.425e-05, "token_acc": 0.7943038, "epoch": 1.96512936, "global_step/max_steps": "1747/8890", "percentage": "19.65%", "elapsed_time": "36m 43s", "remaining_time": "2h 30m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792938} {"loss": 0.73781598, "grad_norm": 1.72362864, "learning_rate": 9.424e-05, "token_acc": 0.77777778, "epoch": 1.96625422, "global_step/max_steps": "1748/8890", "percentage": "19.66%", "elapsed_time": "36m 44s", "remaining_time": "2h 30m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79302} {"loss": 0.78516996, "grad_norm": 1.62615907, "learning_rate": 9.423e-05, "token_acc": 0.76890756, "epoch": 1.96737908, "global_step/max_steps": "1749/8890", "percentage": "19.67%", "elapsed_time": "36m 45s", "remaining_time": "2h 30m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.793003} {"loss": 0.84157157, "grad_norm": 1.67087328, "learning_rate": 9.422e-05, "token_acc": 0.74710425, "epoch": 1.96850394, "global_step/max_steps": "1750/8890", "percentage": "19.69%", "elapsed_time": "36m 46s", "remaining_time": "2h 30m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.793082} {"loss": 0.89471191, "grad_norm": 1.76760769, "learning_rate": 9.421e-05, "token_acc": 0.74170124, "epoch": 1.9696288, "global_step/max_steps": "1751/8890", "percentage": "19.70%", "elapsed_time": "36m 47s", "remaining_time": "2h 30m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79315} {"loss": 0.66455507, "grad_norm": 1.52292085, "learning_rate": 9.421e-05, "token_acc": 0.79770992, "epoch": 1.97075366, "global_step/max_steps": "1752/8890", "percentage": "19.71%", "elapsed_time": "36m 48s", "remaining_time": "2h 29m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.793195} {"loss": 0.75408012, "grad_norm": 1.81159353, "learning_rate": 9.42e-05, "token_acc": 0.76201117, "epoch": 1.97187852, "global_step/max_steps": "1753/8890", "percentage": "19.72%", "elapsed_time": "36m 49s", "remaining_time": "2h 29m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.793244} {"loss": 0.76549238, "grad_norm": 2.12847447, "learning_rate": 9.419e-05, "token_acc": 0.76060606, "epoch": 1.97300337, "global_step/max_steps": "1754/8890", "percentage": "19.73%", "elapsed_time": "36m 51s", "remaining_time": "2h 29m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.793293} {"loss": 0.7347002, "grad_norm": 1.74990165, "learning_rate": 9.418e-05, "token_acc": 0.77537058, "epoch": 1.97412823, "global_step/max_steps": "1755/8890", "percentage": "19.74%", "elapsed_time": "36m 52s", "remaining_time": "2h 29m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79334} {"loss": 0.77701312, "grad_norm": 1.86322927, "learning_rate": 9.417e-05, "token_acc": 0.7639485, "epoch": 1.97525309, "global_step/max_steps": "1756/8890", "percentage": "19.75%", "elapsed_time": "36m 53s", "remaining_time": "2h 29m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.793391} {"loss": 0.66618264, "grad_norm": 1.80777717, "learning_rate": 9.416e-05, "token_acc": 0.80079156, "epoch": 1.97637795, "global_step/max_steps": "1757/8890", "percentage": "19.76%", "elapsed_time": "36m 54s", "remaining_time": "2h 29m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.793435} {"loss": 0.68192923, "grad_norm": 1.61590207, "learning_rate": 9.415e-05, "token_acc": 0.78755869, "epoch": 1.97750281, "global_step/max_steps": "1758/8890", "percentage": "19.78%", "elapsed_time": "36m 55s", "remaining_time": "2h 29m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.793512} {"loss": 0.8485561, "grad_norm": 1.87197876, "learning_rate": 9.414e-05, "token_acc": 0.77086093, "epoch": 1.97862767, "global_step/max_steps": "1759/8890", "percentage": "19.79%", "elapsed_time": "36m 56s", "remaining_time": "2h 29m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.793561} {"loss": 0.88272804, "grad_norm": 1.83987379, "learning_rate": 9.414e-05, "token_acc": 0.74487705, "epoch": 1.97975253, "global_step/max_steps": "1760/8890", "percentage": "19.80%", "elapsed_time": "36m 57s", "remaining_time": "2h 29m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.793593} {"loss": 0.73510766, "grad_norm": 1.96905792, "learning_rate": 9.413e-05, "token_acc": 0.76150628, "epoch": 1.98087739, "global_step/max_steps": "1761/8890", "percentage": "19.81%", "elapsed_time": "36m 58s", "remaining_time": "2h 29m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.793641} {"loss": 0.60493219, "grad_norm": 1.80163109, "learning_rate": 9.412e-05, "token_acc": 0.80461538, "epoch": 1.98200225, "global_step/max_steps": "1762/8890", "percentage": "19.82%", "elapsed_time": "36m 59s", "remaining_time": "2h 29m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.793719} {"loss": 0.77000326, "grad_norm": 1.7863158, "learning_rate": 9.411e-05, "token_acc": 0.75669643, "epoch": 1.98312711, "global_step/max_steps": "1763/8890", "percentage": "19.83%", "elapsed_time": "37m 0s", "remaining_time": "2h 29m 37s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79389} {"loss": 0.83982301, "grad_norm": 1.95089865, "learning_rate": 9.41e-05, "token_acc": 0.76299113, "epoch": 1.98425197, "global_step/max_steps": "1764/8890", "percentage": "19.84%", "elapsed_time": "37m 1s", "remaining_time": "2h 29m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.793937} {"loss": 0.59542471, "grad_norm": 1.46099329, "learning_rate": 9.409e-05, "token_acc": 0.82020202, "epoch": 1.98537683, "global_step/max_steps": "1765/8890", "percentage": "19.85%", "elapsed_time": "37m 2s", "remaining_time": "2h 29m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794007} {"loss": 0.89683729, "grad_norm": 1.64635777, "learning_rate": 9.408e-05, "token_acc": 0.73138075, "epoch": 1.98650169, "global_step/max_steps": "1766/8890", "percentage": "19.87%", "elapsed_time": "37m 4s", "remaining_time": "2h 29m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.793997} {"loss": 0.74749291, "grad_norm": 2.00925779, "learning_rate": 9.407e-05, "token_acc": 0.75086505, "epoch": 1.98762655, "global_step/max_steps": "1767/8890", "percentage": "19.88%", "elapsed_time": "37m 5s", "remaining_time": "2h 29m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794047} {"loss": 0.52474052, "grad_norm": 1.31438732, "learning_rate": 9.407e-05, "token_acc": 0.84160305, "epoch": 1.98875141, "global_step/max_steps": "1768/8890", "percentage": "19.89%", "elapsed_time": "37m 6s", "remaining_time": "2h 29m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794094} {"loss": 0.76081568, "grad_norm": 1.99479187, "learning_rate": 9.406e-05, "token_acc": 0.77279305, "epoch": 1.98987627, "global_step/max_steps": "1769/8890", "percentage": "19.90%", "elapsed_time": "37m 7s", "remaining_time": "2h 29m 26s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794221} {"loss": 0.82216054, "grad_norm": 1.79101157, "learning_rate": 9.405e-05, "token_acc": 0.74967405, "epoch": 1.99100112, "global_step/max_steps": "1770/8890", "percentage": "19.91%", "elapsed_time": "37m 8s", "remaining_time": "2h 29m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794253} {"loss": 0.68485612, "grad_norm": 1.54406738, "learning_rate": 9.404e-05, "token_acc": 0.79101124, "epoch": 1.99212598, "global_step/max_steps": "1771/8890", "percentage": "19.92%", "elapsed_time": "37m 9s", "remaining_time": "2h 29m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794288} {"loss": 0.72933435, "grad_norm": 1.65893793, "learning_rate": 9.403e-05, "token_acc": 0.78349515, "epoch": 1.99325084, "global_step/max_steps": "1772/8890", "percentage": "19.93%", "elapsed_time": "37m 10s", "remaining_time": "2h 29m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794321} {"loss": 0.81089544, "grad_norm": 1.77664757, "learning_rate": 9.402e-05, "token_acc": 0.75266272, "epoch": 1.9943757, "global_step/max_steps": "1773/8890", "percentage": "19.94%", "elapsed_time": "37m 12s", "remaining_time": "2h 29m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794349} {"loss": 0.62259531, "grad_norm": 1.78528595, "learning_rate": 9.401e-05, "token_acc": 0.80428571, "epoch": 1.99550056, "global_step/max_steps": "1774/8890", "percentage": "19.96%", "elapsed_time": "37m 12s", "remaining_time": "2h 29m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794512} {"loss": 0.73157549, "grad_norm": 1.62751746, "learning_rate": 9.4e-05, "token_acc": 0.77754237, "epoch": 1.99662542, "global_step/max_steps": "1775/8890", "percentage": "19.97%", "elapsed_time": "37m 14s", "remaining_time": "2h 29m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794539} {"loss": 0.67539728, "grad_norm": 1.87658226, "learning_rate": 9.4e-05, "token_acc": 0.7983871, "epoch": 1.99775028, "global_step/max_steps": "1776/8890", "percentage": "19.98%", "elapsed_time": "37m 15s", "remaining_time": "2h 29m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794583} {"loss": 0.72242349, "grad_norm": 1.68727076, "learning_rate": 9.399e-05, "token_acc": 0.78372591, "epoch": 1.99887514, "global_step/max_steps": "1777/8890", "percentage": "19.99%", "elapsed_time": "37m 16s", "remaining_time": "2h 29m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794629} {"loss": 0.6941644, "grad_norm": 1.59516108, "learning_rate": 9.398e-05, "token_acc": 0.77089479, "epoch": 2.0, "global_step/max_steps": "1778/8890", "percentage": "20.00%", "elapsed_time": "37m 17s", "remaining_time": "2h 29m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794669} {"loss": 0.57299936, "grad_norm": 1.45292222, "learning_rate": 9.397e-05, "token_acc": 0.82647386, "epoch": 2.00112486, "global_step/max_steps": "1779/8890", "percentage": "20.01%", "elapsed_time": "37m 19s", "remaining_time": "2h 29m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794492} {"loss": 0.4882828, "grad_norm": 1.32022917, "learning_rate": 9.396e-05, "token_acc": 0.84820394, "epoch": 2.00224972, "global_step/max_steps": "1780/8890", "percentage": "20.02%", "elapsed_time": "37m 20s", "remaining_time": "2h 29m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794521} {"loss": 0.56424427, "grad_norm": 1.53665936, "learning_rate": 9.395e-05, "token_acc": 0.83592814, "epoch": 2.00337458, "global_step/max_steps": "1781/8890", "percentage": "20.03%", "elapsed_time": "37m 21s", "remaining_time": "2h 29m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794585} {"loss": 0.56720525, "grad_norm": 1.42891765, "learning_rate": 9.394e-05, "token_acc": 0.83190883, "epoch": 2.00449944, "global_step/max_steps": "1782/8890", "percentage": "20.04%", "elapsed_time": "37m 22s", "remaining_time": "2h 29m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794628} {"loss": 0.34031832, "grad_norm": 1.24279583, "learning_rate": 9.393e-05, "token_acc": 0.88668555, "epoch": 2.0056243, "global_step/max_steps": "1783/8890", "percentage": "20.06%", "elapsed_time": "37m 23s", "remaining_time": "2h 29m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794749} {"loss": 0.60653633, "grad_norm": 1.57404649, "learning_rate": 9.392e-05, "token_acc": 0.81871345, "epoch": 2.00674916, "global_step/max_steps": "1784/8890", "percentage": "20.07%", "elapsed_time": "37m 24s", "remaining_time": "2h 29m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794854} {"loss": 0.63244438, "grad_norm": 1.46713495, "learning_rate": 9.392e-05, "token_acc": 0.80289331, "epoch": 2.00787402, "global_step/max_steps": "1785/8890", "percentage": "20.08%", "elapsed_time": "37m 25s", "remaining_time": "2h 28m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794837} {"loss": 0.56443787, "grad_norm": 1.68615794, "learning_rate": 9.391e-05, "token_acc": 0.81829122, "epoch": 2.00899888, "global_step/max_steps": "1786/8890", "percentage": "20.09%", "elapsed_time": "37m 27s", "remaining_time": "2h 28m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794832} {"loss": 0.55236769, "grad_norm": 1.48114049, "learning_rate": 9.39e-05, "token_acc": 0.82761578, "epoch": 2.01012373, "global_step/max_steps": "1787/8890", "percentage": "20.10%", "elapsed_time": "37m 28s", "remaining_time": "2h 28m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.794879} {"loss": 0.45882261, "grad_norm": 1.50895512, "learning_rate": 9.389e-05, "token_acc": 0.84193548, "epoch": 2.01124859, "global_step/max_steps": "1788/8890", "percentage": "20.11%", "elapsed_time": "37m 29s", "remaining_time": "2h 28m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79497} {"loss": 0.54913038, "grad_norm": 1.79897308, "learning_rate": 9.388e-05, "token_acc": 0.82602922, "epoch": 2.01237345, "global_step/max_steps": "1789/8890", "percentage": "20.12%", "elapsed_time": "37m 30s", "remaining_time": "2h 28m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.795029} {"loss": 0.5205617, "grad_norm": 1.49461198, "learning_rate": 9.387e-05, "token_acc": 0.8321492, "epoch": 2.01349831, "global_step/max_steps": "1790/8890", "percentage": "20.13%", "elapsed_time": "37m 31s", "remaining_time": "2h 28m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.795074} {"loss": 0.59504932, "grad_norm": 1.92764008, "learning_rate": 9.386e-05, "token_acc": 0.80069525, "epoch": 2.01462317, "global_step/max_steps": "1791/8890", "percentage": "20.15%", "elapsed_time": "37m 32s", "remaining_time": "2h 28m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.795148} {"loss": 0.52632564, "grad_norm": 1.80988061, "learning_rate": 9.385e-05, "token_acc": 0.84637681, "epoch": 2.01574803, "global_step/max_steps": "1792/8890", "percentage": "20.16%", "elapsed_time": "37m 33s", "remaining_time": "2h 28m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.795238} {"loss": 0.45678899, "grad_norm": 1.95555484, "learning_rate": 9.384e-05, "token_acc": 0.84090909, "epoch": 2.01687289, "global_step/max_steps": "1793/8890", "percentage": "20.17%", "elapsed_time": "37m 34s", "remaining_time": "2h 28m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.795222} {"loss": 0.54512763, "grad_norm": 1.85586631, "learning_rate": 9.384e-05, "token_acc": 0.82941176, "epoch": 2.01799775, "global_step/max_steps": "1794/8890", "percentage": "20.18%", "elapsed_time": "37m 35s", "remaining_time": "2h 28m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.795218} {"loss": 0.44311863, "grad_norm": 1.78861141, "learning_rate": 9.383e-05, "token_acc": 0.84599376, "epoch": 2.01912261, "global_step/max_steps": "1795/8890", "percentage": "20.19%", "elapsed_time": "37m 37s", "remaining_time": "2h 28m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.795211} {"loss": 0.49367908, "grad_norm": 2.3388083, "learning_rate": 9.382e-05, "token_acc": 0.84364821, "epoch": 2.02024747, "global_step/max_steps": "1796/8890", "percentage": "20.20%", "elapsed_time": "37m 38s", "remaining_time": "2h 28m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.795284} {"loss": 0.66747898, "grad_norm": 2.08903623, "learning_rate": 9.381e-05, "token_acc": 0.81682243, "epoch": 2.02137233, "global_step/max_steps": "1797/8890", "percentage": "20.21%", "elapsed_time": "37m 39s", "remaining_time": "2h 28m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.795309} {"loss": 0.53423214, "grad_norm": 2.01545215, "learning_rate": 9.38e-05, "token_acc": 0.82896237, "epoch": 2.02249719, "global_step/max_steps": "1798/8890", "percentage": "20.22%", "elapsed_time": "37m 40s", "remaining_time": "2h 28m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.795415} {"loss": 0.56799257, "grad_norm": 2.29798532, "learning_rate": 9.379e-05, "token_acc": 0.8362069, "epoch": 2.02362205, "global_step/max_steps": "1799/8890", "percentage": "20.24%", "elapsed_time": "37m 41s", "remaining_time": "2h 28m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.795461} {"loss": 0.58402205, "grad_norm": 2.07380581, "learning_rate": 9.378e-05, "token_acc": 0.82387476, "epoch": 2.02474691, "global_step/max_steps": "1800/8890", "percentage": "20.25%", "elapsed_time": "37m 42s", "remaining_time": "2h 28m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.795442} {"eval_loss": 0.95982438, "eval_runtime": 31.6839, "eval_samples_per_second": 25.344, "eval_steps_per_second": 3.188, "eval_token_acc": 0.74002385, "epoch": 2.02474691, "global_step/max_steps": "1800/8890", "percentage": "20.25%", "elapsed_time": "38m 14s", "remaining_time": "2h 30m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784455} {"loss": 0.50355923, "grad_norm": 2.00802255, "learning_rate": 9.377e-05, "token_acc": 0.83375959, "epoch": 2.02587177, "global_step/max_steps": "1801/8890", "percentage": "20.26%", "elapsed_time": "38m 29s", "remaining_time": "2h 31m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779739} {"loss": 0.62885249, "grad_norm": 1.89878285, "learning_rate": 9.376e-05, "token_acc": 0.80173913, "epoch": 2.02699663, "global_step/max_steps": "1802/8890", "percentage": "20.27%", "elapsed_time": "38m 30s", "remaining_time": "2h 31m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779789} {"loss": 0.6622237, "grad_norm": 2.00517845, "learning_rate": 9.375e-05, "token_acc": 0.81399317, "epoch": 2.02812148, "global_step/max_steps": "1803/8890", "percentage": "20.28%", "elapsed_time": "38m 32s", "remaining_time": "2h 31m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779838} {"loss": 0.58127809, "grad_norm": 2.11273837, "learning_rate": 9.375e-05, "token_acc": 0.81400966, "epoch": 2.02924634, "global_step/max_steps": "1804/8890", "percentage": "20.29%", "elapsed_time": "38m 33s", "remaining_time": "2h 31m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.77989} {"loss": 0.54072505, "grad_norm": 2.3743794, "learning_rate": 9.374e-05, "token_acc": 0.8387553, "epoch": 2.0303712, "global_step/max_steps": "1805/8890", "percentage": "20.30%", "elapsed_time": "38m 34s", "remaining_time": "2h 31m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779941} {"loss": 0.46896124, "grad_norm": 2.21273518, "learning_rate": 9.373e-05, "token_acc": 0.84161491, "epoch": 2.03149606, "global_step/max_steps": "1806/8890", "percentage": "20.31%", "elapsed_time": "38m 35s", "remaining_time": "2h 31m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779996} {"loss": 0.44552252, "grad_norm": 1.87930334, "learning_rate": 9.372e-05, "token_acc": 0.86706349, "epoch": 2.03262092, "global_step/max_steps": "1807/8890", "percentage": "20.33%", "elapsed_time": "38m 36s", "remaining_time": "2h 31m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780084} {"loss": 0.59438378, "grad_norm": 2.21537399, "learning_rate": 9.371e-05, "token_acc": 0.82436883, "epoch": 2.03374578, "global_step/max_steps": "1808/8890", "percentage": "20.34%", "elapsed_time": "38m 37s", "remaining_time": "2h 31m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780129} {"loss": 0.60069084, "grad_norm": 2.10418606, "learning_rate": 9.37e-05, "token_acc": 0.81578947, "epoch": 2.03487064, "global_step/max_steps": "1809/8890", "percentage": "20.35%", "elapsed_time": "38m 38s", "remaining_time": "2h 31m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780164} {"loss": 0.54488575, "grad_norm": 1.9388876, "learning_rate": 9.369e-05, "token_acc": 0.83982301, "epoch": 2.0359955, "global_step/max_steps": "1810/8890", "percentage": "20.36%", "elapsed_time": "38m 39s", "remaining_time": "2h 31m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78024} {"loss": 0.57142675, "grad_norm": 2.16517282, "learning_rate": 9.368e-05, "token_acc": 0.82532751, "epoch": 2.03712036, "global_step/max_steps": "1811/8890", "percentage": "20.37%", "elapsed_time": "38m 40s", "remaining_time": "2h 31m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780283} {"loss": 0.59054631, "grad_norm": 2.08882022, "learning_rate": 9.367e-05, "token_acc": 0.83314794, "epoch": 2.03824522, "global_step/max_steps": "1812/8890", "percentage": "20.38%", "elapsed_time": "38m 42s", "remaining_time": "2h 31m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780313} {"loss": 0.53720093, "grad_norm": 1.86929178, "learning_rate": 9.366e-05, "token_acc": 0.8225957, "epoch": 2.03937008, "global_step/max_steps": "1813/8890", "percentage": "20.39%", "elapsed_time": "38m 43s", "remaining_time": "2h 31m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780398} {"loss": 0.5720368, "grad_norm": 1.89533758, "learning_rate": 9.365e-05, "token_acc": 0.8320356, "epoch": 2.04049494, "global_step/max_steps": "1814/8890", "percentage": "20.40%", "elapsed_time": "38m 44s", "remaining_time": "2h 31m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780439} {"loss": 0.3696391, "grad_norm": 1.63644147, "learning_rate": 9.365e-05, "token_acc": 0.86757426, "epoch": 2.0416198, "global_step/max_steps": "1815/8890", "percentage": "20.42%", "elapsed_time": "38m 45s", "remaining_time": "2h 31m 4s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780489} {"loss": 0.44099152, "grad_norm": 1.64893544, "learning_rate": 9.364e-05, "token_acc": 0.85073171, "epoch": 2.04274466, "global_step/max_steps": "1816/8890", "percentage": "20.43%", "elapsed_time": "38m 46s", "remaining_time": "2h 31m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780538} {"loss": 0.58754396, "grad_norm": 2.01948142, "learning_rate": 9.363e-05, "token_acc": 0.81879955, "epoch": 2.04386952, "global_step/max_steps": "1817/8890", "percentage": "20.44%", "elapsed_time": "38m 47s", "remaining_time": "2h 31m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780583} {"loss": 0.51372218, "grad_norm": 2.5609951, "learning_rate": 9.362e-05, "token_acc": 0.83833116, "epoch": 2.04499438, "global_step/max_steps": "1818/8890", "percentage": "20.45%", "elapsed_time": "38m 48s", "remaining_time": "2h 30m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780633} {"loss": 0.45638317, "grad_norm": 1.92554808, "learning_rate": 9.361e-05, "token_acc": 0.84675325, "epoch": 2.04611924, "global_step/max_steps": "1819/8890", "percentage": "20.46%", "elapsed_time": "38m 50s", "remaining_time": "2h 30m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780682} {"loss": 0.35568205, "grad_norm": 1.52815974, "learning_rate": 9.36e-05, "token_acc": 0.88888889, "epoch": 2.04724409, "global_step/max_steps": "1820/8890", "percentage": "20.47%", "elapsed_time": "38m 51s", "remaining_time": "2h 30m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780489} {"loss": 0.60095096, "grad_norm": 2.00732136, "learning_rate": 9.359e-05, "token_acc": 0.7944664, "epoch": 2.04836895, "global_step/max_steps": "1821/8890", "percentage": "20.48%", "elapsed_time": "38m 52s", "remaining_time": "2h 30m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780542} {"loss": 0.54308015, "grad_norm": 1.97275102, "learning_rate": 9.358e-05, "token_acc": 0.81693122, "epoch": 2.04949381, "global_step/max_steps": "1822/8890", "percentage": "20.49%", "elapsed_time": "38m 54s", "remaining_time": "2h 30m 54s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780586} {"loss": 0.49481985, "grad_norm": 2.14609218, "learning_rate": 9.357e-05, "token_acc": 0.85834502, "epoch": 2.05061867, "global_step/max_steps": "1823/8890", "percentage": "20.51%", "elapsed_time": "38m 55s", "remaining_time": "2h 30m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780682} {"loss": 0.45139444, "grad_norm": 1.9299773, "learning_rate": 9.356e-05, "token_acc": 0.85833333, "epoch": 2.05174353, "global_step/max_steps": "1824/8890", "percentage": "20.52%", "elapsed_time": "38m 56s", "remaining_time": "2h 30m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780695} {"loss": 0.49319381, "grad_norm": 1.61322856, "learning_rate": 9.355e-05, "token_acc": 0.84577114, "epoch": 2.05286839, "global_step/max_steps": "1825/8890", "percentage": "20.53%", "elapsed_time": "38m 58s", "remaining_time": "2h 30m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780577} {"loss": 0.45182303, "grad_norm": 2.02973938, "learning_rate": 9.355e-05, "token_acc": 0.85106383, "epoch": 2.05399325, "global_step/max_steps": "1826/8890", "percentage": "20.54%", "elapsed_time": "38m 59s", "remaining_time": "2h 30m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780431} {"loss": 0.38132697, "grad_norm": 2.1618495, "learning_rate": 9.354e-05, "token_acc": 0.86987522, "epoch": 2.05511811, "global_step/max_steps": "1827/8890", "percentage": "20.55%", "elapsed_time": "39m 0s", "remaining_time": "2h 30m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780513} {"loss": 0.49974358, "grad_norm": 1.97981775, "learning_rate": 9.353e-05, "token_acc": 0.83915344, "epoch": 2.05624297, "global_step/max_steps": "1828/8890", "percentage": "20.56%", "elapsed_time": "39m 1s", "remaining_time": "2h 30m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780563} {"loss": 0.49036777, "grad_norm": 2.10380387, "learning_rate": 9.352e-05, "token_acc": 0.85098952, "epoch": 2.05736783, "global_step/max_steps": "1829/8890", "percentage": "20.57%", "elapsed_time": "39m 2s", "remaining_time": "2h 30m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780637} {"loss": 0.61623466, "grad_norm": 1.96964252, "learning_rate": 9.351e-05, "token_acc": 0.81725888, "epoch": 2.05849269, "global_step/max_steps": "1830/8890", "percentage": "20.58%", "elapsed_time": "39m 4s", "remaining_time": "2h 30m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780642} {"loss": 0.71067023, "grad_norm": 1.92192972, "learning_rate": 9.35e-05, "token_acc": 0.79186228, "epoch": 2.05961755, "global_step/max_steps": "1831/8890", "percentage": "20.60%", "elapsed_time": "39m 5s", "remaining_time": "2h 30m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780696} {"loss": 0.40230325, "grad_norm": 1.84794092, "learning_rate": 9.349e-05, "token_acc": 0.84931507, "epoch": 2.06074241, "global_step/max_steps": "1832/8890", "percentage": "20.61%", "elapsed_time": "39m 6s", "remaining_time": "2h 30m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780673} {"loss": 0.4270519, "grad_norm": 1.84646225, "learning_rate": 9.348e-05, "token_acc": 0.86536585, "epoch": 2.06186727, "global_step/max_steps": "1833/8890", "percentage": "20.62%", "elapsed_time": "39m 7s", "remaining_time": "2h 30m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780761} {"loss": 0.39500898, "grad_norm": 1.99974394, "learning_rate": 9.347e-05, "token_acc": 0.88356164, "epoch": 2.06299213, "global_step/max_steps": "1834/8890", "percentage": "20.63%", "elapsed_time": "39m 8s", "remaining_time": "2h 30m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780808} {"loss": 0.53773189, "grad_norm": 2.30111074, "learning_rate": 9.346e-05, "token_acc": 0.84197531, "epoch": 2.06411699, "global_step/max_steps": "1835/8890", "percentage": "20.64%", "elapsed_time": "39m 9s", "remaining_time": "2h 30m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780863} {"loss": 0.35719961, "grad_norm": 1.92692351, "learning_rate": 9.345e-05, "token_acc": 0.89060489, "epoch": 2.06524184, "global_step/max_steps": "1836/8890", "percentage": "20.65%", "elapsed_time": "39m 11s", "remaining_time": "2h 30m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780913} {"loss": 0.46156409, "grad_norm": 1.79893517, "learning_rate": 9.344e-05, "token_acc": 0.85009862, "epoch": 2.0663667, "global_step/max_steps": "1837/8890", "percentage": "20.66%", "elapsed_time": "39m 12s", "remaining_time": "2h 30m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780992} {"loss": 0.54676491, "grad_norm": 2.7664423, "learning_rate": 9.344e-05, "token_acc": 0.81751825, "epoch": 2.06749156, "global_step/max_steps": "1838/8890", "percentage": "20.67%", "elapsed_time": "39m 13s", "remaining_time": "2h 30m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781039} {"loss": 0.433209, "grad_norm": 1.8797574, "learning_rate": 9.343e-05, "token_acc": 0.87427912, "epoch": 2.06861642, "global_step/max_steps": "1839/8890", "percentage": "20.69%", "elapsed_time": "39m 14s", "remaining_time": "2h 30m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781083} {"loss": 0.60589147, "grad_norm": 1.96966219, "learning_rate": 9.342e-05, "token_acc": 0.82621083, "epoch": 2.06974128, "global_step/max_steps": "1840/8890", "percentage": "20.70%", "elapsed_time": "39m 15s", "remaining_time": "2h 30m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781121} {"loss": 0.58457714, "grad_norm": 2.28241158, "learning_rate": 9.341e-05, "token_acc": 0.82481752, "epoch": 2.07086614, "global_step/max_steps": "1841/8890", "percentage": "20.71%", "elapsed_time": "39m 16s", "remaining_time": "2h 30m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781166} {"loss": 0.59877127, "grad_norm": 2.03977585, "learning_rate": 9.34e-05, "token_acc": 0.81765835, "epoch": 2.071991, "global_step/max_steps": "1842/8890", "percentage": "20.72%", "elapsed_time": "39m 18s", "remaining_time": "2h 30m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781155} {"loss": 0.58538842, "grad_norm": 2.4755621, "learning_rate": 9.339e-05, "token_acc": 0.81141439, "epoch": 2.07311586, "global_step/max_steps": "1843/8890", "percentage": "20.73%", "elapsed_time": "39m 19s", "remaining_time": "2h 30m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781234} {"loss": 0.43951482, "grad_norm": 2.20460057, "learning_rate": 9.338e-05, "token_acc": 0.86086957, "epoch": 2.07424072, "global_step/max_steps": "1844/8890", "percentage": "20.74%", "elapsed_time": "39m 20s", "remaining_time": "2h 30m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781283} {"loss": 0.51996934, "grad_norm": 2.33527255, "learning_rate": 9.337e-05, "token_acc": 0.82758621, "epoch": 2.07536558, "global_step/max_steps": "1845/8890", "percentage": "20.75%", "elapsed_time": "39m 21s", "remaining_time": "2h 30m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781324} {"loss": 0.53497487, "grad_norm": 2.21605802, "learning_rate": 9.336e-05, "token_acc": 0.83393502, "epoch": 2.07649044, "global_step/max_steps": "1846/8890", "percentage": "20.76%", "elapsed_time": "39m 22s", "remaining_time": "2h 30m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781436} {"loss": 0.55491877, "grad_norm": 1.97411215, "learning_rate": 9.335e-05, "token_acc": 0.83397313, "epoch": 2.0776153, "global_step/max_steps": "1847/8890", "percentage": "20.78%", "elapsed_time": "39m 23s", "remaining_time": "2h 30m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781486} {"loss": 0.64828908, "grad_norm": 2.17279387, "learning_rate": 9.334e-05, "token_acc": 0.80595483, "epoch": 2.07874016, "global_step/max_steps": "1848/8890", "percentage": "20.79%", "elapsed_time": "39m 24s", "remaining_time": "2h 30m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781457} {"loss": 0.56281507, "grad_norm": 2.255615, "learning_rate": 9.333e-05, "token_acc": 0.830721, "epoch": 2.07986502, "global_step/max_steps": "1849/8890", "percentage": "20.80%", "elapsed_time": "39m 25s", "remaining_time": "2h 30m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781506} {"loss": 0.57058442, "grad_norm": 2.18339086, "learning_rate": 9.332e-05, "token_acc": 0.82932417, "epoch": 2.08098988, "global_step/max_steps": "1850/8890", "percentage": "20.81%", "elapsed_time": "39m 26s", "remaining_time": "2h 30m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78158} {"loss": 0.65848649, "grad_norm": 2.49035215, "learning_rate": 9.332e-05, "token_acc": 0.80829016, "epoch": 2.08211474, "global_step/max_steps": "1851/8890", "percentage": "20.82%", "elapsed_time": "39m 28s", "remaining_time": "2h 30m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781633} {"loss": 0.5798105, "grad_norm": 1.93902254, "learning_rate": 9.331e-05, "token_acc": 0.81363636, "epoch": 2.0832396, "global_step/max_steps": "1852/8890", "percentage": "20.83%", "elapsed_time": "39m 29s", "remaining_time": "2h 30m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781679} {"loss": 0.63131273, "grad_norm": 2.07386613, "learning_rate": 9.33e-05, "token_acc": 0.80512821, "epoch": 2.08436445, "global_step/max_steps": "1853/8890", "percentage": "20.84%", "elapsed_time": "39m 30s", "remaining_time": "2h 30m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781719} {"loss": 0.60878396, "grad_norm": 2.03828335, "learning_rate": 9.329e-05, "token_acc": 0.80729761, "epoch": 2.08548931, "global_step/max_steps": "1854/8890", "percentage": "20.85%", "elapsed_time": "39m 31s", "remaining_time": "2h 30m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781762} {"loss": 0.69075227, "grad_norm": 2.26796579, "learning_rate": 9.328e-05, "token_acc": 0.81043257, "epoch": 2.08661417, "global_step/max_steps": "1855/8890", "percentage": "20.87%", "elapsed_time": "39m 32s", "remaining_time": "2h 29m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781811} {"loss": 0.56800234, "grad_norm": 1.98946941, "learning_rate": 9.327e-05, "token_acc": 0.83580508, "epoch": 2.08773903, "global_step/max_steps": "1856/8890", "percentage": "20.88%", "elapsed_time": "39m 33s", "remaining_time": "2h 29m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781886} {"loss": 0.58096743, "grad_norm": 2.00749993, "learning_rate": 9.326e-05, "token_acc": 0.81243414, "epoch": 2.08886389, "global_step/max_steps": "1857/8890", "percentage": "20.89%", "elapsed_time": "39m 34s", "remaining_time": "2h 29m 54s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78193} {"loss": 0.56558836, "grad_norm": 2.43599105, "learning_rate": 9.325e-05, "token_acc": 0.81777778, "epoch": 2.08998875, "global_step/max_steps": "1858/8890", "percentage": "20.90%", "elapsed_time": "39m 35s", "remaining_time": "2h 29m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782} {"loss": 0.56427646, "grad_norm": 2.30872655, "learning_rate": 9.324e-05, "token_acc": 0.82818792, "epoch": 2.09111361, "global_step/max_steps": "1859/8890", "percentage": "20.91%", "elapsed_time": "39m 37s", "remaining_time": "2h 29m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782073} {"loss": 0.49625784, "grad_norm": 1.93290901, "learning_rate": 9.323e-05, "token_acc": 0.84252874, "epoch": 2.09223847, "global_step/max_steps": "1860/8890", "percentage": "20.92%", "elapsed_time": "39m 38s", "remaining_time": "2h 29m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782118} {"loss": 0.43396047, "grad_norm": 2.05740237, "learning_rate": 9.322e-05, "token_acc": 0.86578947, "epoch": 2.09336333, "global_step/max_steps": "1861/8890", "percentage": "20.93%", "elapsed_time": "39m 39s", "remaining_time": "2h 29m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782155} {"loss": 0.51622629, "grad_norm": 1.89239538, "learning_rate": 9.321e-05, "token_acc": 0.82943144, "epoch": 2.09448819, "global_step/max_steps": "1862/8890", "percentage": "20.94%", "elapsed_time": "39m 40s", "remaining_time": "2h 29m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782207} {"loss": 0.43255866, "grad_norm": 2.0197053, "learning_rate": 9.32e-05, "token_acc": 0.85063291, "epoch": 2.09561305, "global_step/max_steps": "1863/8890", "percentage": "20.96%", "elapsed_time": "39m 41s", "remaining_time": "2h 29m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78225} {"loss": 0.60263366, "grad_norm": 2.29005265, "learning_rate": 9.319e-05, "token_acc": 0.81006289, "epoch": 2.09673791, "global_step/max_steps": "1864/8890", "percentage": "20.97%", "elapsed_time": "39m 42s", "remaining_time": "2h 29m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78229} {"loss": 0.49202627, "grad_norm": 2.07767344, "learning_rate": 9.318e-05, "token_acc": 0.85045662, "epoch": 2.09786277, "global_step/max_steps": "1865/8890", "percentage": "20.98%", "elapsed_time": "39m 43s", "remaining_time": "2h 29m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782416} {"loss": 0.49580321, "grad_norm": 1.83272338, "learning_rate": 9.318e-05, "token_acc": 0.84942887, "epoch": 2.09898763, "global_step/max_steps": "1866/8890", "percentage": "20.99%", "elapsed_time": "39m 44s", "remaining_time": "2h 29m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782465} {"loss": 0.5720315, "grad_norm": 2.22989368, "learning_rate": 9.317e-05, "token_acc": 0.83135392, "epoch": 2.10011249, "global_step/max_steps": "1867/8890", "percentage": "21.00%", "elapsed_time": "39m 45s", "remaining_time": "2h 29m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782499} {"loss": 0.59795105, "grad_norm": 2.30446124, "learning_rate": 9.316e-05, "token_acc": 0.81557847, "epoch": 2.10123735, "global_step/max_steps": "1868/8890", "percentage": "21.01%", "elapsed_time": "39m 46s", "remaining_time": "2h 29m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782574} {"loss": 0.59492588, "grad_norm": 2.12420416, "learning_rate": 9.315e-05, "token_acc": 0.81156317, "epoch": 2.1023622, "global_step/max_steps": "1869/8890", "percentage": "21.02%", "elapsed_time": "39m 48s", "remaining_time": "2h 29m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782618} {"loss": 0.48975778, "grad_norm": 2.4849391, "learning_rate": 9.314e-05, "token_acc": 0.83893557, "epoch": 2.10348706, "global_step/max_steps": "1870/8890", "percentage": "21.03%", "elapsed_time": "39m 49s", "remaining_time": "2h 29m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782662} {"loss": 0.41663039, "grad_norm": 1.76005137, "learning_rate": 9.313e-05, "token_acc": 0.86359077, "epoch": 2.10461192, "global_step/max_steps": "1871/8890", "percentage": "21.05%", "elapsed_time": "39m 50s", "remaining_time": "2h 29m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782702} {"loss": 0.46542567, "grad_norm": 2.04651427, "learning_rate": 9.312e-05, "token_acc": 0.84197531, "epoch": 2.10573678, "global_step/max_steps": "1872/8890", "percentage": "21.06%", "elapsed_time": "39m 51s", "remaining_time": "2h 29m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78275} {"loss": 0.51014662, "grad_norm": 2.0923233, "learning_rate": 9.311e-05, "token_acc": 0.83461117, "epoch": 2.10686164, "global_step/max_steps": "1873/8890", "percentage": "21.07%", "elapsed_time": "39m 52s", "remaining_time": "2h 29m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78283} {"loss": 0.53580093, "grad_norm": 1.93999767, "learning_rate": 9.31e-05, "token_acc": 0.82894737, "epoch": 2.1079865, "global_step/max_steps": "1874/8890", "percentage": "21.08%", "elapsed_time": "39m 53s", "remaining_time": "2h 29m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782838} {"loss": 0.54829413, "grad_norm": 2.29645705, "learning_rate": 9.309e-05, "token_acc": 0.81481481, "epoch": 2.10911136, "global_step/max_steps": "1875/8890", "percentage": "21.09%", "elapsed_time": "39m 55s", "remaining_time": "2h 29m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78284} {"loss": 0.59520662, "grad_norm": 2.02230811, "learning_rate": 9.308e-05, "token_acc": 0.81894934, "epoch": 2.11023622, "global_step/max_steps": "1876/8890", "percentage": "21.10%", "elapsed_time": "39m 56s", "remaining_time": "2h 29m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782838} {"loss": 0.3469415, "grad_norm": 2.23800135, "learning_rate": 9.307e-05, "token_acc": 0.88, "epoch": 2.11136108, "global_step/max_steps": "1877/8890", "percentage": "21.11%", "elapsed_time": "39m 57s", "remaining_time": "2h 29m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782949} {"loss": 0.54367423, "grad_norm": 2.41801667, "learning_rate": 9.306e-05, "token_acc": 0.81498829, "epoch": 2.11248594, "global_step/max_steps": "1878/8890", "percentage": "21.12%", "elapsed_time": "39m 58s", "remaining_time": "2h 29m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782983} {"loss": 0.50054216, "grad_norm": 2.19509172, "learning_rate": 9.305e-05, "token_acc": 0.84363178, "epoch": 2.1136108, "global_step/max_steps": "1879/8890", "percentage": "21.14%", "elapsed_time": "39m 59s", "remaining_time": "2h 29m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78302} {"loss": 0.5977599, "grad_norm": 2.28817296, "learning_rate": 9.304e-05, "token_acc": 0.81900452, "epoch": 2.11473566, "global_step/max_steps": "1880/8890", "percentage": "21.15%", "elapsed_time": "40m 0s", "remaining_time": "2h 29m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783096} {"loss": 0.50687408, "grad_norm": 2.18889642, "learning_rate": 9.303e-05, "token_acc": 0.85529716, "epoch": 2.11586052, "global_step/max_steps": "1881/8890", "percentage": "21.16%", "elapsed_time": "40m 1s", "remaining_time": "2h 29m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78313} {"loss": 0.56072193, "grad_norm": 2.13777494, "learning_rate": 9.302e-05, "token_acc": 0.82747934, "epoch": 2.11698538, "global_step/max_steps": "1882/8890", "percentage": "21.17%", "elapsed_time": "40m 3s", "remaining_time": "2h 29m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783162} {"loss": 0.45497072, "grad_norm": 2.49476409, "learning_rate": 9.301e-05, "token_acc": 0.84217877, "epoch": 2.11811024, "global_step/max_steps": "1883/8890", "percentage": "21.18%", "elapsed_time": "40m 4s", "remaining_time": "2h 29m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783234} {"loss": 0.52371842, "grad_norm": 1.98985445, "learning_rate": 9.301e-05, "token_acc": 0.84676705, "epoch": 2.1192351, "global_step/max_steps": "1884/8890", "percentage": "21.19%", "elapsed_time": "40m 5s", "remaining_time": "2h 29m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78323} {"loss": 0.47789121, "grad_norm": 2.00199628, "learning_rate": 9.3e-05, "token_acc": 0.83901919, "epoch": 2.12035996, "global_step/max_steps": "1885/8890", "percentage": "21.20%", "elapsed_time": "40m 6s", "remaining_time": "2h 29m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783227} {"loss": 0.42558149, "grad_norm": 2.03982019, "learning_rate": 9.299e-05, "token_acc": 0.86887608, "epoch": 2.12148481, "global_step/max_steps": "1886/8890", "percentage": "21.21%", "elapsed_time": "40m 7s", "remaining_time": "2h 29m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783294} {"loss": 0.50749874, "grad_norm": 2.47612572, "learning_rate": 9.298e-05, "token_acc": 0.84165478, "epoch": 2.12260967, "global_step/max_steps": "1887/8890", "percentage": "21.23%", "elapsed_time": "40m 8s", "remaining_time": "2h 28m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783344} {"loss": 0.57590759, "grad_norm": 1.90361214, "learning_rate": 9.297e-05, "token_acc": 0.82181515, "epoch": 2.12373453, "global_step/max_steps": "1888/8890", "percentage": "21.24%", "elapsed_time": "40m 10s", "remaining_time": "2h 28m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783376} {"loss": 0.55215013, "grad_norm": 2.29120517, "learning_rate": 9.296e-05, "token_acc": 0.83312883, "epoch": 2.12485939, "global_step/max_steps": "1889/8890", "percentage": "21.25%", "elapsed_time": "40m 11s", "remaining_time": "2h 28m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78341} {"loss": 0.5245055, "grad_norm": 1.88949978, "learning_rate": 9.295e-05, "token_acc": 0.8375, "epoch": 2.12598425, "global_step/max_steps": "1890/8890", "percentage": "21.26%", "elapsed_time": "40m 12s", "remaining_time": "2h 28m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783386} {"loss": 0.55336082, "grad_norm": 1.80319619, "learning_rate": 9.294e-05, "token_acc": 0.8225957, "epoch": 2.12710911, "global_step/max_steps": "1891/8890", "percentage": "21.27%", "elapsed_time": "40m 13s", "remaining_time": "2h 28m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783439} {"loss": 0.50736117, "grad_norm": 2.42417216, "learning_rate": 9.293e-05, "token_acc": 0.83333333, "epoch": 2.12823397, "global_step/max_steps": "1892/8890", "percentage": "21.28%", "elapsed_time": "40m 14s", "remaining_time": "2h 28m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783487} {"loss": 0.49127322, "grad_norm": 1.90251505, "learning_rate": 9.292e-05, "token_acc": 0.8368087, "epoch": 2.12935883, "global_step/max_steps": "1893/8890", "percentage": "21.29%", "elapsed_time": "40m 15s", "remaining_time": "2h 28m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783532} {"loss": 0.61809343, "grad_norm": 2.23720527, "learning_rate": 9.291e-05, "token_acc": 0.80512249, "epoch": 2.13048369, "global_step/max_steps": "1894/8890", "percentage": "21.30%", "elapsed_time": "40m 17s", "remaining_time": "2h 28m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783531} {"loss": 0.57550943, "grad_norm": 2.11709595, "learning_rate": 9.29e-05, "token_acc": 0.81188119, "epoch": 2.13160855, "global_step/max_steps": "1895/8890", "percentage": "21.32%", "elapsed_time": "40m 18s", "remaining_time": "2h 28m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78363} {"loss": 0.49402338, "grad_norm": 2.32220435, "learning_rate": 9.289e-05, "token_acc": 0.82946794, "epoch": 2.13273341, "global_step/max_steps": "1896/8890", "percentage": "21.33%", "elapsed_time": "40m 19s", "remaining_time": "2h 28m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783664} {"loss": 0.62053764, "grad_norm": 2.1153264, "learning_rate": 9.288e-05, "token_acc": 0.81473684, "epoch": 2.13385827, "global_step/max_steps": "1897/8890", "percentage": "21.34%", "elapsed_time": "40m 20s", "remaining_time": "2h 28m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783711} {"loss": 0.54490077, "grad_norm": 1.95995629, "learning_rate": 9.287e-05, "token_acc": 0.82150538, "epoch": 2.13498313, "global_step/max_steps": "1898/8890", "percentage": "21.35%", "elapsed_time": "40m 21s", "remaining_time": "2h 28m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783753} {"loss": 0.53083074, "grad_norm": 2.26392746, "learning_rate": 9.286e-05, "token_acc": 0.83814304, "epoch": 2.13610799, "global_step/max_steps": "1899/8890", "percentage": "21.36%", "elapsed_time": "40m 22s", "remaining_time": "2h 28m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783832} {"loss": 0.61080831, "grad_norm": 2.24478602, "learning_rate": 9.285e-05, "token_acc": 0.80676856, "epoch": 2.13723285, "global_step/max_steps": "1900/8890", "percentage": "21.37%", "elapsed_time": "40m 23s", "remaining_time": "2h 28m 37s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783833} {"loss": 0.47218794, "grad_norm": 1.87593138, "learning_rate": 9.284e-05, "token_acc": 0.85493562, "epoch": 2.13835771, "global_step/max_steps": "1901/8890", "percentage": "21.38%", "elapsed_time": "40m 24s", "remaining_time": "2h 28m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783941} {"loss": 0.60855049, "grad_norm": 1.90365827, "learning_rate": 9.283e-05, "token_acc": 0.82001756, "epoch": 2.13948256, "global_step/max_steps": "1902/8890", "percentage": "21.39%", "elapsed_time": "40m 26s", "remaining_time": "2h 28m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78398} {"loss": 0.4002741, "grad_norm": 1.99393344, "learning_rate": 9.282e-05, "token_acc": 0.87484036, "epoch": 2.14060742, "global_step/max_steps": "1903/8890", "percentage": "21.41%", "elapsed_time": "40m 27s", "remaining_time": "2h 28m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78405} {"loss": 0.44533196, "grad_norm": 1.95544255, "learning_rate": 9.281e-05, "token_acc": 0.86149936, "epoch": 2.14173228, "global_step/max_steps": "1904/8890", "percentage": "21.42%", "elapsed_time": "40m 28s", "remaining_time": "2h 28m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784123} {"loss": 0.43421122, "grad_norm": 1.75886452, "learning_rate": 9.28e-05, "token_acc": 0.86692382, "epoch": 2.14285714, "global_step/max_steps": "1905/8890", "percentage": "21.43%", "elapsed_time": "40m 29s", "remaining_time": "2h 28m 26s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784227} {"loss": 0.49433181, "grad_norm": 2.06756973, "learning_rate": 9.28e-05, "token_acc": 0.83660934, "epoch": 2.143982, "global_step/max_steps": "1906/8890", "percentage": "21.44%", "elapsed_time": "40m 30s", "remaining_time": "2h 28m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784277} {"loss": 0.56964588, "grad_norm": 2.21476889, "learning_rate": 9.279e-05, "token_acc": 0.81476846, "epoch": 2.14510686, "global_step/max_steps": "1907/8890", "percentage": "21.45%", "elapsed_time": "40m 31s", "remaining_time": "2h 28m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784324} {"loss": 0.58015752, "grad_norm": 1.78743756, "learning_rate": 9.278e-05, "token_acc": 0.81458003, "epoch": 2.14623172, "global_step/max_steps": "1908/8890", "percentage": "21.46%", "elapsed_time": "40m 32s", "remaining_time": "2h 28m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784366} {"loss": 0.42944127, "grad_norm": 2.03031182, "learning_rate": 9.277e-05, "token_acc": 0.85695709, "epoch": 2.14735658, "global_step/max_steps": "1909/8890", "percentage": "21.47%", "elapsed_time": "40m 33s", "remaining_time": "2h 28m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784427} {"loss": 0.52246678, "grad_norm": 2.11013317, "learning_rate": 9.276e-05, "token_acc": 0.83507549, "epoch": 2.14848144, "global_step/max_steps": "1910/8890", "percentage": "21.48%", "elapsed_time": "40m 34s", "remaining_time": "2h 28m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784462} {"loss": 0.4544048, "grad_norm": 1.9725877, "learning_rate": 9.275e-05, "token_acc": 0.85697941, "epoch": 2.1496063, "global_step/max_steps": "1911/8890", "percentage": "21.50%", "elapsed_time": "40m 35s", "remaining_time": "2h 28m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784495} {"loss": 0.45649648, "grad_norm": 1.91601789, "learning_rate": 9.274e-05, "token_acc": 0.85031847, "epoch": 2.15073116, "global_step/max_steps": "1912/8890", "percentage": "21.51%", "elapsed_time": "40m 37s", "remaining_time": "2h 28m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784496} {"loss": 0.5321697, "grad_norm": 2.27934957, "learning_rate": 9.273e-05, "token_acc": 0.83208396, "epoch": 2.15185602, "global_step/max_steps": "1913/8890", "percentage": "21.52%", "elapsed_time": "40m 38s", "remaining_time": "2h 28m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784569} {"loss": 0.5672453, "grad_norm": 2.21599197, "learning_rate": 9.272e-05, "token_acc": 0.81939163, "epoch": 2.15298088, "global_step/max_steps": "1914/8890", "percentage": "21.53%", "elapsed_time": "40m 39s", "remaining_time": "2h 28m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784652} {"loss": 0.58779454, "grad_norm": 2.12475276, "learning_rate": 9.271e-05, "token_acc": 0.80069124, "epoch": 2.15410574, "global_step/max_steps": "1915/8890", "percentage": "21.54%", "elapsed_time": "40m 40s", "remaining_time": "2h 28m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78469} {"loss": 0.52664137, "grad_norm": 1.79206657, "learning_rate": 9.27e-05, "token_acc": 0.82864914, "epoch": 2.1552306, "global_step/max_steps": "1916/8890", "percentage": "21.55%", "elapsed_time": "40m 41s", "remaining_time": "2h 28m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784687} {"loss": 0.49211967, "grad_norm": 2.21158242, "learning_rate": 9.269e-05, "token_acc": 0.83625731, "epoch": 2.15635546, "global_step/max_steps": "1917/8890", "percentage": "21.56%", "elapsed_time": "40m 42s", "remaining_time": "2h 28m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784761} {"loss": 0.42040133, "grad_norm": 2.10777044, "learning_rate": 9.268e-05, "token_acc": 0.87454765, "epoch": 2.15748031, "global_step/max_steps": "1918/8890", "percentage": "21.57%", "elapsed_time": "40m 43s", "remaining_time": "2h 28m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784888} {"loss": 0.49352598, "grad_norm": 2.15948129, "learning_rate": 9.267e-05, "token_acc": 0.84204131, "epoch": 2.15860517, "global_step/max_steps": "1919/8890", "percentage": "21.59%", "elapsed_time": "40m 44s", "remaining_time": "2h 28m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784949} {"loss": 0.58444327, "grad_norm": 1.93850362, "learning_rate": 9.266e-05, "token_acc": 0.82046138, "epoch": 2.15973003, "global_step/max_steps": "1920/8890", "percentage": "21.60%", "elapsed_time": "40m 45s", "remaining_time": "2h 27m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784981} {"loss": 0.57874489, "grad_norm": 2.03995466, "learning_rate": 9.265e-05, "token_acc": 0.82675439, "epoch": 2.16085489, "global_step/max_steps": "1921/8890", "percentage": "21.61%", "elapsed_time": "40m 47s", "remaining_time": "2h 27m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785027} {"loss": 0.4922244, "grad_norm": 2.22680902, "learning_rate": 9.264e-05, "token_acc": 0.84354628, "epoch": 2.16197975, "global_step/max_steps": "1922/8890", "percentage": "21.62%", "elapsed_time": "40m 48s", "remaining_time": "2h 27m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785065} {"loss": 0.50392574, "grad_norm": 2.19922805, "learning_rate": 9.263e-05, "token_acc": 0.8206278, "epoch": 2.16310461, "global_step/max_steps": "1923/8890", "percentage": "21.63%", "elapsed_time": "40m 49s", "remaining_time": "2h 27m 54s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785053} {"loss": 0.48725078, "grad_norm": 2.2552011, "learning_rate": 9.262e-05, "token_acc": 0.84114583, "epoch": 2.16422947, "global_step/max_steps": "1924/8890", "percentage": "21.64%", "elapsed_time": "40m 50s", "remaining_time": "2h 27m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785207} {"loss": 0.54481459, "grad_norm": 2.0218029, "learning_rate": 9.261e-05, "token_acc": 0.8229927, "epoch": 2.16535433, "global_step/max_steps": "1925/8890", "percentage": "21.65%", "elapsed_time": "40m 51s", "remaining_time": "2h 27m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785259} {"loss": 0.52502197, "grad_norm": 2.09787846, "learning_rate": 9.26e-05, "token_acc": 0.84562997, "epoch": 2.16647919, "global_step/max_steps": "1926/8890", "percentage": "21.66%", "elapsed_time": "40m 52s", "remaining_time": "2h 27m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785305} {"loss": 0.50419784, "grad_norm": 2.19394922, "learning_rate": 9.259e-05, "token_acc": 0.84555985, "epoch": 2.16760405, "global_step/max_steps": "1927/8890", "percentage": "21.68%", "elapsed_time": "40m 53s", "remaining_time": "2h 27m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785342} {"loss": 0.40869918, "grad_norm": 2.10551405, "learning_rate": 9.258e-05, "token_acc": 0.8801898, "epoch": 2.16872891, "global_step/max_steps": "1928/8890", "percentage": "21.69%", "elapsed_time": "40m 54s", "remaining_time": "2h 27m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785377} {"loss": 0.47205788, "grad_norm": 2.05398893, "learning_rate": 9.257e-05, "token_acc": 0.84282908, "epoch": 2.16985377, "global_step/max_steps": "1929/8890", "percentage": "21.70%", "elapsed_time": "40m 55s", "remaining_time": "2h 27m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785451} {"loss": 0.66347885, "grad_norm": 2.27327991, "learning_rate": 9.256e-05, "token_acc": 0.80490654, "epoch": 2.17097863, "global_step/max_steps": "1930/8890", "percentage": "21.71%", "elapsed_time": "40m 57s", "remaining_time": "2h 27m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785487} {"loss": 0.43708238, "grad_norm": 1.97595012, "learning_rate": 9.255e-05, "token_acc": 0.8505618, "epoch": 2.17210349, "global_step/max_steps": "1931/8890", "percentage": "21.72%", "elapsed_time": "40m 58s", "remaining_time": "2h 27m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785523} {"loss": 0.647349, "grad_norm": 2.20088649, "learning_rate": 9.254e-05, "token_acc": 0.81462926, "epoch": 2.17322835, "global_step/max_steps": "1932/8890", "percentage": "21.73%", "elapsed_time": "40m 59s", "remaining_time": "2h 27m 37s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785585} {"loss": 0.5050379, "grad_norm": 2.56924963, "learning_rate": 9.253e-05, "token_acc": 0.83819242, "epoch": 2.17435321, "global_step/max_steps": "1933/8890", "percentage": "21.74%", "elapsed_time": "41m 0s", "remaining_time": "2h 27m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785657} {"loss": 0.57759607, "grad_norm": 2.17131138, "learning_rate": 9.252e-05, "token_acc": 0.81905782, "epoch": 2.17547807, "global_step/max_steps": "1934/8890", "percentage": "21.75%", "elapsed_time": "41m 1s", "remaining_time": "2h 27m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785704} {"loss": 0.46797824, "grad_norm": 2.05874658, "learning_rate": 9.251e-05, "token_acc": 0.84368071, "epoch": 2.17660292, "global_step/max_steps": "1935/8890", "percentage": "21.77%", "elapsed_time": "41m 2s", "remaining_time": "2h 27m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785745} {"loss": 0.7805112, "grad_norm": 2.47820377, "learning_rate": 9.25e-05, "token_acc": 0.77788555, "epoch": 2.17772778, "global_step/max_steps": "1936/8890", "percentage": "21.78%", "elapsed_time": "41m 3s", "remaining_time": "2h 27m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785786} {"loss": 0.5650419, "grad_norm": 2.1595583, "learning_rate": 9.249e-05, "token_acc": 0.82224646, "epoch": 2.17885264, "global_step/max_steps": "1937/8890", "percentage": "21.79%", "elapsed_time": "41m 4s", "remaining_time": "2h 27m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785819} {"loss": 0.46184665, "grad_norm": 2.1102469, "learning_rate": 9.248e-05, "token_acc": 0.85714286, "epoch": 2.1799775, "global_step/max_steps": "1938/8890", "percentage": "21.80%", "elapsed_time": "41m 6s", "remaining_time": "2h 27m 26s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785819} {"loss": 0.65456039, "grad_norm": 2.44623113, "learning_rate": 9.247e-05, "token_acc": 0.78609626, "epoch": 2.18110236, "global_step/max_steps": "1939/8890", "percentage": "21.81%", "elapsed_time": "41m 7s", "remaining_time": "2h 27m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78587} {"loss": 0.55162024, "grad_norm": 2.26831722, "learning_rate": 9.246e-05, "token_acc": 0.83018868, "epoch": 2.18222722, "global_step/max_steps": "1940/8890", "percentage": "21.82%", "elapsed_time": "41m 8s", "remaining_time": "2h 27m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785914} {"loss": 0.52264696, "grad_norm": 2.05788326, "learning_rate": 9.245e-05, "token_acc": 0.8540724, "epoch": 2.18335208, "global_step/max_steps": "1941/8890", "percentage": "21.83%", "elapsed_time": "41m 9s", "remaining_time": "2h 27m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785949} {"loss": 0.54872704, "grad_norm": 1.98509538, "learning_rate": 9.245e-05, "token_acc": 0.83809524, "epoch": 2.18447694, "global_step/max_steps": "1942/8890", "percentage": "21.84%", "elapsed_time": "41m 10s", "remaining_time": "2h 27m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785989} {"loss": 0.32967114, "grad_norm": 1.63316453, "learning_rate": 9.244e-05, "token_acc": 0.89560976, "epoch": 2.1856018, "global_step/max_steps": "1943/8890", "percentage": "21.86%", "elapsed_time": "41m 11s", "remaining_time": "2h 27m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786032} {"loss": 0.65916413, "grad_norm": 2.13717151, "learning_rate": 9.243e-05, "token_acc": 0.79103283, "epoch": 2.18672666, "global_step/max_steps": "1944/8890", "percentage": "21.87%", "elapsed_time": "41m 13s", "remaining_time": "2h 27m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786071} {"loss": 0.44978002, "grad_norm": 1.65047479, "learning_rate": 9.242e-05, "token_acc": 0.86482085, "epoch": 2.18785152, "global_step/max_steps": "1945/8890", "percentage": "21.88%", "elapsed_time": "41m 14s", "remaining_time": "2h 27m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786105} {"loss": 0.50717419, "grad_norm": 2.366786, "learning_rate": 9.241e-05, "token_acc": 0.84022039, "epoch": 2.18897638, "global_step/max_steps": "1946/8890", "percentage": "21.89%", "elapsed_time": "41m 15s", "remaining_time": "2h 27m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786144} {"loss": 0.4892174, "grad_norm": 2.2032907, "learning_rate": 9.24e-05, "token_acc": 0.84555985, "epoch": 2.19010124, "global_step/max_steps": "1947/8890", "percentage": "21.90%", "elapsed_time": "41m 16s", "remaining_time": "2h 27m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78619} {"loss": 0.50546455, "grad_norm": 2.72430396, "learning_rate": 9.239e-05, "token_acc": 0.84724689, "epoch": 2.1912261, "global_step/max_steps": "1948/8890", "percentage": "21.91%", "elapsed_time": "41m 17s", "remaining_time": "2h 27m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786264} {"loss": 0.5576098, "grad_norm": 2.04754496, "learning_rate": 9.238e-05, "token_acc": 0.83449651, "epoch": 2.19235096, "global_step/max_steps": "1949/8890", "percentage": "21.92%", "elapsed_time": "41m 18s", "remaining_time": "2h 27m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786296} {"loss": 0.50909215, "grad_norm": 2.17090869, "learning_rate": 9.237e-05, "token_acc": 0.82794457, "epoch": 2.19347582, "global_step/max_steps": "1950/8890", "percentage": "21.93%", "elapsed_time": "41m 19s", "remaining_time": "2h 27m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786334} {"loss": 0.46981415, "grad_norm": 2.15496135, "learning_rate": 9.236e-05, "token_acc": 0.83354193, "epoch": 2.19460067, "global_step/max_steps": "1951/8890", "percentage": "21.95%", "elapsed_time": "41m 21s", "remaining_time": "2h 27m 4s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786375} {"loss": 0.55167127, "grad_norm": 2.3405087, "learning_rate": 9.235e-05, "token_acc": 0.82810811, "epoch": 2.19572553, "global_step/max_steps": "1952/8890", "percentage": "21.96%", "elapsed_time": "41m 22s", "remaining_time": "2h 27m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786419} {"loss": 0.56296062, "grad_norm": 2.40167546, "learning_rate": 9.234e-05, "token_acc": 0.83854819, "epoch": 2.19685039, "global_step/max_steps": "1953/8890", "percentage": "21.97%", "elapsed_time": "41m 23s", "remaining_time": "2h 27m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786479} {"loss": 0.52786559, "grad_norm": 1.97784114, "learning_rate": 9.233e-05, "token_acc": 0.83856089, "epoch": 2.19797525, "global_step/max_steps": "1954/8890", "percentage": "21.98%", "elapsed_time": "41m 24s", "remaining_time": "2h 26m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786467} {"loss": 0.50795615, "grad_norm": 2.13024688, "learning_rate": 9.232e-05, "token_acc": 0.83628319, "epoch": 2.19910011, "global_step/max_steps": "1955/8890", "percentage": "21.99%", "elapsed_time": "41m 25s", "remaining_time": "2h 26m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786508} {"loss": 0.7041918, "grad_norm": 2.42047691, "learning_rate": 9.231e-05, "token_acc": 0.77898551, "epoch": 2.20022497, "global_step/max_steps": "1956/8890", "percentage": "22.00%", "elapsed_time": "41m 26s", "remaining_time": "2h 26m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786551} {"loss": 0.70657432, "grad_norm": 2.09736633, "learning_rate": 9.23e-05, "token_acc": 0.78484848, "epoch": 2.20134983, "global_step/max_steps": "1957/8890", "percentage": "22.01%", "elapsed_time": "41m 27s", "remaining_time": "2h 26m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786591} {"loss": 0.54132169, "grad_norm": 1.93795168, "learning_rate": 9.229e-05, "token_acc": 0.83399209, "epoch": 2.20247469, "global_step/max_steps": "1958/8890", "percentage": "22.02%", "elapsed_time": "41m 29s", "remaining_time": "2h 26m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78663} {"loss": 0.5403834, "grad_norm": 2.11249781, "learning_rate": 9.228e-05, "token_acc": 0.8256351, "epoch": 2.20359955, "global_step/max_steps": "1959/8890", "percentage": "22.04%", "elapsed_time": "41m 30s", "remaining_time": "2h 26m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786674} {"loss": 0.48979044, "grad_norm": 1.81026864, "learning_rate": 9.227e-05, "token_acc": 0.84729494, "epoch": 2.20472441, "global_step/max_steps": "1960/8890", "percentage": "22.05%", "elapsed_time": "41m 31s", "remaining_time": "2h 26m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786744} {"loss": 0.52794039, "grad_norm": 2.48703742, "learning_rate": 9.226e-05, "token_acc": 0.81981982, "epoch": 2.20584927, "global_step/max_steps": "1961/8890", "percentage": "22.06%", "elapsed_time": "41m 32s", "remaining_time": "2h 26m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786786} {"loss": 0.52319634, "grad_norm": 1.96601057, "learning_rate": 9.225e-05, "token_acc": 0.81862745, "epoch": 2.20697413, "global_step/max_steps": "1962/8890", "percentage": "22.07%", "elapsed_time": "41m 33s", "remaining_time": "2h 26m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786824} {"loss": 0.36529213, "grad_norm": 1.98721278, "learning_rate": 9.224e-05, "token_acc": 0.85531915, "epoch": 2.20809899, "global_step/max_steps": "1963/8890", "percentage": "22.08%", "elapsed_time": "41m 34s", "remaining_time": "2h 26m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786861} {"loss": 0.58457696, "grad_norm": 2.13041687, "learning_rate": 9.223e-05, "token_acc": 0.83152828, "epoch": 2.20922385, "global_step/max_steps": "1964/8890", "percentage": "22.09%", "elapsed_time": "41m 35s", "remaining_time": "2h 26m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786932} {"loss": 0.60556269, "grad_norm": 1.95694864, "learning_rate": 9.222e-05, "token_acc": 0.81471136, "epoch": 2.21034871, "global_step/max_steps": "1965/8890", "percentage": "22.10%", "elapsed_time": "41m 36s", "remaining_time": "2h 26m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786959} {"loss": 0.68362337, "grad_norm": 2.18930411, "learning_rate": 9.221e-05, "token_acc": 0.79058598, "epoch": 2.21147357, "global_step/max_steps": "1966/8890", "percentage": "22.11%", "elapsed_time": "41m 38s", "remaining_time": "2h 26m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786993} {"loss": 0.38453627, "grad_norm": 2.20546985, "learning_rate": 9.22e-05, "token_acc": 0.86357039, "epoch": 2.21259843, "global_step/max_steps": "1967/8890", "percentage": "22.13%", "elapsed_time": "41m 39s", "remaining_time": "2h 26m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787077} {"loss": 0.53023982, "grad_norm": 2.14177036, "learning_rate": 9.219e-05, "token_acc": 0.83870968, "epoch": 2.21372328, "global_step/max_steps": "1968/8890", "percentage": "22.14%", "elapsed_time": "41m 40s", "remaining_time": "2h 26m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78715} {"loss": 0.40376443, "grad_norm": 2.10494876, "learning_rate": 9.218e-05, "token_acc": 0.87575758, "epoch": 2.21484814, "global_step/max_steps": "1969/8890", "percentage": "22.15%", "elapsed_time": "41m 41s", "remaining_time": "2h 26m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787188} {"loss": 0.51905805, "grad_norm": 2.01289368, "learning_rate": 9.217e-05, "token_acc": 0.82655502, "epoch": 2.215973, "global_step/max_steps": "1970/8890", "percentage": "22.16%", "elapsed_time": "41m 42s", "remaining_time": "2h 26m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787235} {"loss": 0.57664877, "grad_norm": 2.3275063, "learning_rate": 9.216e-05, "token_acc": 0.83519553, "epoch": 2.21709786, "global_step/max_steps": "1971/8890", "percentage": "22.17%", "elapsed_time": "41m 43s", "remaining_time": "2h 26m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787303} {"loss": 0.53558499, "grad_norm": 2.09107661, "learning_rate": 9.215e-05, "token_acc": 0.83395291, "epoch": 2.21822272, "global_step/max_steps": "1972/8890", "percentage": "22.18%", "elapsed_time": "41m 44s", "remaining_time": "2h 26m 26s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787348} {"loss": 0.53881705, "grad_norm": 2.33593035, "learning_rate": 9.214e-05, "token_acc": 0.81974249, "epoch": 2.21934758, "global_step/max_steps": "1973/8890", "percentage": "22.19%", "elapsed_time": "41m 45s", "remaining_time": "2h 26m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787423} {"loss": 0.55309319, "grad_norm": 2.20316863, "learning_rate": 9.213e-05, "token_acc": 0.81766055, "epoch": 2.22047244, "global_step/max_steps": "1974/8890", "percentage": "22.20%", "elapsed_time": "41m 46s", "remaining_time": "2h 26m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787457} {"loss": 0.46676832, "grad_norm": 2.11051106, "learning_rate": 9.212e-05, "token_acc": 0.85634589, "epoch": 2.2215973, "global_step/max_steps": "1975/8890", "percentage": "22.22%", "elapsed_time": "41m 47s", "remaining_time": "2h 26m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787502} {"loss": 0.45497653, "grad_norm": 1.91792798, "learning_rate": 9.211e-05, "token_acc": 0.85890258, "epoch": 2.22272216, "global_step/max_steps": "1976/8890", "percentage": "22.23%", "elapsed_time": "41m 49s", "remaining_time": "2h 26m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787546} {"loss": 0.56648982, "grad_norm": 1.78309309, "learning_rate": 9.21e-05, "token_acc": 0.82529118, "epoch": 2.22384702, "global_step/max_steps": "1977/8890", "percentage": "22.24%", "elapsed_time": "41m 50s", "remaining_time": "2h 26m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787542} {"loss": 0.51847744, "grad_norm": 2.1848793, "learning_rate": 9.209e-05, "token_acc": 0.83698297, "epoch": 2.22497188, "global_step/max_steps": "1978/8890", "percentage": "22.25%", "elapsed_time": "41m 51s", "remaining_time": "2h 26m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787577} {"loss": 0.53712308, "grad_norm": 2.29237652, "learning_rate": 9.208e-05, "token_acc": 0.81875, "epoch": 2.22609674, "global_step/max_steps": "1979/8890", "percentage": "22.26%", "elapsed_time": "41m 52s", "remaining_time": "2h 26m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787636} {"loss": 0.60761881, "grad_norm": 2.14382458, "learning_rate": 9.207e-05, "token_acc": 0.81040892, "epoch": 2.2272216, "global_step/max_steps": "1980/8890", "percentage": "22.27%", "elapsed_time": "41m 53s", "remaining_time": "2h 26m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787664} {"loss": 0.53281891, "grad_norm": 2.22957945, "learning_rate": 9.206e-05, "token_acc": 0.82923833, "epoch": 2.22834646, "global_step/max_steps": "1981/8890", "percentage": "22.28%", "elapsed_time": "41m 54s", "remaining_time": "2h 26m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787721} {"loss": 0.49025166, "grad_norm": 1.95950127, "learning_rate": 9.205e-05, "token_acc": 0.85580524, "epoch": 2.22947132, "global_step/max_steps": "1982/8890", "percentage": "22.29%", "elapsed_time": "41m 56s", "remaining_time": "2h 26m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787755} {"loss": 0.56222856, "grad_norm": 2.53972507, "learning_rate": 9.204e-05, "token_acc": 0.82305936, "epoch": 2.23059618, "global_step/max_steps": "1983/8890", "percentage": "22.31%", "elapsed_time": "41m 57s", "remaining_time": "2h 26m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787827} {"loss": 0.57490802, "grad_norm": 2.1149323, "learning_rate": 9.203e-05, "token_acc": 0.82640333, "epoch": 2.23172103, "global_step/max_steps": "1984/8890", "percentage": "22.32%", "elapsed_time": "41m 58s", "remaining_time": "2h 26m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787868} {"loss": 0.56818259, "grad_norm": 2.0327189, "learning_rate": 9.202e-05, "token_acc": 0.81873112, "epoch": 2.23284589, "global_step/max_steps": "1985/8890", "percentage": "22.33%", "elapsed_time": "41m 59s", "remaining_time": "2h 26m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787906} {"loss": 0.60573369, "grad_norm": 2.77336454, "learning_rate": 9.201e-05, "token_acc": 0.81869688, "epoch": 2.23397075, "global_step/max_steps": "1986/8890", "percentage": "22.34%", "elapsed_time": "42m 0s", "remaining_time": "2h 26m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787966} {"loss": 0.51512301, "grad_norm": 2.19113207, "learning_rate": 9.2e-05, "token_acc": 0.83897436, "epoch": 2.23509561, "global_step/max_steps": "1987/8890", "percentage": "22.35%", "elapsed_time": "42m 1s", "remaining_time": "2h 25m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788032} {"loss": 0.53571528, "grad_norm": 2.27235723, "learning_rate": 9.199e-05, "token_acc": 0.83540023, "epoch": 2.23622047, "global_step/max_steps": "1988/8890", "percentage": "22.36%", "elapsed_time": "42m 2s", "remaining_time": "2h 25m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788077} {"loss": 0.59739161, "grad_norm": 2.21013713, "learning_rate": 9.198e-05, "token_acc": 0.81788079, "epoch": 2.23734533, "global_step/max_steps": "1989/8890", "percentage": "22.37%", "elapsed_time": "42m 3s", "remaining_time": "2h 25m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788147} {"loss": 0.51766121, "grad_norm": 2.24924326, "learning_rate": 9.197e-05, "token_acc": 0.84615385, "epoch": 2.23847019, "global_step/max_steps": "1990/8890", "percentage": "22.38%", "elapsed_time": "42m 4s", "remaining_time": "2h 25m 54s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788191} {"loss": 0.59146023, "grad_norm": 2.48624897, "learning_rate": 9.196e-05, "token_acc": 0.82105263, "epoch": 2.23959505, "global_step/max_steps": "1991/8890", "percentage": "22.40%", "elapsed_time": "42m 5s", "remaining_time": "2h 25m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788256} {"loss": 0.59105814, "grad_norm": 2.19928026, "learning_rate": 9.195e-05, "token_acc": 0.82482482, "epoch": 2.24071991, "global_step/max_steps": "1992/8890", "percentage": "22.41%", "elapsed_time": "42m 6s", "remaining_time": "2h 25m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788287} {"loss": 0.50988513, "grad_norm": 2.16942, "learning_rate": 9.194e-05, "token_acc": 0.84722222, "epoch": 2.24184477, "global_step/max_steps": "1993/8890", "percentage": "22.42%", "elapsed_time": "42m 8s", "remaining_time": "2h 25m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788314} {"loss": 0.59587121, "grad_norm": 2.14276242, "learning_rate": 9.193e-05, "token_acc": 0.81863727, "epoch": 2.24296963, "global_step/max_steps": "1994/8890", "percentage": "22.43%", "elapsed_time": "42m 9s", "remaining_time": "2h 25m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788347} {"loss": 0.43084201, "grad_norm": 1.83189666, "learning_rate": 9.192e-05, "token_acc": 0.84837862, "epoch": 2.24409449, "global_step/max_steps": "1995/8890", "percentage": "22.44%", "elapsed_time": "42m 10s", "remaining_time": "2h 25m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788387} {"loss": 0.64128071, "grad_norm": 2.0366931, "learning_rate": 9.191e-05, "token_acc": 0.8208441, "epoch": 2.24521935, "global_step/max_steps": "1996/8890", "percentage": "22.45%", "elapsed_time": "42m 11s", "remaining_time": "2h 25m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788426} {"loss": 0.62679112, "grad_norm": 2.47795105, "learning_rate": 9.19e-05, "token_acc": 0.80980861, "epoch": 2.24634421, "global_step/max_steps": "1997/8890", "percentage": "22.46%", "elapsed_time": "42m 12s", "remaining_time": "2h 25m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788482} {"loss": 0.52794802, "grad_norm": 1.78447783, "learning_rate": 9.189e-05, "token_acc": 0.84725966, "epoch": 2.24746907, "global_step/max_steps": "1998/8890", "percentage": "22.47%", "elapsed_time": "42m 13s", "remaining_time": "2h 25m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78851} {"loss": 0.50459623, "grad_norm": 2.28506422, "learning_rate": 9.188e-05, "token_acc": 0.84553928, "epoch": 2.24859393, "global_step/max_steps": "1999/8890", "percentage": "22.49%", "elapsed_time": "42m 15s", "remaining_time": "2h 25m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788554} {"loss": 0.5561958, "grad_norm": 2.10836554, "learning_rate": 9.187e-05, "token_acc": 0.84145078, "epoch": 2.24971879, "global_step/max_steps": "2000/8890", "percentage": "22.50%", "elapsed_time": "42m 16s", "remaining_time": "2h 25m 37s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788599} {"loss": 0.63654149, "grad_norm": 2.20924997, "learning_rate": 9.185e-05, "token_acc": 0.81200453, "epoch": 2.25084364, "global_step/max_steps": "2001/8890", "percentage": "22.51%", "elapsed_time": "42m 17s", "remaining_time": "2h 25m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788634} {"loss": 0.65165603, "grad_norm": 2.20852852, "learning_rate": 9.184e-05, "token_acc": 0.81029263, "epoch": 2.2519685, "global_step/max_steps": "2002/8890", "percentage": "22.52%", "elapsed_time": "42m 18s", "remaining_time": "2h 25m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788671} {"loss": 0.49521476, "grad_norm": 1.9858371, "learning_rate": 9.183e-05, "token_acc": 0.84138786, "epoch": 2.25309336, "global_step/max_steps": "2003/8890", "percentage": "22.53%", "elapsed_time": "42m 19s", "remaining_time": "2h 25m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788699} {"loss": 0.55677778, "grad_norm": 2.05694485, "learning_rate": 9.182e-05, "token_acc": 0.81127733, "epoch": 2.25421822, "global_step/max_steps": "2004/8890", "percentage": "22.54%", "elapsed_time": "42m 20s", "remaining_time": "2h 25m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788729} {"loss": 0.66816843, "grad_norm": 2.13837004, "learning_rate": 9.181e-05, "token_acc": 0.8013308, "epoch": 2.25534308, "global_step/max_steps": "2005/8890", "percentage": "22.55%", "elapsed_time": "42m 21s", "remaining_time": "2h 25m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78877} {"loss": 0.68234873, "grad_norm": 1.94176984, "learning_rate": 9.18e-05, "token_acc": 0.81239389, "epoch": 2.25646794, "global_step/max_steps": "2006/8890", "percentage": "22.56%", "elapsed_time": "42m 23s", "remaining_time": "2h 25m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788751} {"loss": 0.6143477, "grad_norm": 2.15816998, "learning_rate": 9.179e-05, "token_acc": 0.81994192, "epoch": 2.2575928, "global_step/max_steps": "2007/8890", "percentage": "22.58%", "elapsed_time": "42m 24s", "remaining_time": "2h 25m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788794} {"loss": 0.52643293, "grad_norm": 2.09989047, "learning_rate": 9.178e-05, "token_acc": 0.82679739, "epoch": 2.25871766, "global_step/max_steps": "2008/8890", "percentage": "22.59%", "elapsed_time": "42m 25s", "remaining_time": "2h 25m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788831} {"loss": 0.62693423, "grad_norm": 2.25057507, "learning_rate": 9.177e-05, "token_acc": 0.79978118, "epoch": 2.25984252, "global_step/max_steps": "2009/8890", "percentage": "22.60%", "elapsed_time": "42m 26s", "remaining_time": "2h 25m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788875} {"loss": 0.47394943, "grad_norm": 2.08227372, "learning_rate": 9.176e-05, "token_acc": 0.83308715, "epoch": 2.26096738, "global_step/max_steps": "2010/8890", "percentage": "22.61%", "elapsed_time": "42m 27s", "remaining_time": "2h 25m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78892} {"loss": 0.31784254, "grad_norm": 1.48555779, "learning_rate": 9.175e-05, "token_acc": 0.89016237, "epoch": 2.26209224, "global_step/max_steps": "2011/8890", "percentage": "22.62%", "elapsed_time": "42m 28s", "remaining_time": "2h 25m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788946} {"loss": 0.53402078, "grad_norm": 2.30276775, "learning_rate": 9.174e-05, "token_acc": 0.84410646, "epoch": 2.2632171, "global_step/max_steps": "2012/8890", "percentage": "22.63%", "elapsed_time": "42m 30s", "remaining_time": "2h 25m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788992} {"loss": 0.62416101, "grad_norm": 2.33132577, "learning_rate": 9.173e-05, "token_acc": 0.82190265, "epoch": 2.26434196, "global_step/max_steps": "2013/8890", "percentage": "22.64%", "elapsed_time": "42m 31s", "remaining_time": "2h 25m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788974} {"loss": 0.41121352, "grad_norm": 2.42889142, "learning_rate": 9.172e-05, "token_acc": 0.8601626, "epoch": 2.26546682, "global_step/max_steps": "2014/8890", "percentage": "22.65%", "elapsed_time": "42m 32s", "remaining_time": "2h 25m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789035} {"loss": 0.6116935, "grad_norm": 2.41848683, "learning_rate": 9.171e-05, "token_acc": 0.81828979, "epoch": 2.26659168, "global_step/max_steps": "2015/8890", "percentage": "22.67%", "elapsed_time": "42m 33s", "remaining_time": "2h 25m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789062} {"loss": 0.51966596, "grad_norm": 2.3200624, "learning_rate": 9.17e-05, "token_acc": 0.83277962, "epoch": 2.26771654, "global_step/max_steps": "2016/8890", "percentage": "22.68%", "elapsed_time": "42m 34s", "remaining_time": "2h 25m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789087} {"loss": 0.55755383, "grad_norm": 2.27216053, "learning_rate": 9.169e-05, "token_acc": 0.82241015, "epoch": 2.26884139, "global_step/max_steps": "2017/8890", "percentage": "22.69%", "elapsed_time": "42m 35s", "remaining_time": "2h 25m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789124} {"loss": 0.67838448, "grad_norm": 2.28751278, "learning_rate": 9.168e-05, "token_acc": 0.80057252, "epoch": 2.26996625, "global_step/max_steps": "2018/8890", "percentage": "22.70%", "elapsed_time": "42m 37s", "remaining_time": "2h 25m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78915} {"loss": 0.60368252, "grad_norm": 1.95992041, "learning_rate": 9.167e-05, "token_acc": 0.80890688, "epoch": 2.27109111, "global_step/max_steps": "2019/8890", "percentage": "22.71%", "elapsed_time": "42m 38s", "remaining_time": "2h 25m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78913} {"loss": 0.50117236, "grad_norm": 2.13758373, "learning_rate": 9.166e-05, "token_acc": 0.84785134, "epoch": 2.27221597, "global_step/max_steps": "2020/8890", "percentage": "22.72%", "elapsed_time": "42m 39s", "remaining_time": "2h 25m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789178} {"loss": 0.53768432, "grad_norm": 2.38717771, "learning_rate": 9.165e-05, "token_acc": 0.85251799, "epoch": 2.27334083, "global_step/max_steps": "2021/8890", "percentage": "22.73%", "elapsed_time": "42m 40s", "remaining_time": "2h 25m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789211} {"loss": 0.63273042, "grad_norm": 2.42301345, "learning_rate": 9.164e-05, "token_acc": 0.80724299, "epoch": 2.27446569, "global_step/max_steps": "2022/8890", "percentage": "22.74%", "elapsed_time": "42m 41s", "remaining_time": "2h 25m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78925} {"loss": 0.60783172, "grad_norm": 2.30089879, "learning_rate": 9.163e-05, "token_acc": 0.8137931, "epoch": 2.27559055, "global_step/max_steps": "2023/8890", "percentage": "22.76%", "elapsed_time": "42m 43s", "remaining_time": "2h 25m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789287} {"loss": 0.47697207, "grad_norm": 2.32333279, "learning_rate": 9.162e-05, "token_acc": 0.83809524, "epoch": 2.27671541, "global_step/max_steps": "2024/8890", "percentage": "22.77%", "elapsed_time": "42m 44s", "remaining_time": "2h 24m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789343} {"loss": 0.48776823, "grad_norm": 2.34750104, "learning_rate": 9.161e-05, "token_acc": 0.84548105, "epoch": 2.27784027, "global_step/max_steps": "2025/8890", "percentage": "22.78%", "elapsed_time": "42m 45s", "remaining_time": "2h 24m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78941} {"loss": 0.60375571, "grad_norm": 1.97649729, "learning_rate": 9.16e-05, "token_acc": 0.81515712, "epoch": 2.27896513, "global_step/max_steps": "2026/8890", "percentage": "22.79%", "elapsed_time": "42m 46s", "remaining_time": "2h 24m 54s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789442} {"loss": 0.73875922, "grad_norm": 2.55162024, "learning_rate": 9.159e-05, "token_acc": 0.78039702, "epoch": 2.28008999, "global_step/max_steps": "2027/8890", "percentage": "22.80%", "elapsed_time": "42m 47s", "remaining_time": "2h 24m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789482} {"loss": 0.52843946, "grad_norm": 2.16530609, "learning_rate": 9.158e-05, "token_acc": 0.82227488, "epoch": 2.28121485, "global_step/max_steps": "2028/8890", "percentage": "22.81%", "elapsed_time": "42m 48s", "remaining_time": "2h 24m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789515} {"loss": 0.53341764, "grad_norm": 2.34952068, "learning_rate": 9.157e-05, "token_acc": 0.82894737, "epoch": 2.28233971, "global_step/max_steps": "2029/8890", "percentage": "22.82%", "elapsed_time": "42m 49s", "remaining_time": "2h 24m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789542} {"loss": 0.65269381, "grad_norm": 2.08468461, "learning_rate": 9.156e-05, "token_acc": 0.80555556, "epoch": 2.28346457, "global_step/max_steps": "2030/8890", "percentage": "22.83%", "elapsed_time": "42m 51s", "remaining_time": "2h 24m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789575} {"loss": 0.60867906, "grad_norm": 2.19690704, "learning_rate": 9.155e-05, "token_acc": 0.80769231, "epoch": 2.28458943, "global_step/max_steps": "2031/8890", "percentage": "22.85%", "elapsed_time": "42m 52s", "remaining_time": "2h 24m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789621} {"loss": 0.38563955, "grad_norm": 1.85456228, "learning_rate": 9.154e-05, "token_acc": 0.86838535, "epoch": 2.28571429, "global_step/max_steps": "2032/8890", "percentage": "22.86%", "elapsed_time": "42m 53s", "remaining_time": "2h 24m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789651} {"loss": 0.58806539, "grad_norm": 2.07368731, "learning_rate": 9.153e-05, "token_acc": 0.81230448, "epoch": 2.28683915, "global_step/max_steps": "2033/8890", "percentage": "22.87%", "elapsed_time": "42m 54s", "remaining_time": "2h 24m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789678} {"loss": 0.4005692, "grad_norm": 1.98934722, "learning_rate": 9.152e-05, "token_acc": 0.85452323, "epoch": 2.287964, "global_step/max_steps": "2034/8890", "percentage": "22.88%", "elapsed_time": "42m 55s", "remaining_time": "2h 24m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789822} {"loss": 0.48940128, "grad_norm": 1.90428114, "learning_rate": 9.151e-05, "token_acc": 0.85363248, "epoch": 2.28908886, "global_step/max_steps": "2035/8890", "percentage": "22.89%", "elapsed_time": "42m 56s", "remaining_time": "2h 24m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789855} {"loss": 0.50308281, "grad_norm": 2.27491713, "learning_rate": 9.15e-05, "token_acc": 0.84480432, "epoch": 2.29021372, "global_step/max_steps": "2036/8890", "percentage": "22.90%", "elapsed_time": "42m 57s", "remaining_time": "2h 24m 37s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789898} {"loss": 0.59989291, "grad_norm": 2.71116877, "learning_rate": 9.148e-05, "token_acc": 0.81921618, "epoch": 2.29133858, "global_step/max_steps": "2037/8890", "percentage": "22.91%", "elapsed_time": "42m 58s", "remaining_time": "2h 24m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78994} {"loss": 0.61797065, "grad_norm": 2.87623167, "learning_rate": 9.147e-05, "token_acc": 0.81768559, "epoch": 2.29246344, "global_step/max_steps": "2038/8890", "percentage": "22.92%", "elapsed_time": "42m 59s", "remaining_time": "2h 24m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79003} {"loss": 0.53291178, "grad_norm": 2.25935483, "learning_rate": 9.146e-05, "token_acc": 0.82669537, "epoch": 2.2935883, "global_step/max_steps": "2039/8890", "percentage": "22.94%", "elapsed_time": "43m 0s", "remaining_time": "2h 24m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790057} {"loss": 0.56887162, "grad_norm": 2.21689582, "learning_rate": 9.145e-05, "token_acc": 0.82458234, "epoch": 2.29471316, "global_step/max_steps": "2040/8890", "percentage": "22.95%", "elapsed_time": "43m 1s", "remaining_time": "2h 24m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790094} {"loss": 0.6024664, "grad_norm": 2.19654059, "learning_rate": 9.144e-05, "token_acc": 0.8202765, "epoch": 2.29583802, "global_step/max_steps": "2041/8890", "percentage": "22.96%", "elapsed_time": "43m 3s", "remaining_time": "2h 24m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790133} {"loss": 0.48055333, "grad_norm": 2.35186791, "learning_rate": 9.143e-05, "token_acc": 0.83768116, "epoch": 2.29696288, "global_step/max_steps": "2042/8890", "percentage": "22.97%", "elapsed_time": "43m 4s", "remaining_time": "2h 24m 26s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790165} {"loss": 0.58704424, "grad_norm": 2.31080198, "learning_rate": 9.142e-05, "token_acc": 0.8171926, "epoch": 2.29808774, "global_step/max_steps": "2043/8890", "percentage": "22.98%", "elapsed_time": "43m 5s", "remaining_time": "2h 24m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790243} {"loss": 0.47317156, "grad_norm": 1.99857628, "learning_rate": 9.141e-05, "token_acc": 0.84548422, "epoch": 2.2992126, "global_step/max_steps": "2044/8890", "percentage": "22.99%", "elapsed_time": "43m 6s", "remaining_time": "2h 24m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790323} {"loss": 0.62175995, "grad_norm": 2.27121902, "learning_rate": 9.14e-05, "token_acc": 0.80493033, "epoch": 2.30033746, "global_step/max_steps": "2045/8890", "percentage": "23.00%", "elapsed_time": "43m 7s", "remaining_time": "2h 24m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790366} {"loss": 0.41239303, "grad_norm": 1.78569412, "learning_rate": 9.139e-05, "token_acc": 0.86659878, "epoch": 2.30146232, "global_step/max_steps": "2046/8890", "percentage": "23.01%", "elapsed_time": "43m 8s", "remaining_time": "2h 24m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790411} {"loss": 0.44216287, "grad_norm": 2.36489391, "learning_rate": 9.138e-05, "token_acc": 0.85761589, "epoch": 2.30258718, "global_step/max_steps": "2047/8890", "percentage": "23.03%", "elapsed_time": "43m 9s", "remaining_time": "2h 24m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79048} {"loss": 0.57619369, "grad_norm": 1.74433434, "learning_rate": 9.137e-05, "token_acc": 0.82084942, "epoch": 2.30371204, "global_step/max_steps": "2048/8890", "percentage": "23.04%", "elapsed_time": "43m 10s", "remaining_time": "2h 24m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790439} {"loss": 0.54390424, "grad_norm": 2.27001858, "learning_rate": 9.136e-05, "token_acc": 0.82261905, "epoch": 2.3048369, "global_step/max_steps": "2049/8890", "percentage": "23.05%", "elapsed_time": "43m 12s", "remaining_time": "2h 24m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79031} {"loss": 0.44994164, "grad_norm": 2.02690125, "learning_rate": 9.135e-05, "token_acc": 0.85714286, "epoch": 2.30596175, "global_step/max_steps": "2050/8890", "percentage": "23.06%", "elapsed_time": "43m 13s", "remaining_time": "2h 24m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790333} {"loss": 0.60404295, "grad_norm": 2.32868981, "learning_rate": 9.134e-05, "token_acc": 0.82075472, "epoch": 2.30708661, "global_step/max_steps": "2051/8890", "percentage": "23.07%", "elapsed_time": "43m 15s", "remaining_time": "2h 24m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790362} {"loss": 0.44460544, "grad_norm": 2.32504511, "learning_rate": 9.133e-05, "token_acc": 0.85883905, "epoch": 2.30821147, "global_step/max_steps": "2052/8890", "percentage": "23.08%", "elapsed_time": "43m 16s", "remaining_time": "2h 24m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790399} {"loss": 0.72088277, "grad_norm": 2.37077212, "learning_rate": 9.132e-05, "token_acc": 0.76856316, "epoch": 2.30933633, "global_step/max_steps": "2053/8890", "percentage": "23.09%", "elapsed_time": "43m 17s", "remaining_time": "2h 24m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790431} {"loss": 0.63105798, "grad_norm": 2.1910255, "learning_rate": 9.131e-05, "token_acc": 0.80079286, "epoch": 2.31046119, "global_step/max_steps": "2054/8890", "percentage": "23.10%", "elapsed_time": "43m 18s", "remaining_time": "2h 24m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790457} {"loss": 0.437244, "grad_norm": 1.80846274, "learning_rate": 9.13e-05, "token_acc": 0.84833165, "epoch": 2.31158605, "global_step/max_steps": "2055/8890", "percentage": "23.12%", "elapsed_time": "43m 19s", "remaining_time": "2h 24m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790482} {"loss": 0.44477606, "grad_norm": 1.97967947, "learning_rate": 9.129e-05, "token_acc": 0.85612969, "epoch": 2.31271091, "global_step/max_steps": "2056/8890", "percentage": "23.13%", "elapsed_time": "43m 20s", "remaining_time": "2h 24m 4s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790538} {"loss": 0.62060559, "grad_norm": 2.20070601, "learning_rate": 9.128e-05, "token_acc": 0.81798483, "epoch": 2.31383577, "global_step/max_steps": "2057/8890", "percentage": "23.14%", "elapsed_time": "43m 21s", "remaining_time": "2h 24m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790579} {"loss": 0.53504694, "grad_norm": 2.08275557, "learning_rate": 9.127e-05, "token_acc": 0.83387978, "epoch": 2.31496063, "global_step/max_steps": "2058/8890", "percentage": "23.15%", "elapsed_time": "43m 23s", "remaining_time": "2h 24m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790615} {"loss": 0.63550681, "grad_norm": 2.29959345, "learning_rate": 9.125e-05, "token_acc": 0.79756637, "epoch": 2.31608549, "global_step/max_steps": "2059/8890", "percentage": "23.16%", "elapsed_time": "43m 24s", "remaining_time": "2h 24m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790581} {"loss": 0.69747543, "grad_norm": 2.18987703, "learning_rate": 9.124e-05, "token_acc": 0.79569892, "epoch": 2.31721035, "global_step/max_steps": "2060/8890", "percentage": "23.17%", "elapsed_time": "43m 25s", "remaining_time": "2h 23m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79065} {"loss": 0.76435357, "grad_norm": 2.22865582, "learning_rate": 9.123e-05, "token_acc": 0.7809439, "epoch": 2.31833521, "global_step/max_steps": "2061/8890", "percentage": "23.18%", "elapsed_time": "43m 26s", "remaining_time": "2h 23m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790759} {"loss": 0.53254092, "grad_norm": 2.05330753, "learning_rate": 9.122e-05, "token_acc": 0.83550296, "epoch": 2.31946007, "global_step/max_steps": "2062/8890", "percentage": "23.19%", "elapsed_time": "43m 27s", "remaining_time": "2h 23m 54s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790799} {"loss": 0.56644249, "grad_norm": 2.19860458, "learning_rate": 9.121e-05, "token_acc": 0.83293839, "epoch": 2.32058493, "global_step/max_steps": "2063/8890", "percentage": "23.21%", "elapsed_time": "43m 28s", "remaining_time": "2h 23m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790866} {"loss": 0.48559415, "grad_norm": 1.92125714, "learning_rate": 9.12e-05, "token_acc": 0.86359077, "epoch": 2.32170979, "global_step/max_steps": "2064/8890", "percentage": "23.22%", "elapsed_time": "43m 29s", "remaining_time": "2h 23m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790898} {"loss": 0.38627636, "grad_norm": 1.64743447, "learning_rate": 9.119e-05, "token_acc": 0.85361391, "epoch": 2.32283465, "global_step/max_steps": "2065/8890", "percentage": "23.23%", "elapsed_time": "43m 30s", "remaining_time": "2h 23m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790926} {"loss": 0.50843221, "grad_norm": 2.18554401, "learning_rate": 9.118e-05, "token_acc": 0.83902439, "epoch": 2.32395951, "global_step/max_steps": "2066/8890", "percentage": "23.24%", "elapsed_time": "43m 31s", "remaining_time": "2h 23m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790965} {"loss": 0.6548906, "grad_norm": 2.41791105, "learning_rate": 9.117e-05, "token_acc": 0.8007472, "epoch": 2.32508436, "global_step/max_steps": "2067/8890", "percentage": "23.25%", "elapsed_time": "43m 33s", "remaining_time": "2h 23m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79104} {"loss": 0.43225265, "grad_norm": 2.08060765, "learning_rate": 9.116e-05, "token_acc": 0.86375661, "epoch": 2.32620922, "global_step/max_steps": "2068/8890", "percentage": "23.26%", "elapsed_time": "43m 34s", "remaining_time": "2h 23m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791108} {"loss": 0.47266108, "grad_norm": 2.10515332, "learning_rate": 9.115e-05, "token_acc": 0.85108696, "epoch": 2.32733408, "global_step/max_steps": "2069/8890", "percentage": "23.27%", "elapsed_time": "43m 35s", "remaining_time": "2h 23m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791143} {"loss": 0.52111304, "grad_norm": 2.27947974, "learning_rate": 9.114e-05, "token_acc": 0.84704185, "epoch": 2.32845894, "global_step/max_steps": "2070/8890", "percentage": "23.28%", "elapsed_time": "43m 36s", "remaining_time": "2h 23m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791184} {"loss": 0.4810099, "grad_norm": 2.26276302, "learning_rate": 9.113e-05, "token_acc": 0.83676269, "epoch": 2.3295838, "global_step/max_steps": "2071/8890", "percentage": "23.30%", "elapsed_time": "43m 37s", "remaining_time": "2h 23m 37s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791258} {"loss": 0.57864767, "grad_norm": 2.48114228, "learning_rate": 9.112e-05, "token_acc": 0.82236025, "epoch": 2.33070866, "global_step/max_steps": "2072/8890", "percentage": "23.31%", "elapsed_time": "43m 38s", "remaining_time": "2h 23m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79129} {"loss": 0.54707134, "grad_norm": 2.57915711, "learning_rate": 9.111e-05, "token_acc": 0.81638418, "epoch": 2.33183352, "global_step/max_steps": "2073/8890", "percentage": "23.32%", "elapsed_time": "43m 39s", "remaining_time": "2h 23m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79138} {"loss": 0.41489846, "grad_norm": 2.47932816, "learning_rate": 9.11e-05, "token_acc": 0.85090909, "epoch": 2.33295838, "global_step/max_steps": "2074/8890", "percentage": "23.33%", "elapsed_time": "43m 40s", "remaining_time": "2h 23m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791446} {"loss": 0.37018979, "grad_norm": 2.04787016, "learning_rate": 9.109e-05, "token_acc": 0.873502, "epoch": 2.33408324, "global_step/max_steps": "2075/8890", "percentage": "23.34%", "elapsed_time": "43m 41s", "remaining_time": "2h 23m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791487} {"loss": 0.5278219, "grad_norm": 2.15252829, "learning_rate": 9.108e-05, "token_acc": 0.83438819, "epoch": 2.3352081, "global_step/max_steps": "2076/8890", "percentage": "23.35%", "elapsed_time": "43m 42s", "remaining_time": "2h 23m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791515} {"loss": 0.61567032, "grad_norm": 2.50594544, "learning_rate": 9.106e-05, "token_acc": 0.79902558, "epoch": 2.33633296, "global_step/max_steps": "2077/8890", "percentage": "23.36%", "elapsed_time": "43m 43s", "remaining_time": "2h 23m 26s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791585} {"loss": 0.51603848, "grad_norm": 2.25891471, "learning_rate": 9.105e-05, "token_acc": 0.83631362, "epoch": 2.33745782, "global_step/max_steps": "2078/8890", "percentage": "23.37%", "elapsed_time": "43m 44s", "remaining_time": "2h 23m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791628} {"loss": 0.60027301, "grad_norm": 2.10159326, "learning_rate": 9.104e-05, "token_acc": 0.80393836, "epoch": 2.33858268, "global_step/max_steps": "2079/8890", "percentage": "23.39%", "elapsed_time": "43m 46s", "remaining_time": "2h 23m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791656} {"loss": 0.54754126, "grad_norm": 1.89297152, "learning_rate": 9.103e-05, "token_acc": 0.83497537, "epoch": 2.33970754, "global_step/max_steps": "2080/8890", "percentage": "23.40%", "elapsed_time": "43m 47s", "remaining_time": "2h 23m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791682} {"loss": 0.58262217, "grad_norm": 2.32536101, "learning_rate": 9.102e-05, "token_acc": 0.83292383, "epoch": 2.3408324, "global_step/max_steps": "2081/8890", "percentage": "23.41%", "elapsed_time": "43m 48s", "remaining_time": "2h 23m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791708} {"loss": 0.33469838, "grad_norm": 1.62757838, "learning_rate": 9.101e-05, "token_acc": 0.89914163, "epoch": 2.34195726, "global_step/max_steps": "2082/8890", "percentage": "23.42%", "elapsed_time": "43m 49s", "remaining_time": "2h 23m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791743} {"loss": 0.55956757, "grad_norm": 1.91139698, "learning_rate": 9.1e-05, "token_acc": 0.82717391, "epoch": 2.34308211, "global_step/max_steps": "2083/8890", "percentage": "23.43%", "elapsed_time": "43m 50s", "remaining_time": "2h 23m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791736} {"loss": 0.50516772, "grad_norm": 1.99914944, "learning_rate": 9.099e-05, "token_acc": 0.83817427, "epoch": 2.34420697, "global_step/max_steps": "2084/8890", "percentage": "23.44%", "elapsed_time": "43m 52s", "remaining_time": "2h 23m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791771} {"loss": 0.61144626, "grad_norm": 2.06861043, "learning_rate": 9.098e-05, "token_acc": 0.80916031, "epoch": 2.34533183, "global_step/max_steps": "2085/8890", "percentage": "23.45%", "elapsed_time": "43m 53s", "remaining_time": "2h 23m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791759} {"loss": 0.64722586, "grad_norm": 2.46648788, "learning_rate": 9.097e-05, "token_acc": 0.79495614, "epoch": 2.34645669, "global_step/max_steps": "2086/8890", "percentage": "23.46%", "elapsed_time": "43m 54s", "remaining_time": "2h 23m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791788} {"loss": 0.56889635, "grad_norm": 2.54479313, "learning_rate": 9.096e-05, "token_acc": 0.81262729, "epoch": 2.34758155, "global_step/max_steps": "2087/8890", "percentage": "23.48%", "elapsed_time": "43m 55s", "remaining_time": "2h 23m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791846} {"loss": 0.53626209, "grad_norm": 2.10718036, "learning_rate": 9.095e-05, "token_acc": 0.8242142, "epoch": 2.34870641, "global_step/max_steps": "2088/8890", "percentage": "23.49%", "elapsed_time": "43m 56s", "remaining_time": "2h 23m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791932} {"loss": 0.37988466, "grad_norm": 1.95978844, "learning_rate": 9.094e-05, "token_acc": 0.87352246, "epoch": 2.34983127, "global_step/max_steps": "2089/8890", "percentage": "23.50%", "elapsed_time": "43m 57s", "remaining_time": "2h 23m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792005} {"loss": 0.57764381, "grad_norm": 1.99741578, "learning_rate": 9.093e-05, "token_acc": 0.81399632, "epoch": 2.35095613, "global_step/max_steps": "2090/8890", "percentage": "23.51%", "elapsed_time": "43m 58s", "remaining_time": "2h 23m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791996} {"loss": 0.64784706, "grad_norm": 2.49954653, "learning_rate": 9.092e-05, "token_acc": 0.81018519, "epoch": 2.35208099, "global_step/max_steps": "2091/8890", "percentage": "23.52%", "elapsed_time": "43m 59s", "remaining_time": "2h 23m 4s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792054} {"loss": 0.50575507, "grad_norm": 2.48351717, "learning_rate": 9.091e-05, "token_acc": 0.83857729, "epoch": 2.35320585, "global_step/max_steps": "2092/8890", "percentage": "23.53%", "elapsed_time": "44m 0s", "remaining_time": "2h 23m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792149} {"loss": 0.6578232, "grad_norm": 2.13132453, "learning_rate": 9.089e-05, "token_acc": 0.78800414, "epoch": 2.35433071, "global_step/max_steps": "2093/8890", "percentage": "23.54%", "elapsed_time": "44m 2s", "remaining_time": "2h 23m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792186} {"loss": 0.60673767, "grad_norm": 2.2828815, "learning_rate": 9.088e-05, "token_acc": 0.79726368, "epoch": 2.35545557, "global_step/max_steps": "2094/8890", "percentage": "23.55%", "elapsed_time": "44m 3s", "remaining_time": "2h 22m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792181} {"loss": 0.60961986, "grad_norm": 2.03791666, "learning_rate": 9.087e-05, "token_acc": 0.82046138, "epoch": 2.35658043, "global_step/max_steps": "2095/8890", "percentage": "23.57%", "elapsed_time": "44m 4s", "remaining_time": "2h 22m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792216} {"loss": 0.63013506, "grad_norm": 2.53917456, "learning_rate": 9.086e-05, "token_acc": 0.79839786, "epoch": 2.35770529, "global_step/max_steps": "2096/8890", "percentage": "23.58%", "elapsed_time": "44m 5s", "remaining_time": "2h 22m 54s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792323} {"loss": 0.56441474, "grad_norm": 2.45062947, "learning_rate": 9.085e-05, "token_acc": 0.83611111, "epoch": 2.35883015, "global_step/max_steps": "2097/8890", "percentage": "23.59%", "elapsed_time": "44m 6s", "remaining_time": "2h 22m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792381} {"loss": 0.55348599, "grad_norm": 2.61709142, "learning_rate": 9.084e-05, "token_acc": 0.82343988, "epoch": 2.35995501, "global_step/max_steps": "2098/8890", "percentage": "23.60%", "elapsed_time": "44m 7s", "remaining_time": "2h 22m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792474} {"loss": 0.53147042, "grad_norm": 2.39606833, "learning_rate": 9.083e-05, "token_acc": 0.82782369, "epoch": 2.36107987, "global_step/max_steps": "2099/8890", "percentage": "23.61%", "elapsed_time": "44m 8s", "remaining_time": "2h 22m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792505} {"loss": 0.50821543, "grad_norm": 2.4264884, "learning_rate": 9.082e-05, "token_acc": 0.83899557, "epoch": 2.36220472, "global_step/max_steps": "2100/8890", "percentage": "23.62%", "elapsed_time": "44m 9s", "remaining_time": "2h 22m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792571} {"eval_loss": 0.95309889, "eval_runtime": 31.6719, "eval_samples_per_second": 25.354, "eval_steps_per_second": 3.189, "eval_token_acc": 0.73821302, "epoch": 2.36220472, "global_step/max_steps": "2100/8890", "percentage": "23.62%", "elapsed_time": "44m 41s", "remaining_time": "2h 24m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783206} {"loss": 0.49215978, "grad_norm": 2.23855305, "learning_rate": 9.081e-05, "token_acc": 0.83859223, "epoch": 2.36332958, "global_step/max_steps": "2101/8890", "percentage": "23.63%", "elapsed_time": "44m 56s", "remaining_time": "2h 25m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779184} {"loss": 0.58629835, "grad_norm": 2.39554596, "learning_rate": 9.08e-05, "token_acc": 0.82416107, "epoch": 2.36445444, "global_step/max_steps": "2102/8890", "percentage": "23.64%", "elapsed_time": "44m 57s", "remaining_time": "2h 25m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779233} {"loss": 0.67809665, "grad_norm": 2.3399806, "learning_rate": 9.079e-05, "token_acc": 0.8, "epoch": 2.3655793, "global_step/max_steps": "2103/8890", "percentage": "23.66%", "elapsed_time": "44m 58s", "remaining_time": "2h 25m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779269} {"loss": 0.58279037, "grad_norm": 2.24567914, "learning_rate": 9.078e-05, "token_acc": 0.80044843, "epoch": 2.36670416, "global_step/max_steps": "2104/8890", "percentage": "23.67%", "elapsed_time": "44m 59s", "remaining_time": "2h 25m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779301} {"loss": 0.4727909, "grad_norm": 1.94947362, "learning_rate": 9.077e-05, "token_acc": 0.86107922, "epoch": 2.36782902, "global_step/max_steps": "2105/8890", "percentage": "23.68%", "elapsed_time": "45m 0s", "remaining_time": "2h 25m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779343} {"loss": 0.60783255, "grad_norm": 2.43011737, "learning_rate": 9.075e-05, "token_acc": 0.79621849, "epoch": 2.36895388, "global_step/max_steps": "2106/8890", "percentage": "23.69%", "elapsed_time": "45m 2s", "remaining_time": "2h 25m 4s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779385} {"loss": 0.50008005, "grad_norm": 2.35195899, "learning_rate": 9.074e-05, "token_acc": 0.84036145, "epoch": 2.37007874, "global_step/max_steps": "2107/8890", "percentage": "23.70%", "elapsed_time": "45m 3s", "remaining_time": "2h 25m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779455} {"loss": 0.6117031, "grad_norm": 2.09474635, "learning_rate": 9.073e-05, "token_acc": 0.81291946, "epoch": 2.3712036, "global_step/max_steps": "2108/8890", "percentage": "23.71%", "elapsed_time": "45m 4s", "remaining_time": "2h 25m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779433} {"loss": 0.40011314, "grad_norm": 2.12644362, "learning_rate": 9.072e-05, "token_acc": 0.87626263, "epoch": 2.37232846, "global_step/max_steps": "2109/8890", "percentage": "23.72%", "elapsed_time": "45m 5s", "remaining_time": "2h 24m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779479} {"loss": 0.45172802, "grad_norm": 2.04335666, "learning_rate": 9.071e-05, "token_acc": 0.85104167, "epoch": 2.37345332, "global_step/max_steps": "2110/8890", "percentage": "23.73%", "elapsed_time": "45m 6s", "remaining_time": "2h 24m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779569} {"loss": 0.60166723, "grad_norm": 2.22409844, "learning_rate": 9.07e-05, "token_acc": 0.81023454, "epoch": 2.37457818, "global_step/max_steps": "2111/8890", "percentage": "23.75%", "elapsed_time": "45m 7s", "remaining_time": "2h 24m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779632} {"loss": 0.42386287, "grad_norm": 2.05950522, "learning_rate": 9.069e-05, "token_acc": 0.86153846, "epoch": 2.37570304, "global_step/max_steps": "2112/8890", "percentage": "23.76%", "elapsed_time": "45m 8s", "remaining_time": "2h 24m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779671} {"loss": 0.49067971, "grad_norm": 2.2159009, "learning_rate": 9.068e-05, "token_acc": 0.84501845, "epoch": 2.3768279, "global_step/max_steps": "2113/8890", "percentage": "23.77%", "elapsed_time": "45m 9s", "remaining_time": "2h 24m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779711} {"loss": 0.45288512, "grad_norm": 1.96058297, "learning_rate": 9.067e-05, "token_acc": 0.86417323, "epoch": 2.37795276, "global_step/max_steps": "2114/8890", "percentage": "23.78%", "elapsed_time": "45m 11s", "remaining_time": "2h 24m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779754} {"loss": 0.50301862, "grad_norm": 2.04351521, "learning_rate": 9.066e-05, "token_acc": 0.85046729, "epoch": 2.37907762, "global_step/max_steps": "2115/8890", "percentage": "23.79%", "elapsed_time": "45m 12s", "remaining_time": "2h 24m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779803} {"loss": 0.52796978, "grad_norm": 2.14905643, "learning_rate": 9.065e-05, "token_acc": 0.8314978, "epoch": 2.38020247, "global_step/max_steps": "2116/8890", "percentage": "23.80%", "elapsed_time": "45m 13s", "remaining_time": "2h 24m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779834} {"loss": 0.55549496, "grad_norm": 2.192132, "learning_rate": 9.064e-05, "token_acc": 0.81971154, "epoch": 2.38132733, "global_step/max_steps": "2117/8890", "percentage": "23.81%", "elapsed_time": "45m 14s", "remaining_time": "2h 24m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779871} {"loss": 0.3593381, "grad_norm": 1.87436032, "learning_rate": 9.063e-05, "token_acc": 0.88903394, "epoch": 2.38245219, "global_step/max_steps": "2118/8890", "percentage": "23.82%", "elapsed_time": "45m 15s", "remaining_time": "2h 24m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779941} {"loss": 0.48127744, "grad_norm": 2.00771809, "learning_rate": 9.061e-05, "token_acc": 0.84825328, "epoch": 2.38357705, "global_step/max_steps": "2119/8890", "percentage": "23.84%", "elapsed_time": "45m 16s", "remaining_time": "2h 24m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779987} {"loss": 0.55224878, "grad_norm": 2.06836581, "learning_rate": 9.06e-05, "token_acc": 0.81420118, "epoch": 2.38470191, "global_step/max_steps": "2120/8890", "percentage": "23.85%", "elapsed_time": "45m 17s", "remaining_time": "2h 24m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780055} {"loss": 0.47657371, "grad_norm": 2.05225015, "learning_rate": 9.059e-05, "token_acc": 0.84388646, "epoch": 2.38582677, "global_step/max_steps": "2121/8890", "percentage": "23.86%", "elapsed_time": "45m 18s", "remaining_time": "2h 24m 37s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780096} {"loss": 0.43464321, "grad_norm": 2.03158069, "learning_rate": 9.058e-05, "token_acc": 0.85741265, "epoch": 2.38695163, "global_step/max_steps": "2122/8890", "percentage": "23.87%", "elapsed_time": "45m 20s", "remaining_time": "2h 24m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780112} {"loss": 0.4415957, "grad_norm": 2.64755774, "learning_rate": 9.057e-05, "token_acc": 0.84386617, "epoch": 2.38807649, "global_step/max_steps": "2123/8890", "percentage": "23.88%", "elapsed_time": "45m 20s", "remaining_time": "2h 24m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780262} {"loss": 0.4123866, "grad_norm": 1.98576391, "learning_rate": 9.056e-05, "token_acc": 0.87242798, "epoch": 2.38920135, "global_step/max_steps": "2124/8890", "percentage": "23.89%", "elapsed_time": "45m 21s", "remaining_time": "2h 24m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780341} {"loss": 0.52557456, "grad_norm": 2.3591311, "learning_rate": 9.055e-05, "token_acc": 0.8369863, "epoch": 2.39032621, "global_step/max_steps": "2125/8890", "percentage": "23.90%", "elapsed_time": "45m 22s", "remaining_time": "2h 24m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780407} {"loss": 0.38444805, "grad_norm": 1.72554874, "learning_rate": 9.054e-05, "token_acc": 0.88311688, "epoch": 2.39145107, "global_step/max_steps": "2126/8890", "percentage": "23.91%", "elapsed_time": "45m 24s", "remaining_time": "2h 24m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780399} {"loss": 0.51871538, "grad_norm": 1.89016879, "learning_rate": 9.053e-05, "token_acc": 0.83883129, "epoch": 2.39257593, "global_step/max_steps": "2127/8890", "percentage": "23.93%", "elapsed_time": "45m 25s", "remaining_time": "2h 24m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780444} {"loss": 0.58460712, "grad_norm": 2.09938431, "learning_rate": 9.052e-05, "token_acc": 0.82761905, "epoch": 2.39370079, "global_step/max_steps": "2128/8890", "percentage": "23.94%", "elapsed_time": "45m 26s", "remaining_time": "2h 24m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780486} {"loss": 0.47161484, "grad_norm": 2.05008316, "learning_rate": 9.051e-05, "token_acc": 0.82130178, "epoch": 2.39482565, "global_step/max_steps": "2129/8890", "percentage": "23.95%", "elapsed_time": "45m 27s", "remaining_time": "2h 24m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780526} {"loss": 0.48023412, "grad_norm": 2.26857519, "learning_rate": 9.049e-05, "token_acc": 0.83717775, "epoch": 2.39595051, "global_step/max_steps": "2130/8890", "percentage": "23.96%", "elapsed_time": "45m 28s", "remaining_time": "2h 24m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780559} {"loss": 0.5290063, "grad_norm": 2.0590477, "learning_rate": 9.048e-05, "token_acc": 0.83080513, "epoch": 2.39707537, "global_step/max_steps": "2131/8890", "percentage": "23.97%", "elapsed_time": "45m 29s", "remaining_time": "2h 24m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780596} {"loss": 0.48988169, "grad_norm": 2.07876921, "learning_rate": 9.047e-05, "token_acc": 0.82746879, "epoch": 2.39820022, "global_step/max_steps": "2132/8890", "percentage": "23.98%", "elapsed_time": "45m 31s", "remaining_time": "2h 24m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780629} {"loss": 0.59133089, "grad_norm": 2.36312199, "learning_rate": 9.046e-05, "token_acc": 0.81882353, "epoch": 2.39932508, "global_step/max_steps": "2133/8890", "percentage": "23.99%", "elapsed_time": "45m 32s", "remaining_time": "2h 24m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780698} {"loss": 0.45866704, "grad_norm": 2.05159187, "learning_rate": 9.045e-05, "token_acc": 0.8502994, "epoch": 2.40044994, "global_step/max_steps": "2134/8890", "percentage": "24.00%", "elapsed_time": "45m 33s", "remaining_time": "2h 24m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780747} {"loss": 0.3574695, "grad_norm": 1.75256848, "learning_rate": 9.044e-05, "token_acc": 0.89440204, "epoch": 2.4015748, "global_step/max_steps": "2135/8890", "percentage": "24.02%", "elapsed_time": "45m 34s", "remaining_time": "2h 24m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780823} {"loss": 0.45427874, "grad_norm": 2.12178874, "learning_rate": 9.043e-05, "token_acc": 0.85543608, "epoch": 2.40269966, "global_step/max_steps": "2136/8890", "percentage": "24.03%", "elapsed_time": "45m 35s", "remaining_time": "2h 24m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780931} {"loss": 0.59667653, "grad_norm": 2.35909557, "learning_rate": 9.042e-05, "token_acc": 0.82225237, "epoch": 2.40382452, "global_step/max_steps": "2137/8890", "percentage": "24.04%", "elapsed_time": "45m 36s", "remaining_time": "2h 24m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780965} {"loss": 0.69216335, "grad_norm": 2.43116808, "learning_rate": 9.041e-05, "token_acc": 0.77349398, "epoch": 2.40494938, "global_step/max_steps": "2138/8890", "percentage": "24.05%", "elapsed_time": "45m 37s", "remaining_time": "2h 24m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781029} {"loss": 0.43987095, "grad_norm": 2.08872938, "learning_rate": 9.04e-05, "token_acc": 0.85763098, "epoch": 2.40607424, "global_step/max_steps": "2139/8890", "percentage": "24.06%", "elapsed_time": "45m 38s", "remaining_time": "2h 24m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781183} {"loss": 0.73433077, "grad_norm": 2.07461214, "learning_rate": 9.039e-05, "token_acc": 0.76660839, "epoch": 2.4071991, "global_step/max_steps": "2140/8890", "percentage": "24.07%", "elapsed_time": "45m 39s", "remaining_time": "2h 24m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78118} {"loss": 0.45857847, "grad_norm": 2.10124063, "learning_rate": 9.037e-05, "token_acc": 0.84654731, "epoch": 2.40832396, "global_step/max_steps": "2141/8890", "percentage": "24.08%", "elapsed_time": "45m 40s", "remaining_time": "2h 23m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781214} {"loss": 0.56216347, "grad_norm": 2.3045249, "learning_rate": 9.036e-05, "token_acc": 0.81955763, "epoch": 2.40944882, "global_step/max_steps": "2142/8890", "percentage": "24.09%", "elapsed_time": "45m 41s", "remaining_time": "2h 23m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781276} {"loss": 0.4648194, "grad_norm": 1.96266747, "learning_rate": 9.035e-05, "token_acc": 0.85584563, "epoch": 2.41057368, "global_step/max_steps": "2143/8890", "percentage": "24.11%", "elapsed_time": "45m 42s", "remaining_time": "2h 23m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781342} {"loss": 0.65027773, "grad_norm": 1.864326, "learning_rate": 9.034e-05, "token_acc": 0.80911901, "epoch": 2.41169854, "global_step/max_steps": "2144/8890", "percentage": "24.12%", "elapsed_time": "45m 44s", "remaining_time": "2h 23m 54s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781313} {"loss": 0.53979814, "grad_norm": 2.31841445, "learning_rate": 9.033e-05, "token_acc": 0.83629442, "epoch": 2.4128234, "global_step/max_steps": "2145/8890", "percentage": "24.13%", "elapsed_time": "45m 45s", "remaining_time": "2h 23m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781383} {"loss": 0.68730068, "grad_norm": 2.06228995, "learning_rate": 9.032e-05, "token_acc": 0.80053908, "epoch": 2.41394826, "global_step/max_steps": "2146/8890", "percentage": "24.14%", "elapsed_time": "45m 46s", "remaining_time": "2h 23m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78142} {"loss": 0.57218623, "grad_norm": 1.77775025, "learning_rate": 9.031e-05, "token_acc": 0.82002801, "epoch": 2.41507312, "global_step/max_steps": "2147/8890", "percentage": "24.15%", "elapsed_time": "45m 47s", "remaining_time": "2h 23m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781381} {"loss": 0.55404115, "grad_norm": 2.11999631, "learning_rate": 9.03e-05, "token_acc": 0.82957111, "epoch": 2.41619798, "global_step/max_steps": "2148/8890", "percentage": "24.16%", "elapsed_time": "45m 48s", "remaining_time": "2h 23m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781423} {"loss": 0.58119631, "grad_norm": 1.91097844, "learning_rate": 9.029e-05, "token_acc": 0.82632541, "epoch": 2.41732283, "global_step/max_steps": "2149/8890", "percentage": "24.17%", "elapsed_time": "45m 50s", "remaining_time": "2h 23m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781425} {"loss": 0.54560024, "grad_norm": 2.09823108, "learning_rate": 9.028e-05, "token_acc": 0.81644935, "epoch": 2.41844769, "global_step/max_steps": "2150/8890", "percentage": "24.18%", "elapsed_time": "45m 51s", "remaining_time": "2h 23m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781455} {"loss": 0.39732063, "grad_norm": 1.72951543, "learning_rate": 9.026e-05, "token_acc": 0.8712297, "epoch": 2.41957255, "global_step/max_steps": "2151/8890", "percentage": "24.20%", "elapsed_time": "45m 52s", "remaining_time": "2h 23m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781448} {"loss": 0.59665859, "grad_norm": 2.30195785, "learning_rate": 9.025e-05, "token_acc": 0.82808989, "epoch": 2.42069741, "global_step/max_steps": "2152/8890", "percentage": "24.21%", "elapsed_time": "45m 53s", "remaining_time": "2h 23m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781478} {"loss": 0.64289856, "grad_norm": 2.40562701, "learning_rate": 9.024e-05, "token_acc": 0.80381166, "epoch": 2.42182227, "global_step/max_steps": "2153/8890", "percentage": "24.22%", "elapsed_time": "45m 54s", "remaining_time": "2h 23m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78151} {"loss": 0.52437288, "grad_norm": 2.08238697, "learning_rate": 9.023e-05, "token_acc": 0.83718593, "epoch": 2.42294713, "global_step/max_steps": "2154/8890", "percentage": "24.23%", "elapsed_time": "45m 56s", "remaining_time": "2h 23m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781547} {"loss": 0.55554712, "grad_norm": 2.07332754, "learning_rate": 9.022e-05, "token_acc": 0.82708333, "epoch": 2.42407199, "global_step/max_steps": "2155/8890", "percentage": "24.24%", "elapsed_time": "45m 57s", "remaining_time": "2h 23m 37s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781589} {"loss": 0.57417828, "grad_norm": 2.68836308, "learning_rate": 9.021e-05, "token_acc": 0.82335766, "epoch": 2.42519685, "global_step/max_steps": "2156/8890", "percentage": "24.25%", "elapsed_time": "45m 58s", "remaining_time": "2h 23m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78166} {"loss": 0.60219377, "grad_norm": 1.94225013, "learning_rate": 9.02e-05, "token_acc": 0.79433368, "epoch": 2.42632171, "global_step/max_steps": "2157/8890", "percentage": "24.26%", "elapsed_time": "45m 59s", "remaining_time": "2h 23m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.7817} {"loss": 0.54177636, "grad_norm": 2.01679707, "learning_rate": 9.019e-05, "token_acc": 0.82851446, "epoch": 2.42744657, "global_step/max_steps": "2158/8890", "percentage": "24.27%", "elapsed_time": "46m 0s", "remaining_time": "2h 23m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781763} {"loss": 0.4372285, "grad_norm": 2.31201339, "learning_rate": 9.018e-05, "token_acc": 0.86266094, "epoch": 2.42857143, "global_step/max_steps": "2159/8890", "percentage": "24.29%", "elapsed_time": "46m 1s", "remaining_time": "2h 23m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781844} {"loss": 0.58656222, "grad_norm": 2.56554174, "learning_rate": 9.016e-05, "token_acc": 0.80128205, "epoch": 2.42969629, "global_step/max_steps": "2160/8890", "percentage": "24.30%", "elapsed_time": "46m 2s", "remaining_time": "2h 23m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78191} {"loss": 0.52768695, "grad_norm": 2.02939296, "learning_rate": 9.015e-05, "token_acc": 0.82613169, "epoch": 2.43082115, "global_step/max_steps": "2161/8890", "percentage": "24.31%", "elapsed_time": "46m 3s", "remaining_time": "2h 23m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781945} {"loss": 0.58221781, "grad_norm": 2.13335395, "learning_rate": 9.014e-05, "token_acc": 0.82170543, "epoch": 2.43194601, "global_step/max_steps": "2162/8890", "percentage": "24.32%", "elapsed_time": "46m 4s", "remaining_time": "2h 23m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782024} {"loss": 0.58715242, "grad_norm": 2.02846575, "learning_rate": 9.013e-05, "token_acc": 0.82661668, "epoch": 2.43307087, "global_step/max_steps": "2163/8890", "percentage": "24.33%", "elapsed_time": "46m 5s", "remaining_time": "2h 23m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782066} {"loss": 0.68912864, "grad_norm": 1.81833875, "learning_rate": 9.012e-05, "token_acc": 0.79114391, "epoch": 2.43419573, "global_step/max_steps": "2164/8890", "percentage": "24.34%", "elapsed_time": "46m 7s", "remaining_time": "2h 23m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782055} {"loss": 0.57036805, "grad_norm": 2.25940466, "learning_rate": 9.011e-05, "token_acc": 0.82147651, "epoch": 2.43532058, "global_step/max_steps": "2165/8890", "percentage": "24.35%", "elapsed_time": "46m 8s", "remaining_time": "2h 23m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782127} {"loss": 0.60150945, "grad_norm": 2.3241415, "learning_rate": 9.01e-05, "token_acc": 0.82057716, "epoch": 2.43644544, "global_step/max_steps": "2166/8890", "percentage": "24.36%", "elapsed_time": "46m 9s", "remaining_time": "2h 23m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782229} {"loss": 0.60878873, "grad_norm": 2.13350892, "learning_rate": 9.009e-05, "token_acc": 0.8115942, "epoch": 2.4375703, "global_step/max_steps": "2167/8890", "percentage": "24.38%", "elapsed_time": "46m 10s", "remaining_time": "2h 23m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78231} {"loss": 0.54547024, "grad_norm": 2.02569389, "learning_rate": 9.008e-05, "token_acc": 0.82692308, "epoch": 2.43869516, "global_step/max_steps": "2168/8890", "percentage": "24.39%", "elapsed_time": "46m 11s", "remaining_time": "2h 23m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782345} {"loss": 0.60614169, "grad_norm": 1.91499496, "learning_rate": 9.006e-05, "token_acc": 0.81209677, "epoch": 2.43982002, "global_step/max_steps": "2169/8890", "percentage": "24.40%", "elapsed_time": "46m 12s", "remaining_time": "2h 23m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782375} {"loss": 0.49558559, "grad_norm": 2.48037148, "learning_rate": 9.005e-05, "token_acc": 0.84590164, "epoch": 2.44094488, "global_step/max_steps": "2170/8890", "percentage": "24.41%", "elapsed_time": "46m 13s", "remaining_time": "2h 23m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782401} {"loss": 0.61017865, "grad_norm": 2.2255547, "learning_rate": 9.004e-05, "token_acc": 0.81124072, "epoch": 2.44206974, "global_step/max_steps": "2171/8890", "percentage": "24.42%", "elapsed_time": "46m 14s", "remaining_time": "2h 23m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782428} {"loss": 0.5622412, "grad_norm": 2.4368999, "learning_rate": 9.003e-05, "token_acc": 0.8185654, "epoch": 2.4431946, "global_step/max_steps": "2172/8890", "percentage": "24.43%", "elapsed_time": "46m 15s", "remaining_time": "2h 23m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782471} {"loss": 0.46462721, "grad_norm": 2.00488424, "learning_rate": 9.002e-05, "token_acc": 0.86328556, "epoch": 2.44431946, "global_step/max_steps": "2173/8890", "percentage": "24.44%", "elapsed_time": "46m 16s", "remaining_time": "2h 23m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782504} {"loss": 0.37168357, "grad_norm": 2.01709747, "learning_rate": 9.001e-05, "token_acc": 0.88135593, "epoch": 2.44544432, "global_step/max_steps": "2174/8890", "percentage": "24.45%", "elapsed_time": "46m 17s", "remaining_time": "2h 23m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782595} {"loss": 0.37666795, "grad_norm": 1.7867763, "learning_rate": 9e-05, "token_acc": 0.87284895, "epoch": 2.44656918, "global_step/max_steps": "2175/8890", "percentage": "24.47%", "elapsed_time": "46m 18s", "remaining_time": "2h 22m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782659} {"loss": 0.36458147, "grad_norm": 1.69062245, "learning_rate": 8.999e-05, "token_acc": 0.88438735, "epoch": 2.44769404, "global_step/max_steps": "2176/8890", "percentage": "24.48%", "elapsed_time": "46m 20s", "remaining_time": "2h 22m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782654} {"loss": 0.57018209, "grad_norm": 2.24505782, "learning_rate": 8.998e-05, "token_acc": 0.81560284, "epoch": 2.4488189, "global_step/max_steps": "2177/8890", "percentage": "24.49%", "elapsed_time": "46m 21s", "remaining_time": "2h 22m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782686} {"loss": 0.40541402, "grad_norm": 1.8268944, "learning_rate": 8.996e-05, "token_acc": 0.86268344, "epoch": 2.44994376, "global_step/max_steps": "2178/8890", "percentage": "24.50%", "elapsed_time": "46m 22s", "remaining_time": "2h 22m 54s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782763} {"loss": 0.48905876, "grad_norm": 2.49159265, "learning_rate": 8.995e-05, "token_acc": 0.82995951, "epoch": 2.45106862, "global_step/max_steps": "2179/8890", "percentage": "24.51%", "elapsed_time": "46m 23s", "remaining_time": "2h 22m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782835} {"loss": 0.58698153, "grad_norm": 2.35495281, "learning_rate": 8.994e-05, "token_acc": 0.83076923, "epoch": 2.45219348, "global_step/max_steps": "2180/8890", "percentage": "24.52%", "elapsed_time": "46m 24s", "remaining_time": "2h 22m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782875} {"loss": 0.59559429, "grad_norm": 2.2599442, "learning_rate": 8.993e-05, "token_acc": 0.81862745, "epoch": 2.45331834, "global_step/max_steps": "2181/8890", "percentage": "24.53%", "elapsed_time": "46m 25s", "remaining_time": "2h 22m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782912} {"loss": 0.57870412, "grad_norm": 2.41667986, "learning_rate": 8.992e-05, "token_acc": 0.81884945, "epoch": 2.45444319, "global_step/max_steps": "2182/8890", "percentage": "24.54%", "elapsed_time": "46m 26s", "remaining_time": "2h 22m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782981} {"loss": 0.61193979, "grad_norm": 2.54580402, "learning_rate": 8.991e-05, "token_acc": 0.80205279, "epoch": 2.45556805, "global_step/max_steps": "2183/8890", "percentage": "24.56%", "elapsed_time": "46m 27s", "remaining_time": "2h 22m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78302} {"loss": 0.47635615, "grad_norm": 2.03524923, "learning_rate": 8.99e-05, "token_acc": 0.83479961, "epoch": 2.45669291, "global_step/max_steps": "2184/8890", "percentage": "24.57%", "elapsed_time": "46m 29s", "remaining_time": "2h 22m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783052} {"loss": 0.49398834, "grad_norm": 2.34953833, "learning_rate": 8.989e-05, "token_acc": 0.84501845, "epoch": 2.45781777, "global_step/max_steps": "2185/8890", "percentage": "24.58%", "elapsed_time": "46m 30s", "remaining_time": "2h 22m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783087} {"loss": 0.65121263, "grad_norm": 2.4535706, "learning_rate": 8.987e-05, "token_acc": 0.80093677, "epoch": 2.45894263, "global_step/max_steps": "2186/8890", "percentage": "24.59%", "elapsed_time": "46m 31s", "remaining_time": "2h 22m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783146} {"loss": 0.60848558, "grad_norm": 1.86028504, "learning_rate": 8.986e-05, "token_acc": 0.80888575, "epoch": 2.46006749, "global_step/max_steps": "2187/8890", "percentage": "24.60%", "elapsed_time": "46m 32s", "remaining_time": "2h 22m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783174} {"loss": 0.54779971, "grad_norm": 2.12962031, "learning_rate": 8.985e-05, "token_acc": 0.82740586, "epoch": 2.46119235, "global_step/max_steps": "2188/8890", "percentage": "24.61%", "elapsed_time": "46m 33s", "remaining_time": "2h 22m 37s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783207} {"loss": 0.46473068, "grad_norm": 2.09995604, "learning_rate": 8.984e-05, "token_acc": 0.84699454, "epoch": 2.46231721, "global_step/max_steps": "2189/8890", "percentage": "24.62%", "elapsed_time": "46m 34s", "remaining_time": "2h 22m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783239} {"loss": 0.60784829, "grad_norm": 2.15118384, "learning_rate": 8.983e-05, "token_acc": 0.80506823, "epoch": 2.46344207, "global_step/max_steps": "2190/8890", "percentage": "24.63%", "elapsed_time": "46m 35s", "remaining_time": "2h 22m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783274} {"loss": 0.70408654, "grad_norm": 2.52396584, "learning_rate": 8.982e-05, "token_acc": 0.7905824, "epoch": 2.46456693, "global_step/max_steps": "2191/8890", "percentage": "24.65%", "elapsed_time": "46m 36s", "remaining_time": "2h 22m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783345} {"loss": 0.53740728, "grad_norm": 2.30301738, "learning_rate": 8.981e-05, "token_acc": 0.82937063, "epoch": 2.46569179, "global_step/max_steps": "2192/8890", "percentage": "24.66%", "elapsed_time": "46m 38s", "remaining_time": "2h 22m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783389} {"loss": 0.61473125, "grad_norm": 2.14210081, "learning_rate": 8.98e-05, "token_acc": 0.81279621, "epoch": 2.46681665, "global_step/max_steps": "2193/8890", "percentage": "24.67%", "elapsed_time": "46m 39s", "remaining_time": "2h 22m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783436} {"loss": 0.62424076, "grad_norm": 2.26887488, "learning_rate": 8.978e-05, "token_acc": 0.81155779, "epoch": 2.46794151, "global_step/max_steps": "2194/8890", "percentage": "24.68%", "elapsed_time": "46m 40s", "remaining_time": "2h 22m 26s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783482} {"loss": 0.66789991, "grad_norm": 2.80525637, "learning_rate": 8.977e-05, "token_acc": 0.79513185, "epoch": 2.46906637, "global_step/max_steps": "2195/8890", "percentage": "24.69%", "elapsed_time": "46m 41s", "remaining_time": "2h 22m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783544} {"loss": 0.61103392, "grad_norm": 2.309762, "learning_rate": 8.976e-05, "token_acc": 0.80455635, "epoch": 2.47019123, "global_step/max_steps": "2196/8890", "percentage": "24.70%", "elapsed_time": "46m 42s", "remaining_time": "2h 22m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783608} {"loss": 0.57734394, "grad_norm": 2.06750655, "learning_rate": 8.975e-05, "token_acc": 0.81989529, "epoch": 2.47131609, "global_step/max_steps": "2197/8890", "percentage": "24.71%", "elapsed_time": "46m 43s", "remaining_time": "2h 22m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783676} {"loss": 0.47416496, "grad_norm": 1.80074584, "learning_rate": 8.974e-05, "token_acc": 0.85714286, "epoch": 2.47244094, "global_step/max_steps": "2198/8890", "percentage": "24.72%", "elapsed_time": "46m 44s", "remaining_time": "2h 22m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783705} {"loss": 0.55570078, "grad_norm": 1.89182949, "learning_rate": 8.973e-05, "token_acc": 0.83303085, "epoch": 2.4735658, "global_step/max_steps": "2199/8890", "percentage": "24.74%", "elapsed_time": "46m 45s", "remaining_time": "2h 22m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783767} {"loss": 0.56936789, "grad_norm": 2.47022748, "learning_rate": 8.972e-05, "token_acc": 0.81457801, "epoch": 2.47469066, "global_step/max_steps": "2200/8890", "percentage": "24.75%", "elapsed_time": "46m 46s", "remaining_time": "2h 22m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783812} {"loss": 0.54187292, "grad_norm": 1.99977207, "learning_rate": 8.971e-05, "token_acc": 0.81385281, "epoch": 2.47581552, "global_step/max_steps": "2201/8890", "percentage": "24.76%", "elapsed_time": "46m 47s", "remaining_time": "2h 22m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783845} {"loss": 0.55537033, "grad_norm": 2.37066698, "learning_rate": 8.969e-05, "token_acc": 0.80945946, "epoch": 2.47694038, "global_step/max_steps": "2202/8890", "percentage": "24.77%", "elapsed_time": "46m 49s", "remaining_time": "2h 22m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783876} {"loss": 0.54945755, "grad_norm": 2.10487127, "learning_rate": 8.968e-05, "token_acc": 0.83443709, "epoch": 2.47806524, "global_step/max_steps": "2203/8890", "percentage": "24.78%", "elapsed_time": "46m 50s", "remaining_time": "2h 22m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783947} {"loss": 0.57783443, "grad_norm": 2.0831964, "learning_rate": 8.967e-05, "token_acc": 0.81489842, "epoch": 2.4791901, "global_step/max_steps": "2204/8890", "percentage": "24.79%", "elapsed_time": "46m 51s", "remaining_time": "2h 22m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78401} {"loss": 0.569875, "grad_norm": 2.03066349, "learning_rate": 8.966e-05, "token_acc": 0.83206107, "epoch": 2.48031496, "global_step/max_steps": "2205/8890", "percentage": "24.80%", "elapsed_time": "46m 52s", "remaining_time": "2h 22m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784036} {"loss": 0.41955602, "grad_norm": 1.78852701, "learning_rate": 8.965e-05, "token_acc": 0.86234357, "epoch": 2.48143982, "global_step/max_steps": "2206/8890", "percentage": "24.81%", "elapsed_time": "46m 53s", "remaining_time": "2h 22m 4s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784063} {"loss": 0.56250733, "grad_norm": 2.55868936, "learning_rate": 8.964e-05, "token_acc": 0.8156682, "epoch": 2.48256468, "global_step/max_steps": "2207/8890", "percentage": "24.83%", "elapsed_time": "46m 54s", "remaining_time": "2h 22m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784127} {"loss": 0.74050015, "grad_norm": 2.063205, "learning_rate": 8.963e-05, "token_acc": 0.77629382, "epoch": 2.48368954, "global_step/max_steps": "2208/8890", "percentage": "24.84%", "elapsed_time": "46m 55s", "remaining_time": "2h 22m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784108} {"loss": 0.51388049, "grad_norm": 2.39162326, "learning_rate": 8.962e-05, "token_acc": 0.82272727, "epoch": 2.4848144, "global_step/max_steps": "2209/8890", "percentage": "24.85%", "elapsed_time": "46m 56s", "remaining_time": "2h 21m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784174} {"loss": 0.54132313, "grad_norm": 2.16520739, "learning_rate": 8.96e-05, "token_acc": 0.84140436, "epoch": 2.48593926, "global_step/max_steps": "2210/8890", "percentage": "24.86%", "elapsed_time": "46m 58s", "remaining_time": "2h 21m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784217} {"loss": 0.6875571, "grad_norm": 2.45032668, "learning_rate": 8.959e-05, "token_acc": 0.79247202, "epoch": 2.48706412, "global_step/max_steps": "2211/8890", "percentage": "24.87%", "elapsed_time": "46m 59s", "remaining_time": "2h 21m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784247} {"loss": 0.49639642, "grad_norm": 2.16167212, "learning_rate": 8.958e-05, "token_acc": 0.84258065, "epoch": 2.48818898, "global_step/max_steps": "2212/8890", "percentage": "24.88%", "elapsed_time": "47m 0s", "remaining_time": "2h 21m 54s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784281} {"loss": 0.5133971, "grad_norm": 2.31847572, "learning_rate": 8.957e-05, "token_acc": 0.83613445, "epoch": 2.48931384, "global_step/max_steps": "2213/8890", "percentage": "24.89%", "elapsed_time": "47m 1s", "remaining_time": "2h 21m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784347} {"loss": 0.50991172, "grad_norm": 2.49126744, "learning_rate": 8.956e-05, "token_acc": 0.83703704, "epoch": 2.4904387, "global_step/max_steps": "2214/8890", "percentage": "24.90%", "elapsed_time": "47m 2s", "remaining_time": "2h 21m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784408} {"loss": 0.52406752, "grad_norm": 2.11899352, "learning_rate": 8.955e-05, "token_acc": 0.8340564, "epoch": 2.49156355, "global_step/max_steps": "2215/8890", "percentage": "24.92%", "elapsed_time": "47m 3s", "remaining_time": "2h 21m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784472} {"loss": 0.42925328, "grad_norm": 1.98547888, "learning_rate": 8.954e-05, "token_acc": 0.86625, "epoch": 2.49268841, "global_step/max_steps": "2216/8890", "percentage": "24.93%", "elapsed_time": "47m 4s", "remaining_time": "2h 21m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784533} {"loss": 0.53484404, "grad_norm": 2.36782074, "learning_rate": 8.952e-05, "token_acc": 0.80913349, "epoch": 2.49381327, "global_step/max_steps": "2217/8890", "percentage": "24.94%", "elapsed_time": "47m 5s", "remaining_time": "2h 21m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784566} {"loss": 0.53197515, "grad_norm": 2.19008446, "learning_rate": 8.951e-05, "token_acc": 0.82118451, "epoch": 2.49493813, "global_step/max_steps": "2218/8890", "percentage": "24.95%", "elapsed_time": "47m 7s", "remaining_time": "2h 21m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784562} {"loss": 0.71974254, "grad_norm": 2.17990232, "learning_rate": 8.95e-05, "token_acc": 0.78508772, "epoch": 2.49606299, "global_step/max_steps": "2219/8890", "percentage": "24.96%", "elapsed_time": "47m 8s", "remaining_time": "2h 21m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784625} {"loss": 0.52142382, "grad_norm": 1.84695756, "learning_rate": 8.949e-05, "token_acc": 0.85127737, "epoch": 2.49718785, "global_step/max_steps": "2220/8890", "percentage": "24.97%", "elapsed_time": "47m 9s", "remaining_time": "2h 21m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784653} {"loss": 0.62424481, "grad_norm": 2.03709269, "learning_rate": 8.948e-05, "token_acc": 0.80868025, "epoch": 2.49831271, "global_step/max_steps": "2221/8890", "percentage": "24.98%", "elapsed_time": "47m 10s", "remaining_time": "2h 21m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78464} {"loss": 0.56113112, "grad_norm": 2.37548232, "learning_rate": 8.947e-05, "token_acc": 0.81606218, "epoch": 2.49943757, "global_step/max_steps": "2222/8890", "percentage": "24.99%", "elapsed_time": "47m 11s", "remaining_time": "2h 21m 37s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784723} {"loss": 0.52217966, "grad_norm": 1.90764534, "learning_rate": 8.946e-05, "token_acc": 0.83714547, "epoch": 2.50056243, "global_step/max_steps": "2223/8890", "percentage": "25.01%", "elapsed_time": "47m 12s", "remaining_time": "2h 21m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784764} {"loss": 0.4026978, "grad_norm": 2.12491179, "learning_rate": 8.944e-05, "token_acc": 0.85454545, "epoch": 2.50168729, "global_step/max_steps": "2224/8890", "percentage": "25.02%", "elapsed_time": "47m 13s", "remaining_time": "2h 21m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784832} {"loss": 0.55014569, "grad_norm": 2.08460927, "learning_rate": 8.943e-05, "token_acc": 0.82134831, "epoch": 2.50281215, "global_step/max_steps": "2225/8890", "percentage": "25.03%", "elapsed_time": "47m 14s", "remaining_time": "2h 21m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784867} {"loss": 0.46247065, "grad_norm": 2.42034101, "learning_rate": 8.942e-05, "token_acc": 0.84871407, "epoch": 2.50393701, "global_step/max_steps": "2226/8890", "percentage": "25.04%", "elapsed_time": "47m 15s", "remaining_time": "2h 21m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784946} {"loss": 0.48673424, "grad_norm": 1.91880584, "learning_rate": 8.941e-05, "token_acc": 0.83951965, "epoch": 2.50506187, "global_step/max_steps": "2227/8890", "percentage": "25.05%", "elapsed_time": "47m 16s", "remaining_time": "2h 21m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784986} {"loss": 0.69826907, "grad_norm": 2.45663548, "learning_rate": 8.94e-05, "token_acc": 0.78361858, "epoch": 2.50618673, "global_step/max_steps": "2228/8890", "percentage": "25.06%", "elapsed_time": "47m 18s", "remaining_time": "2h 21m 26s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785024} {"loss": 0.78443635, "grad_norm": 1.75352275, "learning_rate": 8.939e-05, "token_acc": 0.77946768, "epoch": 2.50731159, "global_step/max_steps": "2229/8890", "percentage": "25.07%", "elapsed_time": "47m 19s", "remaining_time": "2h 21m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78502} {"loss": 0.59435147, "grad_norm": 2.11198616, "learning_rate": 8.938e-05, "token_acc": 0.80638916, "epoch": 2.50843645, "global_step/max_steps": "2230/8890", "percentage": "25.08%", "elapsed_time": "47m 20s", "remaining_time": "2h 21m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785086} {"loss": 0.51181054, "grad_norm": 1.8583616, "learning_rate": 8.936e-05, "token_acc": 0.85993821, "epoch": 2.5095613, "global_step/max_steps": "2231/8890", "percentage": "25.10%", "elapsed_time": "47m 21s", "remaining_time": "2h 21m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785111} {"loss": 0.42158222, "grad_norm": 1.98411202, "learning_rate": 8.935e-05, "token_acc": 0.86683739, "epoch": 2.51068616, "global_step/max_steps": "2232/8890", "percentage": "25.11%", "elapsed_time": "47m 22s", "remaining_time": "2h 21m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78515} {"loss": 0.48543888, "grad_norm": 2.07157278, "learning_rate": 8.934e-05, "token_acc": 0.84725849, "epoch": 2.51181102, "global_step/max_steps": "2233/8890", "percentage": "25.12%", "elapsed_time": "47m 23s", "remaining_time": "2h 21m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785187} {"loss": 0.44682431, "grad_norm": 2.02904654, "learning_rate": 8.933e-05, "token_acc": 0.84328358, "epoch": 2.51293588, "global_step/max_steps": "2234/8890", "percentage": "25.13%", "elapsed_time": "47m 25s", "remaining_time": "2h 21m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785223} {"loss": 0.54122561, "grad_norm": 1.92364454, "learning_rate": 8.932e-05, "token_acc": 0.83350569, "epoch": 2.51406074, "global_step/max_steps": "2235/8890", "percentage": "25.14%", "elapsed_time": "47m 26s", "remaining_time": "2h 21m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785256} {"loss": 0.51560014, "grad_norm": 2.33129215, "learning_rate": 8.931e-05, "token_acc": 0.81932773, "epoch": 2.5151856, "global_step/max_steps": "2236/8890", "percentage": "25.15%", "elapsed_time": "47m 27s", "remaining_time": "2h 21m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785298} {"loss": 0.54797655, "grad_norm": 2.16791248, "learning_rate": 8.93e-05, "token_acc": 0.82823529, "epoch": 2.51631046, "global_step/max_steps": "2237/8890", "percentage": "25.16%", "elapsed_time": "47m 28s", "remaining_time": "2h 21m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785337} {"loss": 0.55616105, "grad_norm": 2.09685683, "learning_rate": 8.928e-05, "token_acc": 0.8130531, "epoch": 2.51743532, "global_step/max_steps": "2238/8890", "percentage": "25.17%", "elapsed_time": "47m 29s", "remaining_time": "2h 21m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785372} {"loss": 0.44148362, "grad_norm": 2.15118194, "learning_rate": 8.927e-05, "token_acc": 0.85549133, "epoch": 2.51856018, "global_step/max_steps": "2239/8890", "percentage": "25.19%", "elapsed_time": "47m 30s", "remaining_time": "2h 21m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78544} {"loss": 0.49140292, "grad_norm": 2.2203362, "learning_rate": 8.926e-05, "token_acc": 0.84284051, "epoch": 2.51968504, "global_step/max_steps": "2240/8890", "percentage": "25.20%", "elapsed_time": "47m 31s", "remaining_time": "2h 21m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785464} {"loss": 0.48923504, "grad_norm": 2.07616138, "learning_rate": 8.925e-05, "token_acc": 0.83432658, "epoch": 2.5208099, "global_step/max_steps": "2241/8890", "percentage": "25.21%", "elapsed_time": "47m 32s", "remaining_time": "2h 21m 4s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785497} {"loss": 0.50502592, "grad_norm": 2.25138021, "learning_rate": 8.924e-05, "token_acc": 0.84096386, "epoch": 2.52193476, "global_step/max_steps": "2242/8890", "percentage": "25.22%", "elapsed_time": "47m 33s", "remaining_time": "2h 21m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785567} {"loss": 0.4277193, "grad_norm": 2.10985923, "learning_rate": 8.923e-05, "token_acc": 0.85696041, "epoch": 2.52305962, "global_step/max_steps": "2243/8890", "percentage": "25.23%", "elapsed_time": "47m 35s", "remaining_time": "2h 21m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785599} {"loss": 0.52407801, "grad_norm": 2.06543946, "learning_rate": 8.921e-05, "token_acc": 0.83956574, "epoch": 2.52418448, "global_step/max_steps": "2244/8890", "percentage": "25.24%", "elapsed_time": "47m 36s", "remaining_time": "2h 20m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785657} {"loss": 0.75601947, "grad_norm": 2.49900293, "learning_rate": 8.92e-05, "token_acc": 0.77572254, "epoch": 2.52530934, "global_step/max_steps": "2245/8890", "percentage": "25.25%", "elapsed_time": "47m 37s", "remaining_time": "2h 20m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785695} {"loss": 0.43470743, "grad_norm": 2.09366727, "learning_rate": 8.919e-05, "token_acc": 0.85341074, "epoch": 2.5264342, "global_step/max_steps": "2246/8890", "percentage": "25.26%", "elapsed_time": "47m 38s", "remaining_time": "2h 20m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785728} {"loss": 0.38096288, "grad_norm": 2.22405243, "learning_rate": 8.918e-05, "token_acc": 0.85908319, "epoch": 2.52755906, "global_step/max_steps": "2247/8890", "percentage": "25.28%", "elapsed_time": "47m 39s", "remaining_time": "2h 20m 54s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785767} {"loss": 0.52337593, "grad_norm": 2.09296274, "learning_rate": 8.917e-05, "token_acc": 0.82900943, "epoch": 2.52868391, "global_step/max_steps": "2248/8890", "percentage": "25.29%", "elapsed_time": "47m 40s", "remaining_time": "2h 20m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785799} {"loss": 0.59488785, "grad_norm": 2.44521546, "learning_rate": 8.916e-05, "token_acc": 0.80666667, "epoch": 2.52980877, "global_step/max_steps": "2249/8890", "percentage": "25.30%", "elapsed_time": "47m 41s", "remaining_time": "2h 20m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785856} {"loss": 0.43832082, "grad_norm": 1.92820215, "learning_rate": 8.915e-05, "token_acc": 0.85363357, "epoch": 2.53093363, "global_step/max_steps": "2250/8890", "percentage": "25.31%", "elapsed_time": "47m 43s", "remaining_time": "2h 20m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785887} {"loss": 0.42973438, "grad_norm": 2.22358084, "learning_rate": 8.913e-05, "token_acc": 0.86094675, "epoch": 2.53205849, "global_step/max_steps": "2251/8890", "percentage": "25.32%", "elapsed_time": "47m 43s", "remaining_time": "2h 20m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785991} {"loss": 0.60224915, "grad_norm": 2.46711659, "learning_rate": 8.912e-05, "token_acc": 0.81578947, "epoch": 2.53318335, "global_step/max_steps": "2252/8890", "percentage": "25.33%", "elapsed_time": "47m 44s", "remaining_time": "2h 20m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786052} {"loss": 0.6043613, "grad_norm": 2.09988666, "learning_rate": 8.911e-05, "token_acc": 0.8299389, "epoch": 2.53430821, "global_step/max_steps": "2253/8890", "percentage": "25.34%", "elapsed_time": "47m 46s", "remaining_time": "2h 20m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786083} {"loss": 0.52812719, "grad_norm": 2.14861155, "learning_rate": 8.91e-05, "token_acc": 0.83289125, "epoch": 2.53543307, "global_step/max_steps": "2254/8890", "percentage": "25.35%", "elapsed_time": "47m 47s", "remaining_time": "2h 20m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786146} {"loss": 0.47263479, "grad_norm": 2.30843163, "learning_rate": 8.909e-05, "token_acc": 0.85569986, "epoch": 2.53655793, "global_step/max_steps": "2255/8890", "percentage": "25.37%", "elapsed_time": "47m 48s", "remaining_time": "2h 20m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786209} {"loss": 0.44863349, "grad_norm": 1.7087965, "learning_rate": 8.908e-05, "token_acc": 0.85195531, "epoch": 2.53768279, "global_step/max_steps": "2256/8890", "percentage": "25.38%", "elapsed_time": "47m 49s", "remaining_time": "2h 20m 37s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78627} {"loss": 0.56414807, "grad_norm": 2.05393219, "learning_rate": 8.906e-05, "token_acc": 0.82579934, "epoch": 2.53880765, "global_step/max_steps": "2257/8890", "percentage": "25.39%", "elapsed_time": "47m 50s", "remaining_time": "2h 20m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786329} {"loss": 0.45900899, "grad_norm": 2.01709914, "learning_rate": 8.905e-05, "token_acc": 0.86582278, "epoch": 2.53993251, "global_step/max_steps": "2258/8890", "percentage": "25.40%", "elapsed_time": "47m 51s", "remaining_time": "2h 20m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786366} {"loss": 0.54238373, "grad_norm": 2.06895566, "learning_rate": 8.904e-05, "token_acc": 0.83168317, "epoch": 2.54105737, "global_step/max_steps": "2259/8890", "percentage": "25.41%", "elapsed_time": "47m 52s", "remaining_time": "2h 20m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786403} {"loss": 0.49332523, "grad_norm": 1.76886821, "learning_rate": 8.903e-05, "token_acc": 0.84812623, "epoch": 2.54218223, "global_step/max_steps": "2260/8890", "percentage": "25.42%", "elapsed_time": "47m 53s", "remaining_time": "2h 20m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786442} {"loss": 0.60621989, "grad_norm": 2.35184455, "learning_rate": 8.902e-05, "token_acc": 0.8168028, "epoch": 2.54330709, "global_step/max_steps": "2261/8890", "percentage": "25.43%", "elapsed_time": "47m 54s", "remaining_time": "2h 20m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786479} {"loss": 0.59903544, "grad_norm": 2.32738805, "learning_rate": 8.901e-05, "token_acc": 0.81395349, "epoch": 2.54443195, "global_step/max_steps": "2262/8890", "percentage": "25.44%", "elapsed_time": "47m 55s", "remaining_time": "2h 20m 26s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786539} {"loss": 0.44928545, "grad_norm": 1.84836519, "learning_rate": 8.899e-05, "token_acc": 0.85531915, "epoch": 2.54555681, "global_step/max_steps": "2263/8890", "percentage": "25.46%", "elapsed_time": "47m 57s", "remaining_time": "2h 20m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786566} {"loss": 0.57154381, "grad_norm": 2.20273972, "learning_rate": 8.898e-05, "token_acc": 0.82219706, "epoch": 2.54668166, "global_step/max_steps": "2264/8890", "percentage": "25.47%", "elapsed_time": "47m 58s", "remaining_time": "2h 20m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786605} {"loss": 0.40321043, "grad_norm": 1.72767663, "learning_rate": 8.897e-05, "token_acc": 0.86977648, "epoch": 2.54780652, "global_step/max_steps": "2265/8890", "percentage": "25.48%", "elapsed_time": "47m 59s", "remaining_time": "2h 20m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786631} {"loss": 0.62539423, "grad_norm": 2.28819871, "learning_rate": 8.896e-05, "token_acc": 0.8030303, "epoch": 2.54893138, "global_step/max_steps": "2266/8890", "percentage": "25.49%", "elapsed_time": "48m 0s", "remaining_time": "2h 20m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786683} {"loss": 0.47743449, "grad_norm": 2.04519606, "learning_rate": 8.895e-05, "token_acc": 0.85074627, "epoch": 2.55005624, "global_step/max_steps": "2267/8890", "percentage": "25.50%", "elapsed_time": "48m 1s", "remaining_time": "2h 20m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786757} {"loss": 0.51608336, "grad_norm": 2.32949638, "learning_rate": 8.894e-05, "token_acc": 0.83232628, "epoch": 2.5511811, "global_step/max_steps": "2268/8890", "percentage": "25.51%", "elapsed_time": "48m 2s", "remaining_time": "2h 20m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786795} {"loss": 0.5043627, "grad_norm": 2.41798615, "learning_rate": 8.892e-05, "token_acc": 0.82566248, "epoch": 2.55230596, "global_step/max_steps": "2269/8890", "percentage": "25.52%", "elapsed_time": "48m 3s", "remaining_time": "2h 20m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786894} {"loss": 0.53251541, "grad_norm": 1.92264175, "learning_rate": 8.891e-05, "token_acc": 0.83003597, "epoch": 2.55343082, "global_step/max_steps": "2270/8890", "percentage": "25.53%", "elapsed_time": "48m 4s", "remaining_time": "2h 20m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786925} {"loss": 0.57496655, "grad_norm": 2.7535181, "learning_rate": 8.89e-05, "token_acc": 0.8363047, "epoch": 2.55455568, "global_step/max_steps": "2271/8890", "percentage": "25.55%", "elapsed_time": "48m 5s", "remaining_time": "2h 20m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787055} {"loss": 0.70559514, "grad_norm": 2.1693027, "learning_rate": 8.889e-05, "token_acc": 0.79543459, "epoch": 2.55568054, "global_step/max_steps": "2272/8890", "percentage": "25.56%", "elapsed_time": "48m 6s", "remaining_time": "2h 20m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787088} {"loss": 0.3947401, "grad_norm": 1.83895051, "learning_rate": 8.888e-05, "token_acc": 0.87015945, "epoch": 2.5568054, "global_step/max_steps": "2273/8890", "percentage": "25.57%", "elapsed_time": "48m 7s", "remaining_time": "2h 20m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787089} {"loss": 0.52379769, "grad_norm": 2.16526175, "learning_rate": 8.887e-05, "token_acc": 0.82327113, "epoch": 2.55793026, "global_step/max_steps": "2274/8890", "percentage": "25.58%", "elapsed_time": "48m 9s", "remaining_time": "2h 20m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787121} {"loss": 0.57342958, "grad_norm": 2.50653839, "learning_rate": 8.885e-05, "token_acc": 0.82514451, "epoch": 2.55905512, "global_step/max_steps": "2275/8890", "percentage": "25.59%", "elapsed_time": "48m 10s", "remaining_time": "2h 20m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78716} {"loss": 0.62628925, "grad_norm": 2.35524678, "learning_rate": 8.884e-05, "token_acc": 0.80368906, "epoch": 2.56017998, "global_step/max_steps": "2276/8890", "percentage": "25.60%", "elapsed_time": "48m 11s", "remaining_time": "2h 20m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787197} {"loss": 0.58261919, "grad_norm": 2.1957562, "learning_rate": 8.883e-05, "token_acc": 0.82107574, "epoch": 2.56130484, "global_step/max_steps": "2277/8890", "percentage": "25.61%", "elapsed_time": "48m 12s", "remaining_time": "2h 19m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787292} {"loss": 0.61994112, "grad_norm": 2.88041282, "learning_rate": 8.882e-05, "token_acc": 0.80428135, "epoch": 2.5624297, "global_step/max_steps": "2278/8890", "percentage": "25.62%", "elapsed_time": "48m 12s", "remaining_time": "2h 19m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787418} {"loss": 0.64281881, "grad_norm": 2.47939873, "learning_rate": 8.881e-05, "token_acc": 0.81024096, "epoch": 2.56355456, "global_step/max_steps": "2279/8890", "percentage": "25.64%", "elapsed_time": "48m 14s", "remaining_time": "2h 19m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787484} {"loss": 0.58481836, "grad_norm": 1.96888709, "learning_rate": 8.88e-05, "token_acc": 0.82168186, "epoch": 2.56467942, "global_step/max_steps": "2280/8890", "percentage": "25.65%", "elapsed_time": "48m 15s", "remaining_time": "2h 19m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787511} {"loss": 0.52836722, "grad_norm": 2.11525106, "learning_rate": 8.878e-05, "token_acc": 0.82915718, "epoch": 2.56580427, "global_step/max_steps": "2281/8890", "percentage": "25.66%", "elapsed_time": "48m 16s", "remaining_time": "2h 19m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787548} {"loss": 0.5150128, "grad_norm": 2.00477743, "learning_rate": 8.877e-05, "token_acc": 0.84347826, "epoch": 2.56692913, "global_step/max_steps": "2282/8890", "percentage": "25.67%", "elapsed_time": "48m 17s", "remaining_time": "2h 19m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787576} {"loss": 0.36012569, "grad_norm": 1.87556255, "learning_rate": 8.876e-05, "token_acc": 0.87617766, "epoch": 2.56805399, "global_step/max_steps": "2283/8890", "percentage": "25.68%", "elapsed_time": "48m 18s", "remaining_time": "2h 19m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787603} {"loss": 0.62552047, "grad_norm": 2.30524254, "learning_rate": 8.875e-05, "token_acc": 0.80369515, "epoch": 2.56917885, "global_step/max_steps": "2284/8890", "percentage": "25.69%", "elapsed_time": "48m 19s", "remaining_time": "2h 19m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787655} {"loss": 0.55035925, "grad_norm": 1.83666372, "learning_rate": 8.874e-05, "token_acc": 0.81647635, "epoch": 2.57030371, "global_step/max_steps": "2285/8890", "percentage": "25.70%", "elapsed_time": "48m 21s", "remaining_time": "2h 19m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787653} {"loss": 0.56138951, "grad_norm": 2.07356381, "learning_rate": 8.873e-05, "token_acc": 0.8136646, "epoch": 2.57142857, "global_step/max_steps": "2286/8890", "percentage": "25.71%", "elapsed_time": "48m 22s", "remaining_time": "2h 19m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787693} {"loss": 0.45606512, "grad_norm": 1.70975149, "learning_rate": 8.871e-05, "token_acc": 0.84719711, "epoch": 2.57255343, "global_step/max_steps": "2287/8890", "percentage": "25.73%", "elapsed_time": "48m 23s", "remaining_time": "2h 19m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78773} {"loss": 0.4708634, "grad_norm": 2.27770543, "learning_rate": 8.87e-05, "token_acc": 0.84104046, "epoch": 2.57367829, "global_step/max_steps": "2288/8890", "percentage": "25.74%", "elapsed_time": "48m 24s", "remaining_time": "2h 19m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787796} {"loss": 0.52797759, "grad_norm": 1.92845178, "learning_rate": 8.869e-05, "token_acc": 0.83837331, "epoch": 2.57480315, "global_step/max_steps": "2289/8890", "percentage": "25.75%", "elapsed_time": "48m 25s", "remaining_time": "2h 19m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787794} {"loss": 0.50778037, "grad_norm": 2.22226191, "learning_rate": 8.868e-05, "token_acc": 0.84066768, "epoch": 2.57592801, "global_step/max_steps": "2290/8890", "percentage": "25.76%", "elapsed_time": "48m 26s", "remaining_time": "2h 19m 37s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787857} {"loss": 0.4674283, "grad_norm": 1.97520649, "learning_rate": 8.867e-05, "token_acc": 0.84375, "epoch": 2.57705287, "global_step/max_steps": "2291/8890", "percentage": "25.77%", "elapsed_time": "48m 27s", "remaining_time": "2h 19m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787914} {"loss": 0.53149009, "grad_norm": 2.22266483, "learning_rate": 8.865e-05, "token_acc": 0.82663605, "epoch": 2.57817773, "global_step/max_steps": "2292/8890", "percentage": "25.78%", "elapsed_time": "48m 28s", "remaining_time": "2h 19m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787984} {"loss": 0.52241123, "grad_norm": 2.22822642, "learning_rate": 8.864e-05, "token_acc": 0.81818182, "epoch": 2.57930259, "global_step/max_steps": "2293/8890", "percentage": "25.79%", "elapsed_time": "48m 29s", "remaining_time": "2h 19m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788046} {"loss": 0.53469121, "grad_norm": 2.16495013, "learning_rate": 8.863e-05, "token_acc": 0.82326622, "epoch": 2.58042745, "global_step/max_steps": "2294/8890", "percentage": "25.80%", "elapsed_time": "48m 31s", "remaining_time": "2h 19m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788043} {"loss": 0.60602331, "grad_norm": 1.94681549, "learning_rate": 8.862e-05, "token_acc": 0.8155416, "epoch": 2.58155231, "global_step/max_steps": "2295/8890", "percentage": "25.82%", "elapsed_time": "48m 32s", "remaining_time": "2h 19m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788036} {"loss": 0.49939734, "grad_norm": 2.47532821, "learning_rate": 8.861e-05, "token_acc": 0.83021583, "epoch": 2.58267717, "global_step/max_steps": "2296/8890", "percentage": "25.83%", "elapsed_time": "48m 33s", "remaining_time": "2h 19m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788076} {"loss": 0.48186401, "grad_norm": 2.48058319, "learning_rate": 8.86e-05, "token_acc": 0.841133, "epoch": 2.58380202, "global_step/max_steps": "2297/8890", "percentage": "25.84%", "elapsed_time": "48m 34s", "remaining_time": "2h 19m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788133} {"loss": 0.52347863, "grad_norm": 2.11115026, "learning_rate": 8.858e-05, "token_acc": 0.83787129, "epoch": 2.58492688, "global_step/max_steps": "2298/8890", "percentage": "25.85%", "elapsed_time": "48m 35s", "remaining_time": "2h 19m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788172} {"loss": 0.61169624, "grad_norm": 1.90613234, "learning_rate": 8.857e-05, "token_acc": 0.81180812, "epoch": 2.58605174, "global_step/max_steps": "2299/8890", "percentage": "25.86%", "elapsed_time": "48m 36s", "remaining_time": "2h 19m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788164} {"loss": 0.48982602, "grad_norm": 2.44862366, "learning_rate": 8.856e-05, "token_acc": 0.83387622, "epoch": 2.5871766, "global_step/max_steps": "2300/8890", "percentage": "25.87%", "elapsed_time": "48m 37s", "remaining_time": "2h 19m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788293} {"loss": 0.60550457, "grad_norm": 2.07336807, "learning_rate": 8.855e-05, "token_acc": 0.82883642, "epoch": 2.58830146, "global_step/max_steps": "2301/8890", "percentage": "25.88%", "elapsed_time": "48m 38s", "remaining_time": "2h 19m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788352} {"loss": 0.40108454, "grad_norm": 1.81059766, "learning_rate": 8.854e-05, "token_acc": 0.87609649, "epoch": 2.58942632, "global_step/max_steps": "2302/8890", "percentage": "25.89%", "elapsed_time": "48m 40s", "remaining_time": "2h 19m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788353} {"loss": 0.5543834, "grad_norm": 2.11831617, "learning_rate": 8.852e-05, "token_acc": 0.81818182, "epoch": 2.59055118, "global_step/max_steps": "2303/8890", "percentage": "25.91%", "elapsed_time": "48m 41s", "remaining_time": "2h 19m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788389} {"loss": 0.529414, "grad_norm": 2.40202308, "learning_rate": 8.851e-05, "token_acc": 0.82932996, "epoch": 2.59167604, "global_step/max_steps": "2304/8890", "percentage": "25.92%", "elapsed_time": "48m 42s", "remaining_time": "2h 19m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78844} {"loss": 0.55981719, "grad_norm": 2.13209319, "learning_rate": 8.85e-05, "token_acc": 0.82119914, "epoch": 2.5928009, "global_step/max_steps": "2305/8890", "percentage": "25.93%", "elapsed_time": "48m 43s", "remaining_time": "2h 19m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.7885} {"loss": 0.44282475, "grad_norm": 2.27558136, "learning_rate": 8.849e-05, "token_acc": 0.85650888, "epoch": 2.59392576, "global_step/max_steps": "2306/8890", "percentage": "25.94%", "elapsed_time": "48m 44s", "remaining_time": "2h 19m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788561} {"loss": 0.59539795, "grad_norm": 2.07660961, "learning_rate": 8.848e-05, "token_acc": 0.82232558, "epoch": 2.59505062, "global_step/max_steps": "2307/8890", "percentage": "25.95%", "elapsed_time": "48m 45s", "remaining_time": "2h 19m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788611} {"loss": 0.45194826, "grad_norm": 1.90340972, "learning_rate": 8.847e-05, "token_acc": 0.85594111, "epoch": 2.59617548, "global_step/max_steps": "2308/8890", "percentage": "25.96%", "elapsed_time": "48m 46s", "remaining_time": "2h 19m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788579} {"loss": 0.43931866, "grad_norm": 1.90003097, "learning_rate": 8.845e-05, "token_acc": 0.8557047, "epoch": 2.59730034, "global_step/max_steps": "2309/8890", "percentage": "25.97%", "elapsed_time": "48m 47s", "remaining_time": "2h 19m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788614} {"loss": 0.53894478, "grad_norm": 2.24645185, "learning_rate": 8.844e-05, "token_acc": 0.81779207, "epoch": 2.5984252, "global_step/max_steps": "2310/8890", "percentage": "25.98%", "elapsed_time": "48m 49s", "remaining_time": "2h 19m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788641} {"loss": 0.39248753, "grad_norm": 2.17426062, "learning_rate": 8.843e-05, "token_acc": 0.8776435, "epoch": 2.59955006, "global_step/max_steps": "2311/8890", "percentage": "26.00%", "elapsed_time": "48m 50s", "remaining_time": "2h 19m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788692} {"loss": 0.53301281, "grad_norm": 2.18572664, "learning_rate": 8.842e-05, "token_acc": 0.83234947, "epoch": 2.60067492, "global_step/max_steps": "2312/8890", "percentage": "26.01%", "elapsed_time": "48m 51s", "remaining_time": "2h 18m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788732} {"loss": 0.41700768, "grad_norm": 1.85207701, "learning_rate": 8.841e-05, "token_acc": 0.85658915, "epoch": 2.60179978, "global_step/max_steps": "2313/8890", "percentage": "26.02%", "elapsed_time": "48m 52s", "remaining_time": "2h 18m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788814} {"loss": 0.61774862, "grad_norm": 2.25907588, "learning_rate": 8.839e-05, "token_acc": 0.79831933, "epoch": 2.60292463, "global_step/max_steps": "2314/8890", "percentage": "26.03%", "elapsed_time": "48m 53s", "remaining_time": "2h 18m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788811} {"loss": 0.3606551, "grad_norm": 2.00925207, "learning_rate": 8.838e-05, "token_acc": 0.9, "epoch": 2.60404949, "global_step/max_steps": "2315/8890", "percentage": "26.04%", "elapsed_time": "48m 54s", "remaining_time": "2h 18m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788835} {"loss": 0.56134164, "grad_norm": 1.98118198, "learning_rate": 8.837e-05, "token_acc": 0.81616341, "epoch": 2.60517435, "global_step/max_steps": "2316/8890", "percentage": "26.05%", "elapsed_time": "48m 55s", "remaining_time": "2h 18m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788868} {"loss": 0.55076283, "grad_norm": 2.23094034, "learning_rate": 8.836e-05, "token_acc": 0.82445141, "epoch": 2.60629921, "global_step/max_steps": "2317/8890", "percentage": "26.06%", "elapsed_time": "48m 56s", "remaining_time": "2h 18m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788951} {"loss": 0.49946883, "grad_norm": 2.16701126, "learning_rate": 8.835e-05, "token_acc": 0.83431953, "epoch": 2.60742407, "global_step/max_steps": "2318/8890", "percentage": "26.07%", "elapsed_time": "48m 57s", "remaining_time": "2h 18m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788982} {"loss": 0.62517786, "grad_norm": 2.3164494, "learning_rate": 8.833e-05, "token_acc": 0.80509554, "epoch": 2.60854893, "global_step/max_steps": "2319/8890", "percentage": "26.09%", "elapsed_time": "48m 59s", "remaining_time": "2h 18m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789012} {"loss": 0.58784854, "grad_norm": 2.18083692, "learning_rate": 8.832e-05, "token_acc": 0.82032854, "epoch": 2.60967379, "global_step/max_steps": "2320/8890", "percentage": "26.10%", "elapsed_time": "49m 0s", "remaining_time": "2h 18m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789084} {"loss": 0.58163863, "grad_norm": 2.37315941, "learning_rate": 8.831e-05, "token_acc": 0.82097187, "epoch": 2.61079865, "global_step/max_steps": "2321/8890", "percentage": "26.11%", "elapsed_time": "49m 1s", "remaining_time": "2h 18m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789107} {"loss": 0.64855421, "grad_norm": 2.30980682, "learning_rate": 8.83e-05, "token_acc": 0.78671329, "epoch": 2.61192351, "global_step/max_steps": "2322/8890", "percentage": "26.12%", "elapsed_time": "49m 2s", "remaining_time": "2h 18m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789157} {"loss": 0.72545254, "grad_norm": 2.29742813, "learning_rate": 8.829e-05, "token_acc": 0.7904328, "epoch": 2.61304837, "global_step/max_steps": "2323/8890", "percentage": "26.13%", "elapsed_time": "49m 3s", "remaining_time": "2h 18m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789187} {"loss": 0.50220853, "grad_norm": 2.1619153, "learning_rate": 8.827e-05, "token_acc": 0.83974359, "epoch": 2.61417323, "global_step/max_steps": "2324/8890", "percentage": "26.14%", "elapsed_time": "49m 4s", "remaining_time": "2h 18m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789251} {"loss": 0.54578304, "grad_norm": 1.90945005, "learning_rate": 8.826e-05, "token_acc": 0.81826742, "epoch": 2.61529809, "global_step/max_steps": "2325/8890", "percentage": "26.15%", "elapsed_time": "49m 5s", "remaining_time": "2h 18m 37s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789283} {"loss": 0.55432093, "grad_norm": 2.27953148, "learning_rate": 8.825e-05, "token_acc": 0.81184669, "epoch": 2.61642295, "global_step/max_steps": "2326/8890", "percentage": "26.16%", "elapsed_time": "49m 6s", "remaining_time": "2h 18m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789343} {"loss": 0.3846828, "grad_norm": 2.20910144, "learning_rate": 8.824e-05, "token_acc": 0.87112561, "epoch": 2.61754781, "global_step/max_steps": "2327/8890", "percentage": "26.18%", "elapsed_time": "49m 7s", "remaining_time": "2h 18m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.7894} {"loss": 0.49579144, "grad_norm": 2.17445111, "learning_rate": 8.823e-05, "token_acc": 0.83573807, "epoch": 2.61867267, "global_step/max_steps": "2328/8890", "percentage": "26.19%", "elapsed_time": "49m 8s", "remaining_time": "2h 18m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789436} {"loss": 0.52500379, "grad_norm": 2.45169735, "learning_rate": 8.821e-05, "token_acc": 0.83258595, "epoch": 2.61979753, "global_step/max_steps": "2329/8890", "percentage": "26.20%", "elapsed_time": "49m 10s", "remaining_time": "2h 18m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789462} {"loss": 0.4731662, "grad_norm": 2.22010016, "learning_rate": 8.82e-05, "token_acc": 0.83773087, "epoch": 2.62092238, "global_step/max_steps": "2330/8890", "percentage": "26.21%", "elapsed_time": "49m 11s", "remaining_time": "2h 18m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789498} {"loss": 0.56076276, "grad_norm": 2.23617768, "learning_rate": 8.819e-05, "token_acc": 0.81807372, "epoch": 2.62204724, "global_step/max_steps": "2331/8890", "percentage": "26.22%", "elapsed_time": "49m 12s", "remaining_time": "2h 18m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789538} {"loss": 0.36625832, "grad_norm": 1.83772361, "learning_rate": 8.818e-05, "token_acc": 0.88304094, "epoch": 2.6231721, "global_step/max_steps": "2332/8890", "percentage": "26.23%", "elapsed_time": "49m 13s", "remaining_time": "2h 18m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789606} {"loss": 0.63317907, "grad_norm": 2.03321075, "learning_rate": 8.817e-05, "token_acc": 0.81764706, "epoch": 2.62429696, "global_step/max_steps": "2333/8890", "percentage": "26.24%", "elapsed_time": "49m 14s", "remaining_time": "2h 18m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789633} {"loss": 0.51680344, "grad_norm": 2.40580487, "learning_rate": 8.815e-05, "token_acc": 0.83333333, "epoch": 2.62542182, "global_step/max_steps": "2334/8890", "percentage": "26.25%", "elapsed_time": "49m 15s", "remaining_time": "2h 18m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789757} {"loss": 0.43024406, "grad_norm": 2.060323, "learning_rate": 8.814e-05, "token_acc": 0.86926287, "epoch": 2.62654668, "global_step/max_steps": "2335/8890", "percentage": "26.27%", "elapsed_time": "49m 16s", "remaining_time": "2h 18m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789786} {"loss": 0.36420348, "grad_norm": 1.96897268, "learning_rate": 8.813e-05, "token_acc": 0.88491446, "epoch": 2.62767154, "global_step/max_steps": "2336/8890", "percentage": "26.28%", "elapsed_time": "49m 17s", "remaining_time": "2h 18m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789825} {"loss": 0.51750326, "grad_norm": 1.8529743, "learning_rate": 8.812e-05, "token_acc": 0.8226691, "epoch": 2.6287964, "global_step/max_steps": "2337/8890", "percentage": "26.29%", "elapsed_time": "49m 18s", "remaining_time": "2h 18m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789858} {"loss": 0.66668534, "grad_norm": 2.12392306, "learning_rate": 8.811e-05, "token_acc": 0.80160858, "epoch": 2.62992126, "global_step/max_steps": "2338/8890", "percentage": "26.30%", "elapsed_time": "49m 19s", "remaining_time": "2h 18m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789889} {"loss": 0.50156593, "grad_norm": 1.73031902, "learning_rate": 8.809e-05, "token_acc": 0.84573643, "epoch": 2.63104612, "global_step/max_steps": "2339/8890", "percentage": "26.31%", "elapsed_time": "49m 21s", "remaining_time": "2h 18m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789925} {"loss": 0.60553265, "grad_norm": 2.14529514, "learning_rate": 8.808e-05, "token_acc": 0.81755424, "epoch": 2.63217098, "global_step/max_steps": "2340/8890", "percentage": "26.32%", "elapsed_time": "49m 22s", "remaining_time": "2h 18m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789961} {"loss": 0.52456558, "grad_norm": 2.33544898, "learning_rate": 8.807e-05, "token_acc": 0.82991803, "epoch": 2.63329584, "global_step/max_steps": "2341/8890", "percentage": "26.33%", "elapsed_time": "49m 23s", "remaining_time": "2h 18m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789982} {"loss": 0.50264984, "grad_norm": 2.15049934, "learning_rate": 8.806e-05, "token_acc": 0.85294118, "epoch": 2.6344207, "global_step/max_steps": "2342/8890", "percentage": "26.34%", "elapsed_time": "49m 24s", "remaining_time": "2h 18m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790041} {"loss": 0.5772388, "grad_norm": 2.45454216, "learning_rate": 8.805e-05, "token_acc": 0.80853816, "epoch": 2.63554556, "global_step/max_steps": "2343/8890", "percentage": "26.36%", "elapsed_time": "49m 25s", "remaining_time": "2h 18m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790069} {"loss": 0.48108906, "grad_norm": 2.25135922, "learning_rate": 8.803e-05, "token_acc": 0.82253521, "epoch": 2.63667042, "global_step/max_steps": "2344/8890", "percentage": "26.37%", "elapsed_time": "49m 26s", "remaining_time": "2h 18m 4s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790104} {"loss": 0.4426918, "grad_norm": 1.820346, "learning_rate": 8.802e-05, "token_acc": 0.85272045, "epoch": 2.63779528, "global_step/max_steps": "2345/8890", "percentage": "26.38%", "elapsed_time": "49m 27s", "remaining_time": "2h 18m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790137} {"loss": 0.52270567, "grad_norm": 2.10095739, "learning_rate": 8.801e-05, "token_acc": 0.83611384, "epoch": 2.63892013, "global_step/max_steps": "2346/8890", "percentage": "26.39%", "elapsed_time": "49m 29s", "remaining_time": "2h 18m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790164} {"loss": 0.54967129, "grad_norm": 2.28541899, "learning_rate": 8.8e-05, "token_acc": 0.82054616, "epoch": 2.64004499, "global_step/max_steps": "2347/8890", "percentage": "26.40%", "elapsed_time": "49m 30s", "remaining_time": "2h 18m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790199} {"loss": 0.45316592, "grad_norm": 2.01865363, "learning_rate": 8.799e-05, "token_acc": 0.85223725, "epoch": 2.64116985, "global_step/max_steps": "2348/8890", "percentage": "26.41%", "elapsed_time": "49m 31s", "remaining_time": "2h 17m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790199} {"loss": 0.68939763, "grad_norm": 2.17269015, "learning_rate": 8.797e-05, "token_acc": 0.79621668, "epoch": 2.64229471, "global_step/max_steps": "2349/8890", "percentage": "26.42%", "elapsed_time": "49m 32s", "remaining_time": "2h 17m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790225} {"loss": 0.55852485, "grad_norm": 2.05471945, "learning_rate": 8.796e-05, "token_acc": 0.82368421, "epoch": 2.64341957, "global_step/max_steps": "2350/8890", "percentage": "26.43%", "elapsed_time": "49m 33s", "remaining_time": "2h 17m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790259} {"loss": 0.60290885, "grad_norm": 2.40589547, "learning_rate": 8.795e-05, "token_acc": 0.81656051, "epoch": 2.64454443, "global_step/max_steps": "2351/8890", "percentage": "26.45%", "elapsed_time": "49m 34s", "remaining_time": "2h 17m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790318} {"loss": 0.42019141, "grad_norm": 1.95513153, "learning_rate": 8.794e-05, "token_acc": 0.86574074, "epoch": 2.64566929, "global_step/max_steps": "2352/8890", "percentage": "26.46%", "elapsed_time": "49m 35s", "remaining_time": "2h 17m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790374} {"loss": 0.59049594, "grad_norm": 2.35271287, "learning_rate": 8.793e-05, "token_acc": 0.81663327, "epoch": 2.64679415, "global_step/max_steps": "2353/8890", "percentage": "26.47%", "elapsed_time": "49m 37s", "remaining_time": "2h 17m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790364} {"loss": 0.47057515, "grad_norm": 2.26431203, "learning_rate": 8.791e-05, "token_acc": 0.83734088, "epoch": 2.64791901, "global_step/max_steps": "2354/8890", "percentage": "26.48%", "elapsed_time": "49m 38s", "remaining_time": "2h 17m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790403} {"loss": 0.59424019, "grad_norm": 2.09918547, "learning_rate": 8.79e-05, "token_acc": 0.81096408, "epoch": 2.64904387, "global_step/max_steps": "2355/8890", "percentage": "26.49%", "elapsed_time": "49m 39s", "remaining_time": "2h 17m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790441} {"loss": 0.47287977, "grad_norm": 2.22562814, "learning_rate": 8.789e-05, "token_acc": 0.85006519, "epoch": 2.65016873, "global_step/max_steps": "2356/8890", "percentage": "26.50%", "elapsed_time": "49m 40s", "remaining_time": "2h 17m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790494} {"loss": 0.5977993, "grad_norm": 2.29366875, "learning_rate": 8.788e-05, "token_acc": 0.81091877, "epoch": 2.65129359, "global_step/max_steps": "2357/8890", "percentage": "26.51%", "elapsed_time": "49m 41s", "remaining_time": "2h 17m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79055} {"loss": 0.45518404, "grad_norm": 2.10448527, "learning_rate": 8.786e-05, "token_acc": 0.84661118, "epoch": 2.65241845, "global_step/max_steps": "2358/8890", "percentage": "26.52%", "elapsed_time": "49m 42s", "remaining_time": "2h 17m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790607} {"loss": 0.57137311, "grad_norm": 2.23049998, "learning_rate": 8.785e-05, "token_acc": 0.8129771, "epoch": 2.65354331, "global_step/max_steps": "2359/8890", "percentage": "26.54%", "elapsed_time": "49m 43s", "remaining_time": "2h 17m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790681} {"loss": 0.53115726, "grad_norm": 2.20731783, "learning_rate": 8.784e-05, "token_acc": 0.82224909, "epoch": 2.65466817, "global_step/max_steps": "2360/8890", "percentage": "26.55%", "elapsed_time": "49m 44s", "remaining_time": "2h 17m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790712} {"loss": 0.61521667, "grad_norm": 2.39609456, "learning_rate": 8.783e-05, "token_acc": 0.80595369, "epoch": 2.65579303, "global_step/max_steps": "2361/8890", "percentage": "26.56%", "elapsed_time": "49m 45s", "remaining_time": "2h 17m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790751} {"loss": 0.41209298, "grad_norm": 2.00454497, "learning_rate": 8.782e-05, "token_acc": 0.86894923, "epoch": 2.65691789, "global_step/max_steps": "2362/8890", "percentage": "26.57%", "elapsed_time": "49m 46s", "remaining_time": "2h 17m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790818} {"loss": 0.55785328, "grad_norm": 2.09719229, "learning_rate": 8.78e-05, "token_acc": 0.82758621, "epoch": 2.65804274, "global_step/max_steps": "2363/8890", "percentage": "26.58%", "elapsed_time": "49m 47s", "remaining_time": "2h 17m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790853} {"loss": 0.45375729, "grad_norm": 2.09479356, "learning_rate": 8.779e-05, "token_acc": 0.86129458, "epoch": 2.6591676, "global_step/max_steps": "2364/8890", "percentage": "26.59%", "elapsed_time": "49m 49s", "remaining_time": "2h 17m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790891} {"loss": 0.55729777, "grad_norm": 2.19434237, "learning_rate": 8.778e-05, "token_acc": 0.83175355, "epoch": 2.66029246, "global_step/max_steps": "2365/8890", "percentage": "26.60%", "elapsed_time": "49m 50s", "remaining_time": "2h 17m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790926} {"loss": 0.47255865, "grad_norm": 2.04549813, "learning_rate": 8.777e-05, "token_acc": 0.84080717, "epoch": 2.66141732, "global_step/max_steps": "2366/8890", "percentage": "26.61%", "elapsed_time": "49m 51s", "remaining_time": "2h 17m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790948} {"loss": 0.41675434, "grad_norm": 2.02048421, "learning_rate": 8.775e-05, "token_acc": 0.85840708, "epoch": 2.66254218, "global_step/max_steps": "2367/8890", "percentage": "26.63%", "elapsed_time": "49m 52s", "remaining_time": "2h 17m 26s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790978} {"loss": 0.54465383, "grad_norm": 2.20379019, "learning_rate": 8.774e-05, "token_acc": 0.82512953, "epoch": 2.66366704, "global_step/max_steps": "2368/8890", "percentage": "26.64%", "elapsed_time": "49m 53s", "remaining_time": "2h 17m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791012} {"loss": 0.46599454, "grad_norm": 1.84542167, "learning_rate": 8.773e-05, "token_acc": 0.85152409, "epoch": 2.6647919, "global_step/max_steps": "2369/8890", "percentage": "26.65%", "elapsed_time": "49m 54s", "remaining_time": "2h 17m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791047} {"loss": 0.59850883, "grad_norm": 2.27618599, "learning_rate": 8.772e-05, "token_acc": 0.81936246, "epoch": 2.66591676, "global_step/max_steps": "2370/8890", "percentage": "26.66%", "elapsed_time": "49m 55s", "remaining_time": "2h 17m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791075} {"loss": 0.56317663, "grad_norm": 2.76408219, "learning_rate": 8.771e-05, "token_acc": 0.82586428, "epoch": 2.66704162, "global_step/max_steps": "2371/8890", "percentage": "26.67%", "elapsed_time": "49m 56s", "remaining_time": "2h 17m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79113} {"loss": 0.60449898, "grad_norm": 2.36706138, "learning_rate": 8.769e-05, "token_acc": 0.81136951, "epoch": 2.66816648, "global_step/max_steps": "2372/8890", "percentage": "26.68%", "elapsed_time": "49m 58s", "remaining_time": "2h 17m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791178} {"loss": 0.51302171, "grad_norm": 2.12776971, "learning_rate": 8.768e-05, "token_acc": 0.83205128, "epoch": 2.66929134, "global_step/max_steps": "2373/8890", "percentage": "26.69%", "elapsed_time": "49m 59s", "remaining_time": "2h 17m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791208} {"loss": 0.54807818, "grad_norm": 2.30045438, "learning_rate": 8.767e-05, "token_acc": 0.8254717, "epoch": 2.6704162, "global_step/max_steps": "2374/8890", "percentage": "26.70%", "elapsed_time": "49m 59s", "remaining_time": "2h 17m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791349} {"loss": 0.51582152, "grad_norm": 2.29892874, "learning_rate": 8.766e-05, "token_acc": 0.82487923, "epoch": 2.67154106, "global_step/max_steps": "2375/8890", "percentage": "26.72%", "elapsed_time": "50m 1s", "remaining_time": "2h 17m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791375} {"loss": 0.61002237, "grad_norm": 2.18664789, "learning_rate": 8.764e-05, "token_acc": 0.809101, "epoch": 2.67266592, "global_step/max_steps": "2376/8890", "percentage": "26.73%", "elapsed_time": "50m 2s", "remaining_time": "2h 17m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791412} {"loss": 0.54401028, "grad_norm": 2.14492273, "learning_rate": 8.763e-05, "token_acc": 0.8484472, "epoch": 2.67379078, "global_step/max_steps": "2377/8890", "percentage": "26.74%", "elapsed_time": "50m 3s", "remaining_time": "2h 17m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791439} {"loss": 0.59706378, "grad_norm": 2.38536954, "learning_rate": 8.762e-05, "token_acc": 0.82195846, "epoch": 2.67491564, "global_step/max_steps": "2378/8890", "percentage": "26.75%", "elapsed_time": "50m 4s", "remaining_time": "2h 17m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791477} {"loss": 0.50050032, "grad_norm": 2.25728846, "learning_rate": 8.761e-05, "token_acc": 0.83988764, "epoch": 2.67604049, "global_step/max_steps": "2379/8890", "percentage": "26.76%", "elapsed_time": "50m 5s", "remaining_time": "2h 17m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791556} {"loss": 0.40189046, "grad_norm": 2.09359217, "learning_rate": 8.76e-05, "token_acc": 0.87537092, "epoch": 2.67716535, "global_step/max_steps": "2380/8890", "percentage": "26.77%", "elapsed_time": "50m 6s", "remaining_time": "2h 17m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791595} {"loss": 0.64933968, "grad_norm": 2.24658823, "learning_rate": 8.758e-05, "token_acc": 0.80513919, "epoch": 2.67829021, "global_step/max_steps": "2381/8890", "percentage": "26.78%", "elapsed_time": "50m 7s", "remaining_time": "2h 17m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791632} {"loss": 0.47355509, "grad_norm": 2.18537283, "learning_rate": 8.757e-05, "token_acc": 0.85163205, "epoch": 2.67941507, "global_step/max_steps": "2382/8890", "percentage": "26.79%", "elapsed_time": "50m 8s", "remaining_time": "2h 17m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791699} {"loss": 0.40383017, "grad_norm": 2.05954647, "learning_rate": 8.756e-05, "token_acc": 0.86018237, "epoch": 2.68053993, "global_step/max_steps": "2383/8890", "percentage": "26.81%", "elapsed_time": "50m 9s", "remaining_time": "2h 16m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791759} {"loss": 0.53854883, "grad_norm": 2.11403823, "learning_rate": 8.755e-05, "token_acc": 0.82880756, "epoch": 2.68166479, "global_step/max_steps": "2384/8890", "percentage": "26.82%", "elapsed_time": "50m 10s", "remaining_time": "2h 16m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79178} {"loss": 0.56580579, "grad_norm": 2.08280134, "learning_rate": 8.753e-05, "token_acc": 0.82298851, "epoch": 2.68278965, "global_step/max_steps": "2385/8890", "percentage": "26.83%", "elapsed_time": "50m 12s", "remaining_time": "2h 16m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791806} {"loss": 0.50196385, "grad_norm": 2.17357945, "learning_rate": 8.752e-05, "token_acc": 0.84260516, "epoch": 2.68391451, "global_step/max_steps": "2386/8890", "percentage": "26.84%", "elapsed_time": "50m 13s", "remaining_time": "2h 16m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791832} {"loss": 0.48985165, "grad_norm": 1.94976044, "learning_rate": 8.751e-05, "token_acc": 0.84192825, "epoch": 2.68503937, "global_step/max_steps": "2387/8890", "percentage": "26.85%", "elapsed_time": "50m 14s", "remaining_time": "2h 16m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79183} {"loss": 0.54904193, "grad_norm": 2.11778522, "learning_rate": 8.75e-05, "token_acc": 0.83490073, "epoch": 2.68616423, "global_step/max_steps": "2388/8890", "percentage": "26.86%", "elapsed_time": "50m 15s", "remaining_time": "2h 16m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791867} {"loss": 0.5455265, "grad_norm": 2.35194302, "learning_rate": 8.749e-05, "token_acc": 0.81862745, "epoch": 2.68728909, "global_step/max_steps": "2389/8890", "percentage": "26.87%", "elapsed_time": "50m 16s", "remaining_time": "2h 16m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791892} {"loss": 0.45250592, "grad_norm": 2.13741994, "learning_rate": 8.747e-05, "token_acc": 0.85475578, "epoch": 2.68841395, "global_step/max_steps": "2390/8890", "percentage": "26.88%", "elapsed_time": "50m 17s", "remaining_time": "2h 16m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791942} {"loss": 0.32672489, "grad_norm": 1.67970264, "learning_rate": 8.746e-05, "token_acc": 0.88737201, "epoch": 2.68953881, "global_step/max_steps": "2391/8890", "percentage": "26.90%", "elapsed_time": "50m 19s", "remaining_time": "2h 16m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.791972} {"loss": 0.52166426, "grad_norm": 1.98663604, "learning_rate": 8.745e-05, "token_acc": 0.83812261, "epoch": 2.69066367, "global_step/max_steps": "2392/8890", "percentage": "26.91%", "elapsed_time": "50m 20s", "remaining_time": "2h 16m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792002} {"loss": 0.41238388, "grad_norm": 1.90208471, "learning_rate": 8.744e-05, "token_acc": 0.86387435, "epoch": 2.69178853, "global_step/max_steps": "2393/8890", "percentage": "26.92%", "elapsed_time": "50m 21s", "remaining_time": "2h 16m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792036} {"loss": 0.45839429, "grad_norm": 2.17503786, "learning_rate": 8.742e-05, "token_acc": 0.83704735, "epoch": 2.69291339, "global_step/max_steps": "2394/8890", "percentage": "26.93%", "elapsed_time": "50m 22s", "remaining_time": "2h 16m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792088} {"loss": 0.63299763, "grad_norm": 2.21850133, "learning_rate": 8.741e-05, "token_acc": 0.81647059, "epoch": 2.69403825, "global_step/max_steps": "2395/8890", "percentage": "26.94%", "elapsed_time": "50m 23s", "remaining_time": "2h 16m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792115} {"loss": 0.66277266, "grad_norm": 2.30470777, "learning_rate": 8.74e-05, "token_acc": 0.79856115, "epoch": 2.6951631, "global_step/max_steps": "2396/8890", "percentage": "26.95%", "elapsed_time": "50m 24s", "remaining_time": "2h 16m 37s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792175} {"loss": 0.53971869, "grad_norm": 2.12217212, "learning_rate": 8.739e-05, "token_acc": 0.83792723, "epoch": 2.69628796, "global_step/max_steps": "2397/8890", "percentage": "26.96%", "elapsed_time": "50m 25s", "remaining_time": "2h 16m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792225} {"loss": 0.45100716, "grad_norm": 2.27043915, "learning_rate": 8.737e-05, "token_acc": 0.85070423, "epoch": 2.69741282, "global_step/max_steps": "2398/8890", "percentage": "26.97%", "elapsed_time": "50m 26s", "remaining_time": "2h 16m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792248} {"loss": 0.66277677, "grad_norm": 2.25251365, "learning_rate": 8.736e-05, "token_acc": 0.7954779, "epoch": 2.69853768, "global_step/max_steps": "2399/8890", "percentage": "26.99%", "elapsed_time": "50m 28s", "remaining_time": "2h 16m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792272} {"loss": 0.56588501, "grad_norm": 1.98402452, "learning_rate": 8.735e-05, "token_acc": 0.82403433, "epoch": 2.69966254, "global_step/max_steps": "2400/8890", "percentage": "27.00%", "elapsed_time": "50m 29s", "remaining_time": "2h 16m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.792306} {"eval_loss": 0.94022012, "eval_runtime": 31.6621, "eval_samples_per_second": 25.362, "eval_steps_per_second": 3.19, "eval_token_acc": 0.73878718, "epoch": 2.69966254, "global_step/max_steps": "2400/8890", "percentage": "27.00%", "elapsed_time": "51m 0s", "remaining_time": "2h 17m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784108} {"loss": 0.64703155, "grad_norm": 2.22713351, "learning_rate": 8.734e-05, "token_acc": 0.80728051, "epoch": 2.7007874, "global_step/max_steps": "2401/8890", "percentage": "27.01%", "elapsed_time": "51m 15s", "remaining_time": "2h 18m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780573} {"loss": 0.40940523, "grad_norm": 2.38048506, "learning_rate": 8.732e-05, "token_acc": 0.85584219, "epoch": 2.70191226, "global_step/max_steps": "2402/8890", "percentage": "27.02%", "elapsed_time": "51m 17s", "remaining_time": "2h 18m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780616} {"loss": 0.57422709, "grad_norm": 2.13097334, "learning_rate": 8.731e-05, "token_acc": 0.81673307, "epoch": 2.70303712, "global_step/max_steps": "2403/8890", "percentage": "27.03%", "elapsed_time": "51m 18s", "remaining_time": "2h 18m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780678} {"loss": 0.62961525, "grad_norm": 2.48441172, "learning_rate": 8.73e-05, "token_acc": 0.80744544, "epoch": 2.70416198, "global_step/max_steps": "2404/8890", "percentage": "27.04%", "elapsed_time": "51m 19s", "remaining_time": "2h 18m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780741} {"loss": 0.54342937, "grad_norm": 2.22762227, "learning_rate": 8.729e-05, "token_acc": 0.83415233, "epoch": 2.70528684, "global_step/max_steps": "2405/8890", "percentage": "27.05%", "elapsed_time": "51m 20s", "remaining_time": "2h 18m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780775} {"loss": 0.60669661, "grad_norm": 2.0138731, "learning_rate": 8.728e-05, "token_acc": 0.82028112, "epoch": 2.7064117, "global_step/max_steps": "2406/8890", "percentage": "27.06%", "elapsed_time": "51m 21s", "remaining_time": "2h 18m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780808} {"loss": 0.63030124, "grad_norm": 2.07017112, "learning_rate": 8.726e-05, "token_acc": 0.81898239, "epoch": 2.70753656, "global_step/max_steps": "2407/8890", "percentage": "27.08%", "elapsed_time": "51m 22s", "remaining_time": "2h 18m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780837} {"loss": 0.61827797, "grad_norm": 1.85672712, "learning_rate": 8.725e-05, "token_acc": 0.81458146, "epoch": 2.70866142, "global_step/max_steps": "2408/8890", "percentage": "27.09%", "elapsed_time": "51m 23s", "remaining_time": "2h 18m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78082} {"loss": 0.54994261, "grad_norm": 2.18620539, "learning_rate": 8.724e-05, "token_acc": 0.81561822, "epoch": 2.70978628, "global_step/max_steps": "2409/8890", "percentage": "27.10%", "elapsed_time": "51m 25s", "remaining_time": "2h 18m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780852} {"loss": 0.62620068, "grad_norm": 2.19825912, "learning_rate": 8.723e-05, "token_acc": 0.81194907, "epoch": 2.71091114, "global_step/max_steps": "2410/8890", "percentage": "27.11%", "elapsed_time": "51m 26s", "remaining_time": "2h 18m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780885} {"loss": 0.62602425, "grad_norm": 2.30927515, "learning_rate": 8.721e-05, "token_acc": 0.799117, "epoch": 2.712036, "global_step/max_steps": "2411/8890", "percentage": "27.12%", "elapsed_time": "51m 27s", "remaining_time": "2h 18m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780922} {"loss": 0.48992947, "grad_norm": 2.29777431, "learning_rate": 8.72e-05, "token_acc": 0.82079343, "epoch": 2.71316085, "global_step/max_steps": "2412/8890", "percentage": "27.13%", "elapsed_time": "51m 28s", "remaining_time": "2h 18m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78099} {"loss": 0.6385603, "grad_norm": 2.18984723, "learning_rate": 8.719e-05, "token_acc": 0.8008342, "epoch": 2.71428571, "global_step/max_steps": "2413/8890", "percentage": "27.14%", "elapsed_time": "51m 29s", "remaining_time": "2h 18m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781049} {"loss": 0.56862986, "grad_norm": 2.01984954, "learning_rate": 8.718e-05, "token_acc": 0.84122288, "epoch": 2.71541057, "global_step/max_steps": "2414/8890", "percentage": "27.15%", "elapsed_time": "51m 30s", "remaining_time": "2h 18m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78105} {"loss": 0.58539474, "grad_norm": 2.0915091, "learning_rate": 8.716e-05, "token_acc": 0.82545455, "epoch": 2.71653543, "global_step/max_steps": "2415/8890", "percentage": "27.17%", "elapsed_time": "51m 31s", "remaining_time": "2h 18m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781086} {"loss": 0.5464927, "grad_norm": 2.17546439, "learning_rate": 8.715e-05, "token_acc": 0.82134571, "epoch": 2.71766029, "global_step/max_steps": "2416/8890", "percentage": "27.18%", "elapsed_time": "51m 32s", "remaining_time": "2h 18m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781123} {"loss": 0.64871746, "grad_norm": 2.07129145, "learning_rate": 8.714e-05, "token_acc": 0.8048048, "epoch": 2.71878515, "global_step/max_steps": "2417/8890", "percentage": "27.19%", "elapsed_time": "51m 34s", "remaining_time": "2h 18m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781163} {"loss": 0.63109219, "grad_norm": 2.28948951, "learning_rate": 8.713e-05, "token_acc": 0.79204108, "epoch": 2.71991001, "global_step/max_steps": "2418/8890", "percentage": "27.20%", "elapsed_time": "51m 35s", "remaining_time": "2h 18m 4s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781188} {"loss": 0.41247052, "grad_norm": 1.71996975, "learning_rate": 8.711e-05, "token_acc": 0.84347826, "epoch": 2.72103487, "global_step/max_steps": "2419/8890", "percentage": "27.21%", "elapsed_time": "51m 36s", "remaining_time": "2h 18m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781222} {"loss": 0.49702227, "grad_norm": 1.94585335, "learning_rate": 8.71e-05, "token_acc": 0.84755245, "epoch": 2.72215973, "global_step/max_steps": "2420/8890", "percentage": "27.22%", "elapsed_time": "51m 37s", "remaining_time": "2h 18m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781253} {"loss": 0.49396098, "grad_norm": 2.13809443, "learning_rate": 8.709e-05, "token_acc": 0.83970407, "epoch": 2.72328459, "global_step/max_steps": "2421/8890", "percentage": "27.23%", "elapsed_time": "51m 38s", "remaining_time": "2h 17m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781285} {"loss": 0.52755785, "grad_norm": 1.68223941, "learning_rate": 8.708e-05, "token_acc": 0.83621399, "epoch": 2.72440945, "global_step/max_steps": "2422/8890", "percentage": "27.24%", "elapsed_time": "51m 40s", "remaining_time": "2h 17m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781273} {"loss": 0.46405584, "grad_norm": 1.82752669, "learning_rate": 8.706e-05, "token_acc": 0.85630499, "epoch": 2.72553431, "global_step/max_steps": "2423/8890", "percentage": "27.26%", "elapsed_time": "51m 41s", "remaining_time": "2h 17m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781295} {"loss": 0.40565544, "grad_norm": 1.94526911, "learning_rate": 8.705e-05, "token_acc": 0.86555698, "epoch": 2.72665917, "global_step/max_steps": "2424/8890", "percentage": "27.27%", "elapsed_time": "51m 42s", "remaining_time": "2h 17m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781357} {"loss": 0.47853875, "grad_norm": 1.80211341, "learning_rate": 8.704e-05, "token_acc": 0.84566596, "epoch": 2.72778403, "global_step/max_steps": "2425/8890", "percentage": "27.28%", "elapsed_time": "51m 43s", "remaining_time": "2h 17m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781394} {"loss": 0.41680223, "grad_norm": 2.16889453, "learning_rate": 8.703e-05, "token_acc": 0.85878963, "epoch": 2.72890889, "global_step/max_steps": "2426/8890", "percentage": "27.29%", "elapsed_time": "51m 44s", "remaining_time": "2h 17m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781434} {"loss": 0.49332768, "grad_norm": 2.14415002, "learning_rate": 8.701e-05, "token_acc": 0.83538084, "epoch": 2.73003375, "global_step/max_steps": "2427/8890", "percentage": "27.30%", "elapsed_time": "51m 45s", "remaining_time": "2h 17m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781407} {"loss": 0.66497803, "grad_norm": 2.09888101, "learning_rate": 8.7e-05, "token_acc": 0.7967128, "epoch": 2.73115861, "global_step/max_steps": "2428/8890", "percentage": "27.31%", "elapsed_time": "51m 47s", "remaining_time": "2h 17m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781446} {"loss": 0.60867536, "grad_norm": 1.98278892, "learning_rate": 8.699e-05, "token_acc": 0.81784729, "epoch": 2.73228346, "global_step/max_steps": "2429/8890", "percentage": "27.32%", "elapsed_time": "51m 48s", "remaining_time": "2h 17m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78148} {"loss": 0.47499931, "grad_norm": 2.18205118, "learning_rate": 8.698e-05, "token_acc": 0.85239362, "epoch": 2.73340832, "global_step/max_steps": "2430/8890", "percentage": "27.33%", "elapsed_time": "51m 49s", "remaining_time": "2h 17m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781511} {"loss": 0.56729585, "grad_norm": 2.6166482, "learning_rate": 8.696e-05, "token_acc": 0.82270607, "epoch": 2.73453318, "global_step/max_steps": "2431/8890", "percentage": "27.35%", "elapsed_time": "51m 50s", "remaining_time": "2h 17m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781595} {"loss": 0.67105818, "grad_norm": 2.11284709, "learning_rate": 8.695e-05, "token_acc": 0.80558428, "epoch": 2.73565804, "global_step/max_steps": "2432/8890", "percentage": "27.36%", "elapsed_time": "51m 51s", "remaining_time": "2h 17m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78163} {"loss": 0.52441043, "grad_norm": 2.14157391, "learning_rate": 8.694e-05, "token_acc": 0.85352113, "epoch": 2.7367829, "global_step/max_steps": "2433/8890", "percentage": "27.37%", "elapsed_time": "51m 52s", "remaining_time": "2h 17m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781707} {"loss": 0.6777842, "grad_norm": 2.1176343, "learning_rate": 8.693e-05, "token_acc": 0.79788258, "epoch": 2.73790776, "global_step/max_steps": "2434/8890", "percentage": "27.38%", "elapsed_time": "51m 53s", "remaining_time": "2h 17m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781747} {"loss": 0.70745432, "grad_norm": 1.95732522, "learning_rate": 8.691e-05, "token_acc": 0.77327935, "epoch": 2.73903262, "global_step/max_steps": "2435/8890", "percentage": "27.39%", "elapsed_time": "51m 54s", "remaining_time": "2h 17m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781804} {"loss": 0.52244627, "grad_norm": 1.91532362, "learning_rate": 8.69e-05, "token_acc": 0.85323887, "epoch": 2.74015748, "global_step/max_steps": "2436/8890", "percentage": "27.40%", "elapsed_time": "51m 55s", "remaining_time": "2h 17m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781841} {"loss": 0.48652977, "grad_norm": 2.23985529, "learning_rate": 8.689e-05, "token_acc": 0.84472934, "epoch": 2.74128234, "global_step/max_steps": "2437/8890", "percentage": "27.41%", "elapsed_time": "51m 56s", "remaining_time": "2h 17m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781897} {"loss": 0.4411864, "grad_norm": 1.79672396, "learning_rate": 8.688e-05, "token_acc": 0.8559322, "epoch": 2.7424072, "global_step/max_steps": "2438/8890", "percentage": "27.42%", "elapsed_time": "51m 57s", "remaining_time": "2h 17m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781935} {"loss": 0.57243109, "grad_norm": 1.95726395, "learning_rate": 8.686e-05, "token_acc": 0.82241015, "epoch": 2.74353206, "global_step/max_steps": "2439/8890", "percentage": "27.44%", "elapsed_time": "51m 58s", "remaining_time": "2h 17m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782017} {"loss": 0.57850051, "grad_norm": 2.29439521, "learning_rate": 8.685e-05, "token_acc": 0.81403941, "epoch": 2.74465692, "global_step/max_steps": "2440/8890", "percentage": "27.45%", "elapsed_time": "51m 59s", "remaining_time": "2h 17m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782052} {"loss": 0.48626339, "grad_norm": 1.96148133, "learning_rate": 8.684e-05, "token_acc": 0.84793814, "epoch": 2.74578178, "global_step/max_steps": "2441/8890", "percentage": "27.46%", "elapsed_time": "52m 1s", "remaining_time": "2h 17m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782087} {"loss": 0.60975844, "grad_norm": 2.25588226, "learning_rate": 8.683e-05, "token_acc": 0.81263617, "epoch": 2.74690664, "global_step/max_steps": "2442/8890", "percentage": "27.47%", "elapsed_time": "52m 2s", "remaining_time": "2h 17m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782066} {"loss": 0.64688116, "grad_norm": 2.23965311, "learning_rate": 8.681e-05, "token_acc": 0.80201342, "epoch": 2.7480315, "global_step/max_steps": "2443/8890", "percentage": "27.48%", "elapsed_time": "52m 3s", "remaining_time": "2h 17m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782105} {"loss": 0.66766608, "grad_norm": 2.16930556, "learning_rate": 8.68e-05, "token_acc": 0.78392484, "epoch": 2.74915636, "global_step/max_steps": "2444/8890", "percentage": "27.49%", "elapsed_time": "52m 4s", "remaining_time": "2h 17m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782161} {"loss": 0.51043087, "grad_norm": 2.0940752, "learning_rate": 8.679e-05, "token_acc": 0.83333333, "epoch": 2.75028121, "global_step/max_steps": "2445/8890", "percentage": "27.50%", "elapsed_time": "52m 5s", "remaining_time": "2h 17m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782256} {"loss": 0.52244753, "grad_norm": 2.35393691, "learning_rate": 8.678e-05, "token_acc": 0.83559578, "epoch": 2.75140607, "global_step/max_steps": "2446/8890", "percentage": "27.51%", "elapsed_time": "52m 6s", "remaining_time": "2h 17m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782348} {"loss": 0.6148963, "grad_norm": 2.23662138, "learning_rate": 8.676e-05, "token_acc": 0.79598662, "epoch": 2.75253093, "global_step/max_steps": "2447/8890", "percentage": "27.53%", "elapsed_time": "52m 7s", "remaining_time": "2h 17m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782387} {"loss": 0.58746475, "grad_norm": 2.04417682, "learning_rate": 8.675e-05, "token_acc": 0.81139489, "epoch": 2.75365579, "global_step/max_steps": "2448/8890", "percentage": "27.54%", "elapsed_time": "52m 8s", "remaining_time": "2h 17m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782424} {"loss": 0.47280613, "grad_norm": 2.06088471, "learning_rate": 8.674e-05, "token_acc": 0.85193622, "epoch": 2.75478065, "global_step/max_steps": "2449/8890", "percentage": "27.55%", "elapsed_time": "52m 9s", "remaining_time": "2h 17m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782489} {"loss": 0.54600036, "grad_norm": 2.39652872, "learning_rate": 8.672e-05, "token_acc": 0.82472826, "epoch": 2.75590551, "global_step/max_steps": "2450/8890", "percentage": "27.56%", "elapsed_time": "52m 10s", "remaining_time": "2h 17m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782524} {"loss": 0.41716897, "grad_norm": 2.00986767, "learning_rate": 8.671e-05, "token_acc": 0.87093023, "epoch": 2.75703037, "global_step/max_steps": "2451/8890", "percentage": "27.57%", "elapsed_time": "52m 12s", "remaining_time": "2h 17m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782555} {"loss": 0.56868297, "grad_norm": 2.18260074, "learning_rate": 8.67e-05, "token_acc": 0.82762313, "epoch": 2.75815523, "global_step/max_steps": "2452/8890", "percentage": "27.58%", "elapsed_time": "52m 13s", "remaining_time": "2h 17m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782549} {"loss": 0.53373504, "grad_norm": 2.06458235, "learning_rate": 8.669e-05, "token_acc": 0.83781095, "epoch": 2.75928009, "global_step/max_steps": "2453/8890", "percentage": "27.59%", "elapsed_time": "52m 14s", "remaining_time": "2h 17m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782602} {"loss": 0.62946808, "grad_norm": 2.31087208, "learning_rate": 8.667e-05, "token_acc": 0.81014151, "epoch": 2.76040495, "global_step/max_steps": "2454/8890", "percentage": "27.60%", "elapsed_time": "52m 15s", "remaining_time": "2h 17m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78264} {"loss": 0.61155379, "grad_norm": 2.15720177, "learning_rate": 8.666e-05, "token_acc": 0.81873112, "epoch": 2.76152981, "global_step/max_steps": "2455/8890", "percentage": "27.62%", "elapsed_time": "52m 16s", "remaining_time": "2h 17m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782675} {"loss": 0.58934438, "grad_norm": 2.18788767, "learning_rate": 8.665e-05, "token_acc": 0.81835564, "epoch": 2.76265467, "global_step/max_steps": "2456/8890", "percentage": "27.63%", "elapsed_time": "52m 17s", "remaining_time": "2h 17m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782706} {"loss": 0.50670612, "grad_norm": 2.02427173, "learning_rate": 8.664e-05, "token_acc": 0.83141361, "epoch": 2.76377953, "global_step/max_steps": "2457/8890", "percentage": "27.64%", "elapsed_time": "52m 19s", "remaining_time": "2h 16m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.7827} {"loss": 0.64128679, "grad_norm": 2.24042535, "learning_rate": 8.662e-05, "token_acc": 0.78693182, "epoch": 2.76490439, "global_step/max_steps": "2458/8890", "percentage": "27.65%", "elapsed_time": "52m 20s", "remaining_time": "2h 16m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782766} {"loss": 0.56082499, "grad_norm": 2.11719346, "learning_rate": 8.661e-05, "token_acc": 0.8289225, "epoch": 2.76602925, "global_step/max_steps": "2459/8890", "percentage": "27.66%", "elapsed_time": "52m 21s", "remaining_time": "2h 16m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782823} {"loss": 0.487712, "grad_norm": 1.9181813, "learning_rate": 8.66e-05, "token_acc": 0.84222222, "epoch": 2.76715411, "global_step/max_steps": "2460/8890", "percentage": "27.67%", "elapsed_time": "52m 22s", "remaining_time": "2h 16m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782858} {"loss": 0.53597951, "grad_norm": 1.97264421, "learning_rate": 8.659e-05, "token_acc": 0.83172656, "epoch": 2.76827897, "global_step/max_steps": "2461/8890", "percentage": "27.68%", "elapsed_time": "52m 23s", "remaining_time": "2h 16m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782895} {"loss": 0.48597676, "grad_norm": 2.14987278, "learning_rate": 8.657e-05, "token_acc": 0.84370678, "epoch": 2.76940382, "global_step/max_steps": "2462/8890", "percentage": "27.69%", "elapsed_time": "52m 24s", "remaining_time": "2h 16m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782921} {"loss": 0.61341655, "grad_norm": 2.29924512, "learning_rate": 8.656e-05, "token_acc": 0.79813665, "epoch": 2.77052868, "global_step/max_steps": "2463/8890", "percentage": "27.71%", "elapsed_time": "52m 25s", "remaining_time": "2h 16m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782946} {"loss": 0.5197953, "grad_norm": 2.46239591, "learning_rate": 8.655e-05, "token_acc": 0.84155456, "epoch": 2.77165354, "global_step/max_steps": "2464/8890", "percentage": "27.72%", "elapsed_time": "52m 26s", "remaining_time": "2h 16m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783027} {"loss": 0.56482208, "grad_norm": 2.38396072, "learning_rate": 8.653e-05, "token_acc": 0.8238842, "epoch": 2.7727784, "global_step/max_steps": "2465/8890", "percentage": "27.73%", "elapsed_time": "52m 27s", "remaining_time": "2h 16m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78309} {"loss": 0.49128842, "grad_norm": 2.03345346, "learning_rate": 8.652e-05, "token_acc": 0.84298332, "epoch": 2.77390326, "global_step/max_steps": "2466/8890", "percentage": "27.74%", "elapsed_time": "52m 28s", "remaining_time": "2h 16m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783147} {"loss": 0.52978659, "grad_norm": 2.33749008, "learning_rate": 8.651e-05, "token_acc": 0.81866667, "epoch": 2.77502812, "global_step/max_steps": "2467/8890", "percentage": "27.75%", "elapsed_time": "52m 29s", "remaining_time": "2h 16m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783184} {"loss": 0.66625369, "grad_norm": 2.40719104, "learning_rate": 8.65e-05, "token_acc": 0.79634146, "epoch": 2.77615298, "global_step/max_steps": "2468/8890", "percentage": "27.76%", "elapsed_time": "52m 31s", "remaining_time": "2h 16m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783218} {"loss": 0.63361734, "grad_norm": 2.01118684, "learning_rate": 8.648e-05, "token_acc": 0.82126899, "epoch": 2.77727784, "global_step/max_steps": "2469/8890", "percentage": "27.77%", "elapsed_time": "52m 32s", "remaining_time": "2h 16m 37s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783246} {"loss": 0.58293515, "grad_norm": 2.24413037, "learning_rate": 8.647e-05, "token_acc": 0.81661273, "epoch": 2.7784027, "global_step/max_steps": "2470/8890", "percentage": "27.78%", "elapsed_time": "52m 33s", "remaining_time": "2h 16m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783247} {"loss": 0.54341108, "grad_norm": 2.08100724, "learning_rate": 8.646e-05, "token_acc": 0.81750267, "epoch": 2.77952756, "global_step/max_steps": "2471/8890", "percentage": "27.80%", "elapsed_time": "52m 34s", "remaining_time": "2h 16m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783224} {"loss": 0.51275647, "grad_norm": 2.3042388, "learning_rate": 8.645e-05, "token_acc": 0.83751846, "epoch": 2.78065242, "global_step/max_steps": "2472/8890", "percentage": "27.81%", "elapsed_time": "52m 35s", "remaining_time": "2h 16m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783283} {"loss": 0.62977517, "grad_norm": 2.37321329, "learning_rate": 8.643e-05, "token_acc": 0.81524548, "epoch": 2.78177728, "global_step/max_steps": "2473/8890", "percentage": "27.82%", "elapsed_time": "52m 37s", "remaining_time": "2h 16m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783319} {"loss": 0.62152958, "grad_norm": 2.32774186, "learning_rate": 8.642e-05, "token_acc": 0.80352941, "epoch": 2.78290214, "global_step/max_steps": "2474/8890", "percentage": "27.83%", "elapsed_time": "52m 38s", "remaining_time": "2h 16m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783352} {"loss": 0.54661649, "grad_norm": 1.94536781, "learning_rate": 8.641e-05, "token_acc": 0.81741573, "epoch": 2.784027, "global_step/max_steps": "2475/8890", "percentage": "27.84%", "elapsed_time": "52m 39s", "remaining_time": "2h 16m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783386} {"loss": 0.50127518, "grad_norm": 2.28716183, "learning_rate": 8.639e-05, "token_acc": 0.8372093, "epoch": 2.78515186, "global_step/max_steps": "2476/8890", "percentage": "27.85%", "elapsed_time": "52m 40s", "remaining_time": "2h 16m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783418} {"loss": 0.54173362, "grad_norm": 2.29632998, "learning_rate": 8.638e-05, "token_acc": 0.82513661, "epoch": 2.78627672, "global_step/max_steps": "2477/8890", "percentage": "27.86%", "elapsed_time": "52m 41s", "remaining_time": "2h 16m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783467} {"loss": 0.60046506, "grad_norm": 2.18986106, "learning_rate": 8.637e-05, "token_acc": 0.80631579, "epoch": 2.78740157, "global_step/max_steps": "2478/8890", "percentage": "27.87%", "elapsed_time": "52m 42s", "remaining_time": "2h 16m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783578} {"loss": 0.5153091, "grad_norm": 2.15193272, "learning_rate": 8.636e-05, "token_acc": 0.85615491, "epoch": 2.78852643, "global_step/max_steps": "2479/8890", "percentage": "27.89%", "elapsed_time": "52m 43s", "remaining_time": "2h 16m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783602} {"loss": 0.5457691, "grad_norm": 2.28932238, "learning_rate": 8.634e-05, "token_acc": 0.82352941, "epoch": 2.78965129, "global_step/max_steps": "2480/8890", "percentage": "27.90%", "elapsed_time": "52m 44s", "remaining_time": "2h 16m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783642} {"loss": 0.42535782, "grad_norm": 1.98842239, "learning_rate": 8.633e-05, "token_acc": 0.85385656, "epoch": 2.79077615, "global_step/max_steps": "2481/8890", "percentage": "27.91%", "elapsed_time": "52m 45s", "remaining_time": "2h 16m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783678} {"loss": 0.47123688, "grad_norm": 2.22977662, "learning_rate": 8.632e-05, "token_acc": 0.8537234, "epoch": 2.79190101, "global_step/max_steps": "2482/8890", "percentage": "27.92%", "elapsed_time": "52m 46s", "remaining_time": "2h 16m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783716} {"loss": 0.4741146, "grad_norm": 2.06307364, "learning_rate": 8.631e-05, "token_acc": 0.84624697, "epoch": 2.79302587, "global_step/max_steps": "2483/8890", "percentage": "27.93%", "elapsed_time": "52m 48s", "remaining_time": "2h 16m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783754} {"loss": 0.55622107, "grad_norm": 2.03753567, "learning_rate": 8.629e-05, "token_acc": 0.8204878, "epoch": 2.79415073, "global_step/max_steps": "2484/8890", "percentage": "27.94%", "elapsed_time": "52m 49s", "remaining_time": "2h 16m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783787} {"loss": 0.50408852, "grad_norm": 2.02484441, "learning_rate": 8.628e-05, "token_acc": 0.82566586, "epoch": 2.79527559, "global_step/max_steps": "2485/8890", "percentage": "27.95%", "elapsed_time": "52m 50s", "remaining_time": "2h 16m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783786} {"loss": 0.40416947, "grad_norm": 2.06744123, "learning_rate": 8.627e-05, "token_acc": 0.85343511, "epoch": 2.79640045, "global_step/max_steps": "2486/8890", "percentage": "27.96%", "elapsed_time": "52m 51s", "remaining_time": "2h 16m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783846} {"loss": 0.55971348, "grad_norm": 2.11650801, "learning_rate": 8.625e-05, "token_acc": 0.81938326, "epoch": 2.79752531, "global_step/max_steps": "2487/8890", "percentage": "27.98%", "elapsed_time": "52m 52s", "remaining_time": "2h 16m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78388} {"loss": 0.58755726, "grad_norm": 2.14330864, "learning_rate": 8.624e-05, "token_acc": 0.8165038, "epoch": 2.79865017, "global_step/max_steps": "2488/8890", "percentage": "27.99%", "elapsed_time": "52m 53s", "remaining_time": "2h 16m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783909} {"loss": 0.56979436, "grad_norm": 2.3902564, "learning_rate": 8.623e-05, "token_acc": 0.81891892, "epoch": 2.79977503, "global_step/max_steps": "2489/8890", "percentage": "28.00%", "elapsed_time": "52m 54s", "remaining_time": "2h 16m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783942} {"loss": 0.56124735, "grad_norm": 2.18747878, "learning_rate": 8.622e-05, "token_acc": 0.82766706, "epoch": 2.80089989, "global_step/max_steps": "2490/8890", "percentage": "28.01%", "elapsed_time": "52m 56s", "remaining_time": "2h 16m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783968} {"loss": 0.59125257, "grad_norm": 2.36786437, "learning_rate": 8.62e-05, "token_acc": 0.81891892, "epoch": 2.80202475, "global_step/max_steps": "2491/8890", "percentage": "28.02%", "elapsed_time": "52m 57s", "remaining_time": "2h 16m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784022} {"loss": 0.48885411, "grad_norm": 2.09271646, "learning_rate": 8.619e-05, "token_acc": 0.83333333, "epoch": 2.80314961, "global_step/max_steps": "2492/8890", "percentage": "28.03%", "elapsed_time": "52m 58s", "remaining_time": "2h 16m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784054} {"loss": 0.61276323, "grad_norm": 2.42881775, "learning_rate": 8.618e-05, "token_acc": 0.80190931, "epoch": 2.80427447, "global_step/max_steps": "2493/8890", "percentage": "28.04%", "elapsed_time": "52m 59s", "remaining_time": "2h 15m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784115} {"loss": 0.60138041, "grad_norm": 2.13394284, "learning_rate": 8.616e-05, "token_acc": 0.82220039, "epoch": 2.80539933, "global_step/max_steps": "2494/8890", "percentage": "28.05%", "elapsed_time": "53m 0s", "remaining_time": "2h 15m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784143} {"loss": 0.64017892, "grad_norm": 2.40499043, "learning_rate": 8.615e-05, "token_acc": 0.80184941, "epoch": 2.80652418, "global_step/max_steps": "2495/8890", "percentage": "28.07%", "elapsed_time": "53m 1s", "remaining_time": "2h 15m 54s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784226} {"loss": 0.50648445, "grad_norm": 2.33965778, "learning_rate": 8.614e-05, "token_acc": 0.81937173, "epoch": 2.80764904, "global_step/max_steps": "2496/8890", "percentage": "28.08%", "elapsed_time": "53m 2s", "remaining_time": "2h 15m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784277} {"loss": 0.61038423, "grad_norm": 2.19388366, "learning_rate": 8.613e-05, "token_acc": 0.80548628, "epoch": 2.8087739, "global_step/max_steps": "2497/8890", "percentage": "28.09%", "elapsed_time": "53m 3s", "remaining_time": "2h 15m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784336} {"loss": 0.50080693, "grad_norm": 2.32120132, "learning_rate": 8.611e-05, "token_acc": 0.83462133, "epoch": 2.80989876, "global_step/max_steps": "2498/8890", "percentage": "28.10%", "elapsed_time": "53m 4s", "remaining_time": "2h 15m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784406} {"loss": 0.56477082, "grad_norm": 2.11773896, "learning_rate": 8.61e-05, "token_acc": 0.82773564, "epoch": 2.81102362, "global_step/max_steps": "2499/8890", "percentage": "28.11%", "elapsed_time": "53m 5s", "remaining_time": "2h 15m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784438} {"loss": 0.46272931, "grad_norm": 2.14971495, "learning_rate": 8.609e-05, "token_acc": 0.85164051, "epoch": 2.81214848, "global_step/max_steps": "2500/8890", "percentage": "28.12%", "elapsed_time": "53m 6s", "remaining_time": "2h 15m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78453} {"loss": 0.50795686, "grad_norm": 2.16008615, "learning_rate": 8.607e-05, "token_acc": 0.83879423, "epoch": 2.81327334, "global_step/max_steps": "2501/8890", "percentage": "28.13%", "elapsed_time": "53m 7s", "remaining_time": "2h 15m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784564} {"loss": 0.72074544, "grad_norm": 2.21434903, "learning_rate": 8.606e-05, "token_acc": 0.78230703, "epoch": 2.8143982, "global_step/max_steps": "2502/8890", "percentage": "28.14%", "elapsed_time": "53m 8s", "remaining_time": "2h 15m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784597} {"loss": 0.58231276, "grad_norm": 2.257195, "learning_rate": 8.605e-05, "token_acc": 0.81430364, "epoch": 2.81552306, "global_step/max_steps": "2503/8890", "percentage": "28.16%", "elapsed_time": "53m 10s", "remaining_time": "2h 15m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784632} {"loss": 0.56736958, "grad_norm": 2.02141094, "learning_rate": 8.604e-05, "token_acc": 0.82277121, "epoch": 2.81664792, "global_step/max_steps": "2504/8890", "percentage": "28.17%", "elapsed_time": "53m 11s", "remaining_time": "2h 15m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784664} {"loss": 0.81529188, "grad_norm": 2.43800592, "learning_rate": 8.602e-05, "token_acc": 0.76431425, "epoch": 2.81777278, "global_step/max_steps": "2505/8890", "percentage": "28.18%", "elapsed_time": "53m 12s", "remaining_time": "2h 15m 37s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784661} {"loss": 0.60108054, "grad_norm": 2.07786322, "learning_rate": 8.601e-05, "token_acc": 0.79868709, "epoch": 2.81889764, "global_step/max_steps": "2506/8890", "percentage": "28.19%", "elapsed_time": "53m 13s", "remaining_time": "2h 15m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784684} {"loss": 0.52044535, "grad_norm": 1.8536737, "learning_rate": 8.6e-05, "token_acc": 0.83251232, "epoch": 2.8200225, "global_step/max_steps": "2507/8890", "percentage": "28.20%", "elapsed_time": "53m 14s", "remaining_time": "2h 15m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784709} {"loss": 0.53997123, "grad_norm": 2.1061883, "learning_rate": 8.598e-05, "token_acc": 0.82709251, "epoch": 2.82114736, "global_step/max_steps": "2508/8890", "percentage": "28.21%", "elapsed_time": "53m 15s", "remaining_time": "2h 15m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784766} {"loss": 0.47692055, "grad_norm": 2.16693044, "learning_rate": 8.597e-05, "token_acc": 0.84363178, "epoch": 2.82227222, "global_step/max_steps": "2509/8890", "percentage": "28.22%", "elapsed_time": "53m 16s", "remaining_time": "2h 15m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784829} {"loss": 0.58569074, "grad_norm": 2.17208195, "learning_rate": 8.596e-05, "token_acc": 0.81192189, "epoch": 2.82339708, "global_step/max_steps": "2510/8890", "percentage": "28.23%", "elapsed_time": "53m 17s", "remaining_time": "2h 15m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784885} {"loss": 0.43297458, "grad_norm": 1.86052787, "learning_rate": 8.595e-05, "token_acc": 0.85714286, "epoch": 2.82452193, "global_step/max_steps": "2511/8890", "percentage": "28.25%", "elapsed_time": "53m 19s", "remaining_time": "2h 15m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78491} {"loss": 0.48823288, "grad_norm": 2.20306301, "learning_rate": 8.593e-05, "token_acc": 0.85751979, "epoch": 2.82564679, "global_step/max_steps": "2512/8890", "percentage": "28.26%", "elapsed_time": "53m 20s", "remaining_time": "2h 15m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784967} {"loss": 0.520706, "grad_norm": 2.26844716, "learning_rate": 8.592e-05, "token_acc": 0.81657609, "epoch": 2.82677165, "global_step/max_steps": "2513/8890", "percentage": "28.27%", "elapsed_time": "53m 21s", "remaining_time": "2h 15m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784997} {"loss": 0.59774983, "grad_norm": 2.11830378, "learning_rate": 8.591e-05, "token_acc": 0.79802956, "epoch": 2.82789651, "global_step/max_steps": "2514/8890", "percentage": "28.28%", "elapsed_time": "53m 22s", "remaining_time": "2h 15m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784892} {"loss": 0.5435729, "grad_norm": 2.02696705, "learning_rate": 8.589e-05, "token_acc": 0.82128099, "epoch": 2.82902137, "global_step/max_steps": "2515/8890", "percentage": "28.29%", "elapsed_time": "53m 23s", "remaining_time": "2h 15m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78497} {"loss": 0.46267942, "grad_norm": 2.11800742, "learning_rate": 8.588e-05, "token_acc": 0.85365854, "epoch": 2.83014623, "global_step/max_steps": "2516/8890", "percentage": "28.30%", "elapsed_time": "53m 25s", "remaining_time": "2h 15m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784992} {"loss": 0.51122117, "grad_norm": 2.09613848, "learning_rate": 8.587e-05, "token_acc": 0.84152334, "epoch": 2.83127109, "global_step/max_steps": "2517/8890", "percentage": "28.31%", "elapsed_time": "53m 26s", "remaining_time": "2h 15m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785046} {"loss": 0.49243245, "grad_norm": 1.74811447, "learning_rate": 8.585e-05, "token_acc": 0.85425812, "epoch": 2.83239595, "global_step/max_steps": "2518/8890", "percentage": "28.32%", "elapsed_time": "53m 27s", "remaining_time": "2h 15m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785074} {"loss": 0.45111859, "grad_norm": 2.02674556, "learning_rate": 8.584e-05, "token_acc": 0.8568306, "epoch": 2.83352081, "global_step/max_steps": "2519/8890", "percentage": "28.34%", "elapsed_time": "53m 28s", "remaining_time": "2h 15m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785103} {"loss": 0.48063758, "grad_norm": 1.76985312, "learning_rate": 8.583e-05, "token_acc": 0.86139535, "epoch": 2.83464567, "global_step/max_steps": "2520/8890", "percentage": "28.35%", "elapsed_time": "53m 29s", "remaining_time": "2h 15m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785131} {"loss": 0.55855656, "grad_norm": 2.10870552, "learning_rate": 8.582e-05, "token_acc": 0.83376963, "epoch": 2.83577053, "global_step/max_steps": "2521/8890", "percentage": "28.36%", "elapsed_time": "53m 30s", "remaining_time": "2h 15m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785155} {"loss": 0.57176411, "grad_norm": 2.06269646, "learning_rate": 8.58e-05, "token_acc": 0.82058288, "epoch": 2.83689539, "global_step/max_steps": "2522/8890", "percentage": "28.37%", "elapsed_time": "53m 31s", "remaining_time": "2h 15m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785182} {"loss": 0.53086841, "grad_norm": 2.21341085, "learning_rate": 8.579e-05, "token_acc": 0.83984868, "epoch": 2.83802025, "global_step/max_steps": "2523/8890", "percentage": "28.38%", "elapsed_time": "53m 33s", "remaining_time": "2h 15m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785228} {"loss": 0.52101386, "grad_norm": 2.39609742, "learning_rate": 8.578e-05, "token_acc": 0.82320442, "epoch": 2.83914511, "global_step/max_steps": "2524/8890", "percentage": "28.39%", "elapsed_time": "53m 34s", "remaining_time": "2h 15m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785261} {"loss": 0.54621971, "grad_norm": 2.24753428, "learning_rate": 8.576e-05, "token_acc": 0.8379085, "epoch": 2.84026997, "global_step/max_steps": "2525/8890", "percentage": "28.40%", "elapsed_time": "53m 35s", "remaining_time": "2h 15m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785299} {"loss": 0.44994327, "grad_norm": 1.81816387, "learning_rate": 8.575e-05, "token_acc": 0.85402185, "epoch": 2.84139483, "global_step/max_steps": "2526/8890", "percentage": "28.41%", "elapsed_time": "53m 36s", "remaining_time": "2h 15m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785354} {"loss": 0.54413795, "grad_norm": 1.90831649, "learning_rate": 8.574e-05, "token_acc": 0.82580645, "epoch": 2.84251969, "global_step/max_steps": "2527/8890", "percentage": "28.43%", "elapsed_time": "53m 37s", "remaining_time": "2h 15m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785348} {"loss": 0.63077754, "grad_norm": 2.09213376, "learning_rate": 8.572e-05, "token_acc": 0.81372549, "epoch": 2.84364454, "global_step/max_steps": "2528/8890", "percentage": "28.44%", "elapsed_time": "53m 38s", "remaining_time": "2h 15m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785376} {"loss": 0.52652788, "grad_norm": 2.08458257, "learning_rate": 8.571e-05, "token_acc": 0.83011583, "epoch": 2.8447694, "global_step/max_steps": "2529/8890", "percentage": "28.45%", "elapsed_time": "53m 39s", "remaining_time": "2h 14m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785408} {"loss": 0.49719602, "grad_norm": 2.40569973, "learning_rate": 8.57e-05, "token_acc": 0.8361204, "epoch": 2.84589426, "global_step/max_steps": "2530/8890", "percentage": "28.46%", "elapsed_time": "53m 41s", "remaining_time": "2h 14m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785439} {"loss": 0.36322454, "grad_norm": 1.80082881, "learning_rate": 8.569e-05, "token_acc": 0.89159562, "epoch": 2.84701912, "global_step/max_steps": "2531/8890", "percentage": "28.47%", "elapsed_time": "53m 42s", "remaining_time": "2h 14m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785494} {"loss": 0.4984771, "grad_norm": 2.05495238, "learning_rate": 8.567e-05, "token_acc": 0.83786982, "epoch": 2.84814398, "global_step/max_steps": "2532/8890", "percentage": "28.48%", "elapsed_time": "53m 43s", "remaining_time": "2h 14m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785571} {"loss": 0.65555757, "grad_norm": 2.45998621, "learning_rate": 8.566e-05, "token_acc": 0.82416268, "epoch": 2.84926884, "global_step/max_steps": "2533/8890", "percentage": "28.49%", "elapsed_time": "53m 44s", "remaining_time": "2h 14m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785625} {"loss": 0.64766473, "grad_norm": 2.01205969, "learning_rate": 8.565e-05, "token_acc": 0.80259222, "epoch": 2.8503937, "global_step/max_steps": "2534/8890", "percentage": "28.50%", "elapsed_time": "53m 45s", "remaining_time": "2h 14m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785652} {"loss": 0.55929363, "grad_norm": 2.38060331, "learning_rate": 8.563e-05, "token_acc": 0.82816901, "epoch": 2.85151856, "global_step/max_steps": "2535/8890", "percentage": "28.52%", "elapsed_time": "53m 46s", "remaining_time": "2h 14m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785698} {"loss": 0.5969913, "grad_norm": 2.02777719, "learning_rate": 8.562e-05, "token_acc": 0.82556987, "epoch": 2.85264342, "global_step/max_steps": "2536/8890", "percentage": "28.53%", "elapsed_time": "53m 47s", "remaining_time": "2h 14m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785722} {"loss": 0.54123271, "grad_norm": 2.17069411, "learning_rate": 8.561e-05, "token_acc": 0.82829374, "epoch": 2.85376828, "global_step/max_steps": "2537/8890", "percentage": "28.54%", "elapsed_time": "53m 48s", "remaining_time": "2h 14m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785748} {"loss": 0.45255175, "grad_norm": 1.94744432, "learning_rate": 8.559e-05, "token_acc": 0.84624846, "epoch": 2.85489314, "global_step/max_steps": "2538/8890", "percentage": "28.55%", "elapsed_time": "53m 49s", "remaining_time": "2h 14m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785796} {"loss": 0.46946028, "grad_norm": 1.92331946, "learning_rate": 8.558e-05, "token_acc": 0.85374554, "epoch": 2.856018, "global_step/max_steps": "2539/8890", "percentage": "28.56%", "elapsed_time": "53m 51s", "remaining_time": "2h 14m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785788} {"loss": 0.63696378, "grad_norm": 2.34758091, "learning_rate": 8.557e-05, "token_acc": 0.8090011, "epoch": 2.85714286, "global_step/max_steps": "2540/8890", "percentage": "28.57%", "elapsed_time": "53m 52s", "remaining_time": "2h 14m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785823} {"loss": 0.63444167, "grad_norm": 2.520612, "learning_rate": 8.556e-05, "token_acc": 0.80658436, "epoch": 2.85826772, "global_step/max_steps": "2541/8890", "percentage": "28.58%", "elapsed_time": "53m 53s", "remaining_time": "2h 14m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78588} {"loss": 0.63594091, "grad_norm": 2.21298909, "learning_rate": 8.554e-05, "token_acc": 0.79691517, "epoch": 2.85939258, "global_step/max_steps": "2542/8890", "percentage": "28.59%", "elapsed_time": "53m 54s", "remaining_time": "2h 14m 37s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785915} {"loss": 0.59488809, "grad_norm": 1.9661907, "learning_rate": 8.553e-05, "token_acc": 0.80685921, "epoch": 2.86051744, "global_step/max_steps": "2543/8890", "percentage": "28.61%", "elapsed_time": "53m 55s", "remaining_time": "2h 14m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786} {"loss": 0.62903929, "grad_norm": 2.2615273, "learning_rate": 8.552e-05, "token_acc": 0.80023095, "epoch": 2.86164229, "global_step/max_steps": "2544/8890", "percentage": "28.62%", "elapsed_time": "53m 57s", "remaining_time": "2h 14m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785867} {"loss": 0.65480119, "grad_norm": 2.60515594, "learning_rate": 8.55e-05, "token_acc": 0.78097622, "epoch": 2.86276715, "global_step/max_steps": "2545/8890", "percentage": "28.63%", "elapsed_time": "53m 58s", "remaining_time": "2h 14m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785898} {"loss": 0.64609921, "grad_norm": 2.32018232, "learning_rate": 8.549e-05, "token_acc": 0.78586279, "epoch": 2.86389201, "global_step/max_steps": "2546/8890", "percentage": "28.64%", "elapsed_time": "53m 59s", "remaining_time": "2h 14m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785988} {"loss": 0.55743152, "grad_norm": 2.23453832, "learning_rate": 8.548e-05, "token_acc": 0.80715198, "epoch": 2.86501687, "global_step/max_steps": "2547/8890", "percentage": "28.65%", "elapsed_time": "54m 0s", "remaining_time": "2h 14m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786037} {"loss": 0.71126527, "grad_norm": 2.60811162, "learning_rate": 8.546e-05, "token_acc": 0.78103837, "epoch": 2.86614173, "global_step/max_steps": "2548/8890", "percentage": "28.66%", "elapsed_time": "54m 1s", "remaining_time": "2h 14m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786116} {"loss": 0.52268863, "grad_norm": 2.18764043, "learning_rate": 8.545e-05, "token_acc": 0.84171779, "epoch": 2.86726659, "global_step/max_steps": "2549/8890", "percentage": "28.67%", "elapsed_time": "54m 2s", "remaining_time": "2h 14m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786147} {"loss": 0.60985374, "grad_norm": 2.08286691, "learning_rate": 8.544e-05, "token_acc": 0.81469298, "epoch": 2.86839145, "global_step/max_steps": "2550/8890", "percentage": "28.68%", "elapsed_time": "54m 3s", "remaining_time": "2h 14m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786183} {"loss": 0.51157892, "grad_norm": 1.86068881, "learning_rate": 8.542e-05, "token_acc": 0.8414496, "epoch": 2.86951631, "global_step/max_steps": "2551/8890", "percentage": "28.70%", "elapsed_time": "54m 4s", "remaining_time": "2h 14m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786229} {"loss": 0.45590711, "grad_norm": 2.24330187, "learning_rate": 8.541e-05, "token_acc": 0.84202212, "epoch": 2.87064117, "global_step/max_steps": "2552/8890", "percentage": "28.71%", "elapsed_time": "54m 5s", "remaining_time": "2h 14m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786257} {"loss": 0.49691516, "grad_norm": 2.01792765, "learning_rate": 8.54e-05, "token_acc": 0.8344519, "epoch": 2.87176603, "global_step/max_steps": "2553/8890", "percentage": "28.72%", "elapsed_time": "54m 6s", "remaining_time": "2h 14m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786288} {"loss": 0.5590269, "grad_norm": 2.18440723, "learning_rate": 8.538e-05, "token_acc": 0.8283208, "epoch": 2.87289089, "global_step/max_steps": "2554/8890", "percentage": "28.73%", "elapsed_time": "54m 7s", "remaining_time": "2h 14m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786336} {"loss": 0.55521488, "grad_norm": 2.44745874, "learning_rate": 8.537e-05, "token_acc": 0.81395349, "epoch": 2.87401575, "global_step/max_steps": "2555/8890", "percentage": "28.74%", "elapsed_time": "54m 9s", "remaining_time": "2h 14m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78637} {"loss": 0.59020472, "grad_norm": 2.28570819, "learning_rate": 8.536e-05, "token_acc": 0.8254172, "epoch": 2.87514061, "global_step/max_steps": "2556/8890", "percentage": "28.75%", "elapsed_time": "54m 10s", "remaining_time": "2h 14m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.7864} {"loss": 0.60340738, "grad_norm": 2.51008177, "learning_rate": 8.535e-05, "token_acc": 0.80025608, "epoch": 2.87626547, "global_step/max_steps": "2557/8890", "percentage": "28.76%", "elapsed_time": "54m 11s", "remaining_time": "2h 14m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786439} {"loss": 0.70123279, "grad_norm": 2.1232214, "learning_rate": 8.533e-05, "token_acc": 0.79650092, "epoch": 2.87739033, "global_step/max_steps": "2558/8890", "percentage": "28.77%", "elapsed_time": "54m 12s", "remaining_time": "2h 14m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786486} {"loss": 0.56526744, "grad_norm": 2.11953425, "learning_rate": 8.532e-05, "token_acc": 0.81090909, "epoch": 2.87851519, "global_step/max_steps": "2559/8890", "percentage": "28.79%", "elapsed_time": "54m 13s", "remaining_time": "2h 14m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786546} {"loss": 0.39900118, "grad_norm": 1.96249294, "learning_rate": 8.531e-05, "token_acc": 0.87516088, "epoch": 2.87964004, "global_step/max_steps": "2560/8890", "percentage": "28.80%", "elapsed_time": "54m 14s", "remaining_time": "2h 14m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786575} {"loss": 0.60107362, "grad_norm": 2.28286052, "learning_rate": 8.529e-05, "token_acc": 0.79902558, "epoch": 2.8807649, "global_step/max_steps": "2561/8890", "percentage": "28.81%", "elapsed_time": "54m 15s", "remaining_time": "2h 14m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786627} {"loss": 0.56243241, "grad_norm": 1.92559612, "learning_rate": 8.528e-05, "token_acc": 0.82082082, "epoch": 2.88188976, "global_step/max_steps": "2562/8890", "percentage": "28.82%", "elapsed_time": "54m 16s", "remaining_time": "2h 14m 4s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786661} {"loss": 0.5914343, "grad_norm": 1.96008277, "learning_rate": 8.527e-05, "token_acc": 0.8086785, "epoch": 2.88301462, "global_step/max_steps": "2563/8890", "percentage": "28.83%", "elapsed_time": "54m 17s", "remaining_time": "2h 14m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786687} {"loss": 0.53803396, "grad_norm": 2.16912389, "learning_rate": 8.525e-05, "token_acc": 0.85317919, "epoch": 2.88413948, "global_step/max_steps": "2564/8890", "percentage": "28.84%", "elapsed_time": "54m 19s", "remaining_time": "2h 14m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786723} {"loss": 0.49421507, "grad_norm": 2.1060102, "learning_rate": 8.524e-05, "token_acc": 0.84409257, "epoch": 2.88526434, "global_step/max_steps": "2565/8890", "percentage": "28.85%", "elapsed_time": "54m 20s", "remaining_time": "2h 13m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786756} {"loss": 0.6238324, "grad_norm": 1.94141877, "learning_rate": 8.523e-05, "token_acc": 0.80440529, "epoch": 2.8863892, "global_step/max_steps": "2566/8890", "percentage": "28.86%", "elapsed_time": "54m 21s", "remaining_time": "2h 13m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786746} {"loss": 0.51086318, "grad_norm": 2.03404927, "learning_rate": 8.521e-05, "token_acc": 0.82442748, "epoch": 2.88751406, "global_step/max_steps": "2567/8890", "percentage": "28.88%", "elapsed_time": "54m 22s", "remaining_time": "2h 13m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786782} {"loss": 0.44498941, "grad_norm": 2.13461423, "learning_rate": 8.52e-05, "token_acc": 0.83825598, "epoch": 2.88863892, "global_step/max_steps": "2568/8890", "percentage": "28.89%", "elapsed_time": "54m 23s", "remaining_time": "2h 13m 54s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786834} {"loss": 0.59609032, "grad_norm": 2.18452716, "learning_rate": 8.519e-05, "token_acc": 0.83501684, "epoch": 2.88976378, "global_step/max_steps": "2569/8890", "percentage": "28.90%", "elapsed_time": "54m 24s", "remaining_time": "2h 13m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78687} {"loss": 0.35248891, "grad_norm": 1.94596267, "learning_rate": 8.517e-05, "token_acc": 0.87262079, "epoch": 2.89088864, "global_step/max_steps": "2570/8890", "percentage": "28.91%", "elapsed_time": "54m 25s", "remaining_time": "2h 13m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786931} {"loss": 0.59176564, "grad_norm": 2.44282627, "learning_rate": 8.516e-05, "token_acc": 0.80678851, "epoch": 2.8920135, "global_step/max_steps": "2571/8890", "percentage": "28.92%", "elapsed_time": "54m 27s", "remaining_time": "2h 13m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786959} {"loss": 0.48779523, "grad_norm": 1.93837905, "learning_rate": 8.515e-05, "token_acc": 0.83556012, "epoch": 2.89313836, "global_step/max_steps": "2572/8890", "percentage": "28.93%", "elapsed_time": "54m 28s", "remaining_time": "2h 13m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786982} {"loss": 0.67356104, "grad_norm": 2.20689869, "learning_rate": 8.513e-05, "token_acc": 0.78301887, "epoch": 2.89426322, "global_step/max_steps": "2573/8890", "percentage": "28.94%", "elapsed_time": "54m 29s", "remaining_time": "2h 13m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786984} {"loss": 0.57575917, "grad_norm": 1.92628825, "learning_rate": 8.512e-05, "token_acc": 0.823049, "epoch": 2.89538808, "global_step/max_steps": "2574/8890", "percentage": "28.95%", "elapsed_time": "54m 30s", "remaining_time": "2h 13m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787008} {"loss": 0.36602336, "grad_norm": 1.99486494, "learning_rate": 8.511e-05, "token_acc": 0.87630402, "epoch": 2.89651294, "global_step/max_steps": "2575/8890", "percentage": "28.97%", "elapsed_time": "54m 31s", "remaining_time": "2h 13m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787056} {"loss": 0.43752074, "grad_norm": 1.88868392, "learning_rate": 8.509e-05, "token_acc": 0.86017897, "epoch": 2.8976378, "global_step/max_steps": "2576/8890", "percentage": "28.98%", "elapsed_time": "54m 32s", "remaining_time": "2h 13m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787093} {"loss": 0.50122899, "grad_norm": 1.82797563, "learning_rate": 8.508e-05, "token_acc": 0.842871, "epoch": 2.89876265, "global_step/max_steps": "2577/8890", "percentage": "28.99%", "elapsed_time": "54m 33s", "remaining_time": "2h 13m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787128} {"loss": 0.39449954, "grad_norm": 1.77311206, "learning_rate": 8.507e-05, "token_acc": 0.87839102, "epoch": 2.89988751, "global_step/max_steps": "2578/8890", "percentage": "29.00%", "elapsed_time": "54m 34s", "remaining_time": "2h 13m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787187} {"loss": 0.59187651, "grad_norm": 2.15575504, "learning_rate": 8.505e-05, "token_acc": 0.81072874, "epoch": 2.90101237, "global_step/max_steps": "2579/8890", "percentage": "29.01%", "elapsed_time": "54m 36s", "remaining_time": "2h 13m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787215} {"loss": 0.4091031, "grad_norm": 1.8232137, "learning_rate": 8.504e-05, "token_acc": 0.86345776, "epoch": 2.90213723, "global_step/max_steps": "2580/8890", "percentage": "29.02%", "elapsed_time": "54m 37s", "remaining_time": "2h 13m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787246} {"loss": 0.55522561, "grad_norm": 2.05363703, "learning_rate": 8.503e-05, "token_acc": 0.84675073, "epoch": 2.90326209, "global_step/max_steps": "2581/8890", "percentage": "29.03%", "elapsed_time": "54m 38s", "remaining_time": "2h 13m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.7873} {"loss": 0.70032907, "grad_norm": 2.22886395, "learning_rate": 8.502e-05, "token_acc": 0.78611111, "epoch": 2.90438695, "global_step/max_steps": "2582/8890", "percentage": "29.04%", "elapsed_time": "54m 39s", "remaining_time": "2h 13m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787325} {"loss": 0.43541583, "grad_norm": 2.20945168, "learning_rate": 8.5e-05, "token_acc": 0.85290323, "epoch": 2.90551181, "global_step/max_steps": "2583/8890", "percentage": "29.06%", "elapsed_time": "54m 40s", "remaining_time": "2h 13m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787371} {"loss": 0.66737902, "grad_norm": 2.50791955, "learning_rate": 8.499e-05, "token_acc": 0.8115942, "epoch": 2.90663667, "global_step/max_steps": "2584/8890", "percentage": "29.07%", "elapsed_time": "54m 41s", "remaining_time": "2h 13m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787425} {"loss": 0.59558415, "grad_norm": 2.08363748, "learning_rate": 8.498e-05, "token_acc": 0.81290926, "epoch": 2.90776153, "global_step/max_steps": "2585/8890", "percentage": "29.08%", "elapsed_time": "54m 42s", "remaining_time": "2h 13m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787422} {"loss": 0.56596911, "grad_norm": 2.1955018, "learning_rate": 8.496e-05, "token_acc": 0.83313749, "epoch": 2.90888639, "global_step/max_steps": "2586/8890", "percentage": "29.09%", "elapsed_time": "54m 44s", "remaining_time": "2h 13m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787446} {"loss": 0.4557727, "grad_norm": 1.73887515, "learning_rate": 8.495e-05, "token_acc": 0.84948097, "epoch": 2.91001125, "global_step/max_steps": "2587/8890", "percentage": "29.10%", "elapsed_time": "54m 45s", "remaining_time": "2h 13m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787479} {"loss": 0.59943277, "grad_norm": 2.14649391, "learning_rate": 8.494e-05, "token_acc": 0.80406654, "epoch": 2.91113611, "global_step/max_steps": "2588/8890", "percentage": "29.11%", "elapsed_time": "54m 46s", "remaining_time": "2h 13m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787509} {"loss": 0.50348091, "grad_norm": 2.30586767, "learning_rate": 8.492e-05, "token_acc": 0.83662714, "epoch": 2.91226097, "global_step/max_steps": "2589/8890", "percentage": "29.12%", "elapsed_time": "54m 47s", "remaining_time": "2h 13m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78754} {"loss": 0.53668672, "grad_norm": 2.38729525, "learning_rate": 8.491e-05, "token_acc": 0.82744565, "epoch": 2.91338583, "global_step/max_steps": "2590/8890", "percentage": "29.13%", "elapsed_time": "54m 48s", "remaining_time": "2h 13m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787571} {"loss": 0.56529504, "grad_norm": 2.27282381, "learning_rate": 8.49e-05, "token_acc": 0.82623318, "epoch": 2.91451069, "global_step/max_steps": "2591/8890", "percentage": "29.15%", "elapsed_time": "54m 49s", "remaining_time": "2h 13m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.7876} {"loss": 0.66693795, "grad_norm": 2.42287445, "learning_rate": 8.488e-05, "token_acc": 0.7832618, "epoch": 2.91563555, "global_step/max_steps": "2592/8890", "percentage": "29.16%", "elapsed_time": "54m 50s", "remaining_time": "2h 13m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787623} {"loss": 0.67063904, "grad_norm": 2.28258061, "learning_rate": 8.487e-05, "token_acc": 0.79431072, "epoch": 2.9167604, "global_step/max_steps": "2593/8890", "percentage": "29.17%", "elapsed_time": "54m 51s", "remaining_time": "2h 13m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787671} {"loss": 0.53256476, "grad_norm": 2.15095544, "learning_rate": 8.486e-05, "token_acc": 0.82762097, "epoch": 2.91788526, "global_step/max_steps": "2594/8890", "percentage": "29.18%", "elapsed_time": "54m 53s", "remaining_time": "2h 13m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.7877} {"loss": 0.42761311, "grad_norm": 2.03717971, "learning_rate": 8.484e-05, "token_acc": 0.85301508, "epoch": 2.91901012, "global_step/max_steps": "2595/8890", "percentage": "29.19%", "elapsed_time": "54m 54s", "remaining_time": "2h 13m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787735} {"loss": 0.61358488, "grad_norm": 2.29237795, "learning_rate": 8.483e-05, "token_acc": 0.80065717, "epoch": 2.92013498, "global_step/max_steps": "2596/8890", "percentage": "29.20%", "elapsed_time": "54m 55s", "remaining_time": "2h 13m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787756} {"loss": 0.32434338, "grad_norm": 1.87761354, "learning_rate": 8.482e-05, "token_acc": 0.89114659, "epoch": 2.92125984, "global_step/max_steps": "2597/8890", "percentage": "29.21%", "elapsed_time": "54m 56s", "remaining_time": "2h 13m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787783} {"loss": 0.51726389, "grad_norm": 2.35221052, "learning_rate": 8.48e-05, "token_acc": 0.84329349, "epoch": 2.9223847, "global_step/max_steps": "2598/8890", "percentage": "29.22%", "elapsed_time": "54m 57s", "remaining_time": "2h 13m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787829} {"loss": 0.66778851, "grad_norm": 2.31534004, "learning_rate": 8.479e-05, "token_acc": 0.78579117, "epoch": 2.92350956, "global_step/max_steps": "2599/8890", "percentage": "29.24%", "elapsed_time": "54m 59s", "remaining_time": "2h 13m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787739} {"loss": 0.49926069, "grad_norm": 1.9701159, "learning_rate": 8.478e-05, "token_acc": 0.85046729, "epoch": 2.92463442, "global_step/max_steps": "2600/8890", "percentage": "29.25%", "elapsed_time": "55m 0s", "remaining_time": "2h 13m 4s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787735} {"loss": 0.59597695, "grad_norm": 2.19707251, "learning_rate": 8.476e-05, "token_acc": 0.82978723, "epoch": 2.92575928, "global_step/max_steps": "2601/8890", "percentage": "29.26%", "elapsed_time": "55m 1s", "remaining_time": "2h 13m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787761} {"loss": 0.55183446, "grad_norm": 2.36654568, "learning_rate": 8.475e-05, "token_acc": 0.83282675, "epoch": 2.92688414, "global_step/max_steps": "2602/8890", "percentage": "29.27%", "elapsed_time": "55m 2s", "remaining_time": "2h 13m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787811} {"loss": 0.58077157, "grad_norm": 2.23979735, "learning_rate": 8.474e-05, "token_acc": 0.81949934, "epoch": 2.928009, "global_step/max_steps": "2603/8890", "percentage": "29.28%", "elapsed_time": "55m 3s", "remaining_time": "2h 13m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787838} {"loss": 0.57199299, "grad_norm": 2.04170275, "learning_rate": 8.472e-05, "token_acc": 0.81075269, "epoch": 2.92913386, "global_step/max_steps": "2604/8890", "percentage": "29.29%", "elapsed_time": "55m 5s", "remaining_time": "2h 12m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787815} {"loss": 0.7009865, "grad_norm": 2.18892217, "learning_rate": 8.471e-05, "token_acc": 0.79882812, "epoch": 2.93025872, "global_step/max_steps": "2605/8890", "percentage": "29.30%", "elapsed_time": "55m 6s", "remaining_time": "2h 12m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787872} {"loss": 0.47198164, "grad_norm": 1.99503052, "learning_rate": 8.469e-05, "token_acc": 0.84840764, "epoch": 2.93138358, "global_step/max_steps": "2606/8890", "percentage": "29.31%", "elapsed_time": "55m 7s", "remaining_time": "2h 12m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787918} {"loss": 0.6035136, "grad_norm": 2.198879, "learning_rate": 8.468e-05, "token_acc": 0.80242424, "epoch": 2.93250844, "global_step/max_steps": "2607/8890", "percentage": "29.33%", "elapsed_time": "55m 8s", "remaining_time": "2h 12m 54s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787906} {"loss": 0.48232371, "grad_norm": 2.15159655, "learning_rate": 8.467e-05, "token_acc": 0.83312422, "epoch": 2.9336333, "global_step/max_steps": "2608/8890", "percentage": "29.34%", "elapsed_time": "55m 9s", "remaining_time": "2h 12m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787967} {"loss": 0.60372156, "grad_norm": 1.75836849, "learning_rate": 8.465e-05, "token_acc": 0.81687898, "epoch": 2.93475816, "global_step/max_steps": "2609/8890", "percentage": "29.35%", "elapsed_time": "55m 11s", "remaining_time": "2h 12m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787963} {"loss": 0.57534879, "grad_norm": 2.3240118, "learning_rate": 8.464e-05, "token_acc": 0.81850117, "epoch": 2.93588301, "global_step/max_steps": "2610/8890", "percentage": "29.36%", "elapsed_time": "55m 12s", "remaining_time": "2h 12m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788012} {"loss": 0.63085413, "grad_norm": 2.56335092, "learning_rate": 8.463e-05, "token_acc": 0.82190132, "epoch": 2.93700787, "global_step/max_steps": "2611/8890", "percentage": "29.37%", "elapsed_time": "55m 13s", "remaining_time": "2h 12m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788038} {"loss": 0.5193845, "grad_norm": 2.28743315, "learning_rate": 8.461e-05, "token_acc": 0.82689747, "epoch": 2.93813273, "global_step/max_steps": "2612/8890", "percentage": "29.38%", "elapsed_time": "55m 14s", "remaining_time": "2h 12m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788066} {"loss": 0.52806985, "grad_norm": 1.9091022, "learning_rate": 8.46e-05, "token_acc": 0.84215501, "epoch": 2.93925759, "global_step/max_steps": "2613/8890", "percentage": "29.39%", "elapsed_time": "55m 15s", "remaining_time": "2h 12m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788101} {"loss": 0.64587104, "grad_norm": 2.37152243, "learning_rate": 8.459e-05, "token_acc": 0.80673317, "epoch": 2.94038245, "global_step/max_steps": "2614/8890", "percentage": "29.40%", "elapsed_time": "55m 17s", "remaining_time": "2h 12m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788024} {"loss": 0.5945825, "grad_norm": 2.31016135, "learning_rate": 8.457e-05, "token_acc": 0.80548926, "epoch": 2.94150731, "global_step/max_steps": "2615/8890", "percentage": "29.42%", "elapsed_time": "55m 18s", "remaining_time": "2h 12m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788076} {"loss": 0.55277985, "grad_norm": 2.02149773, "learning_rate": 8.456e-05, "token_acc": 0.83174603, "epoch": 2.94263217, "global_step/max_steps": "2616/8890", "percentage": "29.43%", "elapsed_time": "55m 19s", "remaining_time": "2h 12m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788132} {"loss": 0.59306592, "grad_norm": 2.18079042, "learning_rate": 8.455e-05, "token_acc": 0.81425486, "epoch": 2.94375703, "global_step/max_steps": "2617/8890", "percentage": "29.44%", "elapsed_time": "55m 20s", "remaining_time": "2h 12m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788167} {"loss": 0.60996765, "grad_norm": 2.27110052, "learning_rate": 8.453e-05, "token_acc": 0.79855247, "epoch": 2.94488189, "global_step/max_steps": "2618/8890", "percentage": "29.45%", "elapsed_time": "55m 21s", "remaining_time": "2h 12m 37s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788198} {"loss": 0.48564991, "grad_norm": 2.17284584, "learning_rate": 8.452e-05, "token_acc": 0.84015852, "epoch": 2.94600675, "global_step/max_steps": "2619/8890", "percentage": "29.46%", "elapsed_time": "55m 22s", "remaining_time": "2h 12m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788258} {"loss": 0.62761247, "grad_norm": 1.95058501, "learning_rate": 8.451e-05, "token_acc": 0.79697987, "epoch": 2.94713161, "global_step/max_steps": "2620/8890", "percentage": "29.47%", "elapsed_time": "55m 23s", "remaining_time": "2h 12m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788281} {"loss": 0.67425185, "grad_norm": 2.14888167, "learning_rate": 8.449e-05, "token_acc": 0.78860104, "epoch": 2.94825647, "global_step/max_steps": "2621/8890", "percentage": "29.48%", "elapsed_time": "55m 24s", "remaining_time": "2h 12m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788273} {"loss": 0.55039692, "grad_norm": 1.94867158, "learning_rate": 8.448e-05, "token_acc": 0.82208029, "epoch": 2.94938133, "global_step/max_steps": "2622/8890", "percentage": "29.49%", "elapsed_time": "55m 26s", "remaining_time": "2h 12m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788265} {"loss": 0.51320672, "grad_norm": 1.98615777, "learning_rate": 8.447e-05, "token_acc": 0.83482143, "epoch": 2.95050619, "global_step/max_steps": "2623/8890", "percentage": "29.51%", "elapsed_time": "55m 27s", "remaining_time": "2h 12m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788311} {"loss": 0.66289032, "grad_norm": 2.12245703, "learning_rate": 8.445e-05, "token_acc": 0.80701754, "epoch": 2.95163105, "global_step/max_steps": "2624/8890", "percentage": "29.52%", "elapsed_time": "55m 28s", "remaining_time": "2h 12m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788301} {"loss": 0.36084241, "grad_norm": 1.95065486, "learning_rate": 8.444e-05, "token_acc": 0.88888889, "epoch": 2.95275591, "global_step/max_steps": "2625/8890", "percentage": "29.53%", "elapsed_time": "55m 29s", "remaining_time": "2h 12m 26s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788378} {"loss": 0.35976571, "grad_norm": 1.76725459, "learning_rate": 8.443e-05, "token_acc": 0.87944162, "epoch": 2.95388076, "global_step/max_steps": "2626/8890", "percentage": "29.54%", "elapsed_time": "55m 30s", "remaining_time": "2h 12m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788461} {"loss": 0.37062827, "grad_norm": 1.57739365, "learning_rate": 8.441e-05, "token_acc": 0.8700291, "epoch": 2.95500562, "global_step/max_steps": "2627/8890", "percentage": "29.55%", "elapsed_time": "55m 31s", "remaining_time": "2h 12m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788483} {"loss": 0.6470179, "grad_norm": 2.552495, "learning_rate": 8.44e-05, "token_acc": 0.81069959, "epoch": 2.95613048, "global_step/max_steps": "2628/8890", "percentage": "29.56%", "elapsed_time": "55m 32s", "remaining_time": "2h 12m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788535} {"loss": 0.57369149, "grad_norm": 2.19455361, "learning_rate": 8.439e-05, "token_acc": 0.80582524, "epoch": 2.95725534, "global_step/max_steps": "2629/8890", "percentage": "29.57%", "elapsed_time": "55m 33s", "remaining_time": "2h 12m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788565} {"loss": 0.58603173, "grad_norm": 2.00538158, "learning_rate": 8.437e-05, "token_acc": 0.80411361, "epoch": 2.9583802, "global_step/max_steps": "2630/8890", "percentage": "29.58%", "elapsed_time": "55m 35s", "remaining_time": "2h 12m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788599} {"loss": 0.65665746, "grad_norm": 1.81128287, "learning_rate": 8.436e-05, "token_acc": 0.79917695, "epoch": 2.95950506, "global_step/max_steps": "2631/8890", "percentage": "29.60%", "elapsed_time": "55m 36s", "remaining_time": "2h 12m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788628} {"loss": 0.60437518, "grad_norm": 2.13682294, "learning_rate": 8.435e-05, "token_acc": 0.80597015, "epoch": 2.96062992, "global_step/max_steps": "2632/8890", "percentage": "29.61%", "elapsed_time": "55m 37s", "remaining_time": "2h 12m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788661} {"loss": 0.61463398, "grad_norm": 2.19330931, "learning_rate": 8.433e-05, "token_acc": 0.80256137, "epoch": 2.96175478, "global_step/max_steps": "2633/8890", "percentage": "29.62%", "elapsed_time": "55m 38s", "remaining_time": "2h 12m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788693} {"loss": 0.58426464, "grad_norm": 2.11245847, "learning_rate": 8.432e-05, "token_acc": 0.81208054, "epoch": 2.96287964, "global_step/max_steps": "2634/8890", "percentage": "29.63%", "elapsed_time": "55m 39s", "remaining_time": "2h 12m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788726} {"loss": 0.4473469, "grad_norm": 1.77971983, "learning_rate": 8.43e-05, "token_acc": 0.85663925, "epoch": 2.9640045, "global_step/max_steps": "2635/8890", "percentage": "29.64%", "elapsed_time": "55m 40s", "remaining_time": "2h 12m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788755} {"loss": 0.61541462, "grad_norm": 1.93528819, "learning_rate": 8.429e-05, "token_acc": 0.80938697, "epoch": 2.96512936, "global_step/max_steps": "2636/8890", "percentage": "29.65%", "elapsed_time": "55m 41s", "remaining_time": "2h 12m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788753} {"loss": 0.60894561, "grad_norm": 2.21960187, "learning_rate": 8.428e-05, "token_acc": 0.81003202, "epoch": 2.96625422, "global_step/max_steps": "2637/8890", "percentage": "29.66%", "elapsed_time": "55m 42s", "remaining_time": "2h 12m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788836} {"loss": 0.70326054, "grad_norm": 2.11641765, "learning_rate": 8.426e-05, "token_acc": 0.79099099, "epoch": 2.96737908, "global_step/max_steps": "2638/8890", "percentage": "29.67%", "elapsed_time": "55m 43s", "remaining_time": "2h 12m 4s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788906} {"loss": 0.54478025, "grad_norm": 2.07511473, "learning_rate": 8.425e-05, "token_acc": 0.81400689, "epoch": 2.96850394, "global_step/max_steps": "2639/8890", "percentage": "29.69%", "elapsed_time": "55m 44s", "remaining_time": "2h 12m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788954} {"loss": 0.53959441, "grad_norm": 1.95596933, "learning_rate": 8.424e-05, "token_acc": 0.83680175, "epoch": 2.9696288, "global_step/max_steps": "2640/8890", "percentage": "29.70%", "elapsed_time": "55m 46s", "remaining_time": "2h 12m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788975} {"loss": 0.60027671, "grad_norm": 1.88403583, "learning_rate": 8.422e-05, "token_acc": 0.82696177, "epoch": 2.97075366, "global_step/max_steps": "2641/8890", "percentage": "29.71%", "elapsed_time": "55m 47s", "remaining_time": "2h 12m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788964} {"loss": 0.56172669, "grad_norm": 2.27227092, "learning_rate": 8.421e-05, "token_acc": 0.82666667, "epoch": 2.97187852, "global_step/max_steps": "2642/8890", "percentage": "29.72%", "elapsed_time": "55m 48s", "remaining_time": "2h 11m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788996} {"loss": 0.68944216, "grad_norm": 2.35048532, "learning_rate": 8.42e-05, "token_acc": 0.79896373, "epoch": 2.97300337, "global_step/max_steps": "2643/8890", "percentage": "29.73%", "elapsed_time": "55m 49s", "remaining_time": "2h 11m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788996} {"loss": 0.52464533, "grad_norm": 2.1960628, "learning_rate": 8.418e-05, "token_acc": 0.83264746, "epoch": 2.97412823, "global_step/max_steps": "2644/8890", "percentage": "29.74%", "elapsed_time": "55m 50s", "remaining_time": "2h 11m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789022} {"loss": 0.52270687, "grad_norm": 1.865201, "learning_rate": 8.417e-05, "token_acc": 0.84326923, "epoch": 2.97525309, "global_step/max_steps": "2645/8890", "percentage": "29.75%", "elapsed_time": "55m 52s", "remaining_time": "2h 11m 54s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789045} {"loss": 0.51650178, "grad_norm": 2.41088152, "learning_rate": 8.416e-05, "token_acc": 0.83704974, "epoch": 2.97637795, "global_step/max_steps": "2646/8890", "percentage": "29.76%", "elapsed_time": "55m 53s", "remaining_time": "2h 11m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789105} {"loss": 0.4250949, "grad_norm": 1.88975751, "learning_rate": 8.414e-05, "token_acc": 0.85103627, "epoch": 2.97750281, "global_step/max_steps": "2647/8890", "percentage": "29.78%", "elapsed_time": "55m 54s", "remaining_time": "2h 11m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789145} {"loss": 0.54521126, "grad_norm": 2.21829987, "learning_rate": 8.413e-05, "token_acc": 0.81554677, "epoch": 2.97862767, "global_step/max_steps": "2648/8890", "percentage": "29.79%", "elapsed_time": "55m 55s", "remaining_time": "2h 11m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789173} {"loss": 0.69414473, "grad_norm": 2.07710028, "learning_rate": 8.411e-05, "token_acc": 0.79345955, "epoch": 2.97975253, "global_step/max_steps": "2649/8890", "percentage": "29.80%", "elapsed_time": "55m 56s", "remaining_time": "2h 11m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789194} {"loss": 0.60492384, "grad_norm": 2.13559175, "learning_rate": 8.41e-05, "token_acc": 0.81781782, "epoch": 2.98087739, "global_step/max_steps": "2650/8890", "percentage": "29.81%", "elapsed_time": "55m 57s", "remaining_time": "2h 11m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789218} {"loss": 0.62012708, "grad_norm": 2.28664136, "learning_rate": 8.409e-05, "token_acc": 0.80989011, "epoch": 2.98200225, "global_step/max_steps": "2651/8890", "percentage": "29.82%", "elapsed_time": "55m 58s", "remaining_time": "2h 11m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789243} {"loss": 0.54810089, "grad_norm": 2.17425466, "learning_rate": 8.407e-05, "token_acc": 0.832, "epoch": 2.98312711, "global_step/max_steps": "2652/8890", "percentage": "29.83%", "elapsed_time": "55m 59s", "remaining_time": "2h 11m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789291} {"loss": 0.43538174, "grad_norm": 2.07656932, "learning_rate": 8.406e-05, "token_acc": 0.8540146, "epoch": 2.98425197, "global_step/max_steps": "2653/8890", "percentage": "29.84%", "elapsed_time": "56m 1s", "remaining_time": "2h 11m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789324} {"loss": 0.49313208, "grad_norm": 2.47968411, "learning_rate": 8.405e-05, "token_acc": 0.82488479, "epoch": 2.98537683, "global_step/max_steps": "2654/8890", "percentage": "29.85%", "elapsed_time": "56m 2s", "remaining_time": "2h 11m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789396} {"loss": 0.55804521, "grad_norm": 2.20071983, "learning_rate": 8.403e-05, "token_acc": 0.82208589, "epoch": 2.98650169, "global_step/max_steps": "2655/8890", "percentage": "29.87%", "elapsed_time": "56m 3s", "remaining_time": "2h 11m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789427} {"loss": 0.48465869, "grad_norm": 2.04946423, "learning_rate": 8.402e-05, "token_acc": 0.82822086, "epoch": 2.98762655, "global_step/max_steps": "2656/8890", "percentage": "29.88%", "elapsed_time": "56m 4s", "remaining_time": "2h 11m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78946} {"loss": 0.59364349, "grad_norm": 2.18066549, "learning_rate": 8.401e-05, "token_acc": 0.81789802, "epoch": 2.98875141, "global_step/max_steps": "2657/8890", "percentage": "29.89%", "elapsed_time": "56m 5s", "remaining_time": "2h 11m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789495} {"loss": 0.57283187, "grad_norm": 2.32693958, "learning_rate": 8.399e-05, "token_acc": 0.8125, "epoch": 2.98987627, "global_step/max_steps": "2658/8890", "percentage": "29.90%", "elapsed_time": "56m 6s", "remaining_time": "2h 11m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789515} {"loss": 0.54167283, "grad_norm": 2.33654404, "learning_rate": 8.398e-05, "token_acc": 0.83333333, "epoch": 2.99100112, "global_step/max_steps": "2659/8890", "percentage": "29.91%", "elapsed_time": "56m 7s", "remaining_time": "2h 11m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789544} {"loss": 0.63429254, "grad_norm": 2.44574523, "learning_rate": 8.396e-05, "token_acc": 0.7997558, "epoch": 2.99212598, "global_step/max_steps": "2660/8890", "percentage": "29.92%", "elapsed_time": "56m 8s", "remaining_time": "2h 11m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789572} {"loss": 0.36795682, "grad_norm": 1.82475424, "learning_rate": 8.395e-05, "token_acc": 0.88037634, "epoch": 2.99325084, "global_step/max_steps": "2661/8890", "percentage": "29.93%", "elapsed_time": "56m 10s", "remaining_time": "2h 11m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789604} {"loss": 0.61611354, "grad_norm": 2.397295, "learning_rate": 8.394e-05, "token_acc": 0.79242979, "epoch": 2.9943757, "global_step/max_steps": "2662/8890", "percentage": "29.94%", "elapsed_time": "56m 10s", "remaining_time": "2h 11m 26s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789678} {"loss": 0.60110241, "grad_norm": 2.33595252, "learning_rate": 8.392e-05, "token_acc": 0.82076814, "epoch": 2.99550056, "global_step/max_steps": "2663/8890", "percentage": "29.96%", "elapsed_time": "56m 12s", "remaining_time": "2h 11m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789737} {"loss": 0.59000874, "grad_norm": 2.37687325, "learning_rate": 8.391e-05, "token_acc": 0.80607815, "epoch": 2.99662542, "global_step/max_steps": "2664/8890", "percentage": "29.97%", "elapsed_time": "56m 13s", "remaining_time": "2h 11m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789759} {"loss": 0.58060205, "grad_norm": 1.89414036, "learning_rate": 8.39e-05, "token_acc": 0.80901077, "epoch": 2.99775028, "global_step/max_steps": "2665/8890", "percentage": "29.98%", "elapsed_time": "56m 14s", "remaining_time": "2h 11m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78981} {"loss": 0.59382749, "grad_norm": 2.21729088, "learning_rate": 8.388e-05, "token_acc": 0.81168831, "epoch": 2.99887514, "global_step/max_steps": "2666/8890", "percentage": "29.99%", "elapsed_time": "56m 15s", "remaining_time": "2h 11m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78988} {"loss": 0.39378071, "grad_norm": 2.13225055, "learning_rate": 8.387e-05, "token_acc": 0.8662614, "epoch": 3.0, "global_step/max_steps": "2667/8890", "percentage": "30.00%", "elapsed_time": "56m 16s", "remaining_time": "2h 11m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789932} {"loss": 0.31551376, "grad_norm": 1.49023461, "learning_rate": 8.386e-05, "token_acc": 0.91077636, "epoch": 3.00112486, "global_step/max_steps": "2668/8890", "percentage": "30.01%", "elapsed_time": "56m 18s", "remaining_time": "2h 11m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789772} {"loss": 0.374309, "grad_norm": 1.62354195, "learning_rate": 8.384e-05, "token_acc": 0.89132507, "epoch": 3.00224972, "global_step/max_steps": "2669/8890", "percentage": "30.02%", "elapsed_time": "56m 19s", "remaining_time": "2h 11m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789804} {"loss": 0.362582, "grad_norm": 1.53199255, "learning_rate": 8.383e-05, "token_acc": 0.87789661, "epoch": 3.00337458, "global_step/max_steps": "2670/8890", "percentage": "30.03%", "elapsed_time": "56m 20s", "remaining_time": "2h 11m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789854} {"loss": 0.41789395, "grad_norm": 1.93961799, "learning_rate": 8.381e-05, "token_acc": 0.87798742, "epoch": 3.00449944, "global_step/max_steps": "2671/8890", "percentage": "30.04%", "elapsed_time": "56m 21s", "remaining_time": "2h 11m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789883} {"loss": 0.25144529, "grad_norm": 1.29029906, "learning_rate": 8.38e-05, "token_acc": 0.93025283, "epoch": 3.0056243, "global_step/max_steps": "2672/8890", "percentage": "30.06%", "elapsed_time": "56m 22s", "remaining_time": "2h 11m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789942} {"loss": 0.34826404, "grad_norm": 1.66613698, "learning_rate": 8.379e-05, "token_acc": 0.90319458, "epoch": 3.00674916, "global_step/max_steps": "2673/8890", "percentage": "30.07%", "elapsed_time": "56m 23s", "remaining_time": "2h 11m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789975} {"loss": 0.34133196, "grad_norm": 1.7942512, "learning_rate": 8.377e-05, "token_acc": 0.90452876, "epoch": 3.00787402, "global_step/max_steps": "2674/8890", "percentage": "30.08%", "elapsed_time": "56m 24s", "remaining_time": "2h 11m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790031} {"loss": 0.38038361, "grad_norm": 1.66900599, "learning_rate": 8.376e-05, "token_acc": 0.8948891, "epoch": 3.00899888, "global_step/max_steps": "2675/8890", "percentage": "30.09%", "elapsed_time": "56m 25s", "remaining_time": "2h 11m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790059} {"loss": 0.31026918, "grad_norm": 1.81085849, "learning_rate": 8.375e-05, "token_acc": 0.89813084, "epoch": 3.01012373, "global_step/max_steps": "2676/8890", "percentage": "30.10%", "elapsed_time": "56m 26s", "remaining_time": "2h 11m 4s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790084} {"loss": 0.36978191, "grad_norm": 2.09673238, "learning_rate": 8.373e-05, "token_acc": 0.88557214, "epoch": 3.01124859, "global_step/max_steps": "2677/8890", "percentage": "30.11%", "elapsed_time": "56m 28s", "remaining_time": "2h 11m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790138} {"loss": 0.24034312, "grad_norm": 1.73817337, "learning_rate": 8.372e-05, "token_acc": 0.91713326, "epoch": 3.01237345, "global_step/max_steps": "2678/8890", "percentage": "30.12%", "elapsed_time": "56m 29s", "remaining_time": "2h 11m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790132} {"loss": 0.27963534, "grad_norm": 1.85071707, "learning_rate": 8.37e-05, "token_acc": 0.90325866, "epoch": 3.01349831, "global_step/max_steps": "2679/8890", "percentage": "30.13%", "elapsed_time": "56m 30s", "remaining_time": "2h 11m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790153} {"loss": 0.28417191, "grad_norm": 1.75206935, "learning_rate": 8.369e-05, "token_acc": 0.91567224, "epoch": 3.01462317, "global_step/max_steps": "2680/8890", "percentage": "30.15%", "elapsed_time": "56m 31s", "remaining_time": "2h 10m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790154} {"loss": 0.33681598, "grad_norm": 2.53966188, "learning_rate": 8.368e-05, "token_acc": 0.87966805, "epoch": 3.01574803, "global_step/max_steps": "2681/8890", "percentage": "30.16%", "elapsed_time": "56m 32s", "remaining_time": "2h 10m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790184} {"loss": 0.37391466, "grad_norm": 2.52213264, "learning_rate": 8.366e-05, "token_acc": 0.88554217, "epoch": 3.01687289, "global_step/max_steps": "2682/8890", "percentage": "30.17%", "elapsed_time": "56m 34s", "remaining_time": "2h 10m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790213} {"loss": 0.39888582, "grad_norm": 2.739815, "learning_rate": 8.365e-05, "token_acc": 0.85888889, "epoch": 3.01799775, "global_step/max_steps": "2683/8890", "percentage": "30.18%", "elapsed_time": "56m 35s", "remaining_time": "2h 10m 54s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790266} {"loss": 0.32306498, "grad_norm": 2.61519718, "learning_rate": 8.364e-05, "token_acc": 0.88424821, "epoch": 3.01912261, "global_step/max_steps": "2684/8890", "percentage": "30.19%", "elapsed_time": "56m 36s", "remaining_time": "2h 10m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790318} {"loss": 0.37442732, "grad_norm": 2.66864228, "learning_rate": 8.362e-05, "token_acc": 0.87890625, "epoch": 3.02024747, "global_step/max_steps": "2685/8890", "percentage": "30.20%", "elapsed_time": "56m 37s", "remaining_time": "2h 10m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790348} {"loss": 0.33393794, "grad_norm": 2.84844828, "learning_rate": 8.361e-05, "token_acc": 0.89565217, "epoch": 3.02137233, "global_step/max_steps": "2686/8890", "percentage": "30.21%", "elapsed_time": "56m 38s", "remaining_time": "2h 10m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790376} {"loss": 0.27556249, "grad_norm": 2.76919675, "learning_rate": 8.359e-05, "token_acc": 0.91531756, "epoch": 3.02249719, "global_step/max_steps": "2687/8890", "percentage": "30.22%", "elapsed_time": "56m 39s", "remaining_time": "2h 10m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790405} {"loss": 0.30495018, "grad_norm": 2.39770341, "learning_rate": 8.358e-05, "token_acc": 0.90742625, "epoch": 3.02362205, "global_step/max_steps": "2688/8890", "percentage": "30.24%", "elapsed_time": "56m 40s", "remaining_time": "2h 10m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79043} {"loss": 0.36865997, "grad_norm": 2.77928519, "learning_rate": 8.357e-05, "token_acc": 0.89537713, "epoch": 3.02474691, "global_step/max_steps": "2689/8890", "percentage": "30.25%", "elapsed_time": "56m 41s", "remaining_time": "2h 10m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79046} {"loss": 0.29778785, "grad_norm": 2.56841445, "learning_rate": 8.355e-05, "token_acc": 0.90379747, "epoch": 3.02587177, "global_step/max_steps": "2690/8890", "percentage": "30.26%", "elapsed_time": "56m 42s", "remaining_time": "2h 10m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790512} {"loss": 0.36908954, "grad_norm": 2.74901271, "learning_rate": 8.354e-05, "token_acc": 0.87289089, "epoch": 3.02699663, "global_step/max_steps": "2691/8890", "percentage": "30.27%", "elapsed_time": "56m 43s", "remaining_time": "2h 10m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790545} {"loss": 0.32040113, "grad_norm": 2.83808327, "learning_rate": 8.353e-05, "token_acc": 0.89641944, "epoch": 3.02812148, "global_step/max_steps": "2692/8890", "percentage": "30.28%", "elapsed_time": "56m 45s", "remaining_time": "2h 10m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790598} {"loss": 0.32269377, "grad_norm": 2.60348272, "learning_rate": 8.351e-05, "token_acc": 0.88711395, "epoch": 3.02924634, "global_step/max_steps": "2693/8890", "percentage": "30.29%", "elapsed_time": "56m 46s", "remaining_time": "2h 10m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790621} {"loss": 0.33437049, "grad_norm": 2.55616021, "learning_rate": 8.35e-05, "token_acc": 0.89266738, "epoch": 3.0303712, "global_step/max_steps": "2694/8890", "percentage": "30.30%", "elapsed_time": "56m 47s", "remaining_time": "2h 10m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.79065} {"loss": 0.32968456, "grad_norm": 2.67260599, "learning_rate": 8.348e-05, "token_acc": 0.88994083, "epoch": 3.03149606, "global_step/max_steps": "2695/8890", "percentage": "30.31%", "elapsed_time": "56m 48s", "remaining_time": "2h 10m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790709} {"loss": 0.34446746, "grad_norm": 2.4663372, "learning_rate": 8.347e-05, "token_acc": 0.89156627, "epoch": 3.03262092, "global_step/max_steps": "2696/8890", "percentage": "30.33%", "elapsed_time": "56m 49s", "remaining_time": "2h 10m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790729} {"loss": 0.34932196, "grad_norm": 2.18383384, "learning_rate": 8.346e-05, "token_acc": 0.88478074, "epoch": 3.03374578, "global_step/max_steps": "2697/8890", "percentage": "30.34%", "elapsed_time": "56m 50s", "remaining_time": "2h 10m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790719} {"loss": 0.3312313, "grad_norm": 2.44080114, "learning_rate": 8.344e-05, "token_acc": 0.88665998, "epoch": 3.03487064, "global_step/max_steps": "2698/8890", "percentage": "30.35%", "elapsed_time": "56m 51s", "remaining_time": "2h 10m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790771} {"loss": 0.27704775, "grad_norm": 2.37020063, "learning_rate": 8.343e-05, "token_acc": 0.90738814, "epoch": 3.0359955, "global_step/max_steps": "2699/8890", "percentage": "30.36%", "elapsed_time": "56m 53s", "remaining_time": "2h 10m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790795} {"loss": 0.26287442, "grad_norm": 2.0631249, "learning_rate": 8.342e-05, "token_acc": 0.9082774, "epoch": 3.03712036, "global_step/max_steps": "2700/8890", "percentage": "30.37%", "elapsed_time": "56m 54s", "remaining_time": "2h 10m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.790848} {"eval_loss": 1.06253207, "eval_runtime": 31.923, "eval_samples_per_second": 25.154, "eval_steps_per_second": 3.164, "eval_token_acc": 0.73710885, "epoch": 3.03712036, "global_step/max_steps": "2700/8890", "percentage": "30.37%", "elapsed_time": "57m 25s", "remaining_time": "2h 11m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783519} {"loss": 0.31334543, "grad_norm": 2.38238287, "learning_rate": 8.34e-05, "token_acc": 0.90114068, "epoch": 3.03824522, "global_step/max_steps": "2701/8890", "percentage": "30.38%", "elapsed_time": "57m 41s", "remaining_time": "2h 12m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780344} {"loss": 0.2734153, "grad_norm": 2.25886726, "learning_rate": 8.339e-05, "token_acc": 0.91656131, "epoch": 3.03937008, "global_step/max_steps": "2702/8890", "percentage": "30.39%", "elapsed_time": "57m 42s", "remaining_time": "2h 12m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780404} {"loss": 0.27317542, "grad_norm": 2.16425705, "learning_rate": 8.337e-05, "token_acc": 0.91486658, "epoch": 3.04049494, "global_step/max_steps": "2703/8890", "percentage": "30.40%", "elapsed_time": "57m 43s", "remaining_time": "2h 12m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780434} {"loss": 0.28908119, "grad_norm": 2.45308948, "learning_rate": 8.336e-05, "token_acc": 0.90482759, "epoch": 3.0416198, "global_step/max_steps": "2704/8890", "percentage": "30.42%", "elapsed_time": "57m 44s", "remaining_time": "2h 12m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780472} {"loss": 0.33549008, "grad_norm": 2.4026854, "learning_rate": 8.335e-05, "token_acc": 0.89136126, "epoch": 3.04274466, "global_step/max_steps": "2705/8890", "percentage": "30.43%", "elapsed_time": "57m 45s", "remaining_time": "2h 12m 4s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780502} {"loss": 0.30066383, "grad_norm": 2.42546082, "learning_rate": 8.333e-05, "token_acc": 0.89196311, "epoch": 3.04386952, "global_step/max_steps": "2706/8890", "percentage": "30.44%", "elapsed_time": "57m 46s", "remaining_time": "2h 12m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780505} {"loss": 0.32855499, "grad_norm": 2.59173536, "learning_rate": 8.332e-05, "token_acc": 0.88778409, "epoch": 3.04499438, "global_step/max_steps": "2707/8890", "percentage": "30.45%", "elapsed_time": "57m 48s", "remaining_time": "2h 12m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78056} {"loss": 0.26429418, "grad_norm": 1.9664371, "learning_rate": 8.33e-05, "token_acc": 0.91183295, "epoch": 3.04611924, "global_step/max_steps": "2708/8890", "percentage": "30.46%", "elapsed_time": "57m 49s", "remaining_time": "2h 12m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780549} {"loss": 0.34474155, "grad_norm": 2.33116627, "learning_rate": 8.329e-05, "token_acc": 0.8951049, "epoch": 3.04724409, "global_step/max_steps": "2709/8890", "percentage": "30.47%", "elapsed_time": "57m 50s", "remaining_time": "2h 11m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780549} {"loss": 0.35769132, "grad_norm": 2.58645272, "learning_rate": 8.328e-05, "token_acc": 0.87483356, "epoch": 3.04836895, "global_step/max_steps": "2710/8890", "percentage": "30.48%", "elapsed_time": "57m 51s", "remaining_time": "2h 11m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780578} {"loss": 0.30680066, "grad_norm": 2.3717978, "learning_rate": 8.326e-05, "token_acc": 0.89942857, "epoch": 3.04949381, "global_step/max_steps": "2711/8890", "percentage": "30.49%", "elapsed_time": "57m 52s", "remaining_time": "2h 11m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780613} {"loss": 0.28800109, "grad_norm": 1.8585577, "learning_rate": 8.325e-05, "token_acc": 0.90183486, "epoch": 3.05061867, "global_step/max_steps": "2712/8890", "percentage": "30.51%", "elapsed_time": "57m 54s", "remaining_time": "2h 11m 54s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780626} {"loss": 0.22008987, "grad_norm": 1.60211825, "learning_rate": 8.323e-05, "token_acc": 0.93927748, "epoch": 3.05174353, "global_step/max_steps": "2713/8890", "percentage": "30.52%", "elapsed_time": "57m 55s", "remaining_time": "2h 11m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780662} {"loss": 0.33117914, "grad_norm": 2.7852366, "learning_rate": 8.322e-05, "token_acc": 0.89793439, "epoch": 3.05286839, "global_step/max_steps": "2714/8890", "percentage": "30.53%", "elapsed_time": "57m 56s", "remaining_time": "2h 11m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780684} {"loss": 0.3518472, "grad_norm": 2.80164313, "learning_rate": 8.321e-05, "token_acc": 0.90103397, "epoch": 3.05399325, "global_step/max_steps": "2715/8890", "percentage": "30.54%", "elapsed_time": "57m 57s", "remaining_time": "2h 11m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780752} {"loss": 0.3393122, "grad_norm": 2.9606452, "learning_rate": 8.319e-05, "token_acc": 0.89344262, "epoch": 3.05511811, "global_step/max_steps": "2716/8890", "percentage": "30.55%", "elapsed_time": "57m 58s", "remaining_time": "2h 11m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780788} {"loss": 0.38838679, "grad_norm": 2.24036074, "learning_rate": 8.318e-05, "token_acc": 0.87787788, "epoch": 3.05624297, "global_step/max_steps": "2717/8890", "percentage": "30.56%", "elapsed_time": "57m 59s", "remaining_time": "2h 11m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780815} {"loss": 0.26841414, "grad_norm": 2.35867906, "learning_rate": 8.317e-05, "token_acc": 0.91827637, "epoch": 3.05736783, "global_step/max_steps": "2718/8890", "percentage": "30.57%", "elapsed_time": "58m 0s", "remaining_time": "2h 11m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78086} {"loss": 0.35769916, "grad_norm": 2.17554784, "learning_rate": 8.315e-05, "token_acc": 0.88054299, "epoch": 3.05849269, "global_step/max_steps": "2719/8890", "percentage": "30.58%", "elapsed_time": "58m 1s", "remaining_time": "2h 11m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780892} {"loss": 0.32059205, "grad_norm": 2.33760023, "learning_rate": 8.314e-05, "token_acc": 0.90252294, "epoch": 3.05961755, "global_step/max_steps": "2720/8890", "percentage": "30.60%", "elapsed_time": "58m 3s", "remaining_time": "2h 11m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780916} {"loss": 0.34830588, "grad_norm": 2.54842448, "learning_rate": 8.312e-05, "token_acc": 0.88410992, "epoch": 3.06074241, "global_step/max_steps": "2721/8890", "percentage": "30.61%", "elapsed_time": "58m 4s", "remaining_time": "2h 11m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780944} {"loss": 0.35863197, "grad_norm": 2.51529026, "learning_rate": 8.311e-05, "token_acc": 0.89067524, "epoch": 3.06186727, "global_step/max_steps": "2722/8890", "percentage": "30.62%", "elapsed_time": "58m 5s", "remaining_time": "2h 11m 37s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780973} {"loss": 0.20475958, "grad_norm": 2.16916466, "learning_rate": 8.31e-05, "token_acc": 0.93290323, "epoch": 3.06299213, "global_step/max_steps": "2723/8890", "percentage": "30.63%", "elapsed_time": "58m 6s", "remaining_time": "2h 11m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781009} {"loss": 0.29292035, "grad_norm": 2.39867496, "learning_rate": 8.308e-05, "token_acc": 0.89634865, "epoch": 3.06411699, "global_step/max_steps": "2724/8890", "percentage": "30.64%", "elapsed_time": "58m 7s", "remaining_time": "2h 11m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781039} {"loss": 0.41682899, "grad_norm": 2.2853601, "learning_rate": 8.307e-05, "token_acc": 0.86456401, "epoch": 3.06524184, "global_step/max_steps": "2725/8890", "percentage": "30.65%", "elapsed_time": "58m 8s", "remaining_time": "2h 11m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781065} {"loss": 0.31328732, "grad_norm": 2.62255073, "learning_rate": 8.305e-05, "token_acc": 0.89640884, "epoch": 3.0663667, "global_step/max_steps": "2726/8890", "percentage": "30.66%", "elapsed_time": "58m 9s", "remaining_time": "2h 11m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.7811} {"loss": 0.30434155, "grad_norm": 2.17217875, "learning_rate": 8.304e-05, "token_acc": 0.90377358, "epoch": 3.06749156, "global_step/max_steps": "2727/8890", "percentage": "30.67%", "elapsed_time": "58m 11s", "remaining_time": "2h 11m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781087} {"loss": 0.27089351, "grad_norm": 2.16816807, "learning_rate": 8.303e-05, "token_acc": 0.91196835, "epoch": 3.06861642, "global_step/max_steps": "2728/8890", "percentage": "30.69%", "elapsed_time": "58m 12s", "remaining_time": "2h 11m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78112} {"loss": 0.32334137, "grad_norm": 2.38819909, "learning_rate": 8.301e-05, "token_acc": 0.88967972, "epoch": 3.06974128, "global_step/max_steps": "2729/8890", "percentage": "30.70%", "elapsed_time": "58m 13s", "remaining_time": "2h 11m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781145} {"loss": 0.33864424, "grad_norm": 2.45079279, "learning_rate": 8.3e-05, "token_acc": 0.88174274, "epoch": 3.07086614, "global_step/max_steps": "2730/8890", "percentage": "30.71%", "elapsed_time": "58m 14s", "remaining_time": "2h 11m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78118} {"loss": 0.3071357, "grad_norm": 2.87112522, "learning_rate": 8.298e-05, "token_acc": 0.90126939, "epoch": 3.071991, "global_step/max_steps": "2731/8890", "percentage": "30.72%", "elapsed_time": "58m 15s", "remaining_time": "2h 11m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781251} {"loss": 0.36376721, "grad_norm": 2.45983338, "learning_rate": 8.297e-05, "token_acc": 0.88754647, "epoch": 3.07311586, "global_step/max_steps": "2732/8890", "percentage": "30.73%", "elapsed_time": "58m 16s", "remaining_time": "2h 11m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781307} {"loss": 0.33823413, "grad_norm": 3.07029843, "learning_rate": 8.296e-05, "token_acc": 0.89906832, "epoch": 3.07424072, "global_step/max_steps": "2733/8890", "percentage": "30.74%", "elapsed_time": "58m 17s", "remaining_time": "2h 11m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781344} {"loss": 0.4048501, "grad_norm": 2.48369789, "learning_rate": 8.294e-05, "token_acc": 0.89485214, "epoch": 3.07536558, "global_step/max_steps": "2734/8890", "percentage": "30.75%", "elapsed_time": "58m 18s", "remaining_time": "2h 11m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781394} {"loss": 0.26539838, "grad_norm": 2.14545321, "learning_rate": 8.293e-05, "token_acc": 0.91472868, "epoch": 3.07649044, "global_step/max_steps": "2735/8890", "percentage": "30.76%", "elapsed_time": "58m 20s", "remaining_time": "2h 11m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781419} {"loss": 0.33912343, "grad_norm": 2.66633391, "learning_rate": 8.291e-05, "token_acc": 0.90023753, "epoch": 3.0776153, "global_step/max_steps": "2736/8890", "percentage": "30.78%", "elapsed_time": "58m 21s", "remaining_time": "2h 11m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781451} {"loss": 0.29670161, "grad_norm": 2.26748872, "learning_rate": 8.29e-05, "token_acc": 0.9091621, "epoch": 3.07874016, "global_step/max_steps": "2737/8890", "percentage": "30.79%", "elapsed_time": "58m 22s", "remaining_time": "2h 11m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78148} {"loss": 0.34863302, "grad_norm": 2.53010154, "learning_rate": 8.289e-05, "token_acc": 0.88182832, "epoch": 3.07986502, "global_step/max_steps": "2738/8890", "percentage": "30.80%", "elapsed_time": "58m 23s", "remaining_time": "2h 11m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781511} {"loss": 0.29034954, "grad_norm": 2.21604013, "learning_rate": 8.287e-05, "token_acc": 0.8961039, "epoch": 3.08098988, "global_step/max_steps": "2739/8890", "percentage": "30.81%", "elapsed_time": "58m 25s", "remaining_time": "2h 11m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781397} {"loss": 0.3810187, "grad_norm": 2.46459317, "learning_rate": 8.286e-05, "token_acc": 0.87946429, "epoch": 3.08211474, "global_step/max_steps": "2740/8890", "percentage": "30.82%", "elapsed_time": "58m 26s", "remaining_time": "2h 11m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781422} {"loss": 0.25888675, "grad_norm": 1.97394776, "learning_rate": 8.284e-05, "token_acc": 0.92576792, "epoch": 3.0832396, "global_step/max_steps": "2741/8890", "percentage": "30.83%", "elapsed_time": "58m 27s", "remaining_time": "2h 11m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781453} {"loss": 0.32511997, "grad_norm": 2.34776568, "learning_rate": 8.283e-05, "token_acc": 0.89204545, "epoch": 3.08436445, "global_step/max_steps": "2742/8890", "percentage": "30.84%", "elapsed_time": "58m 28s", "remaining_time": "2h 11m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781477} {"loss": 0.2951023, "grad_norm": 2.49090695, "learning_rate": 8.282e-05, "token_acc": 0.89650712, "epoch": 3.08548931, "global_step/max_steps": "2743/8890", "percentage": "30.85%", "elapsed_time": "58m 29s", "remaining_time": "2h 11m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781512} {"loss": 0.40728903, "grad_norm": 2.48857737, "learning_rate": 8.28e-05, "token_acc": 0.8667288, "epoch": 3.08661417, "global_step/max_steps": "2744/8890", "percentage": "30.87%", "elapsed_time": "58m 30s", "remaining_time": "2h 11m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781571} {"loss": 0.36613595, "grad_norm": 2.45866442, "learning_rate": 8.279e-05, "token_acc": 0.89096573, "epoch": 3.08773903, "global_step/max_steps": "2745/8890", "percentage": "30.88%", "elapsed_time": "58m 32s", "remaining_time": "2h 11m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781604} {"loss": 0.35015655, "grad_norm": 2.64545274, "learning_rate": 8.277e-05, "token_acc": 0.88757396, "epoch": 3.08886389, "global_step/max_steps": "2746/8890", "percentage": "30.89%", "elapsed_time": "58m 33s", "remaining_time": "2h 11m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781637} {"loss": 0.2685321, "grad_norm": 2.84701824, "learning_rate": 8.276e-05, "token_acc": 0.90982287, "epoch": 3.08998875, "global_step/max_steps": "2747/8890", "percentage": "30.90%", "elapsed_time": "58m 34s", "remaining_time": "2h 11m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78152} {"loss": 0.33129656, "grad_norm": 2.20396161, "learning_rate": 8.275e-05, "token_acc": 0.88723206, "epoch": 3.09111361, "global_step/max_steps": "2748/8890", "percentage": "30.91%", "elapsed_time": "58m 36s", "remaining_time": "2h 10m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781515} {"loss": 0.19175395, "grad_norm": 1.91389775, "learning_rate": 8.273e-05, "token_acc": 0.94007491, "epoch": 3.09223847, "global_step/max_steps": "2749/8890", "percentage": "30.92%", "elapsed_time": "58m 37s", "remaining_time": "2h 10m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781566} {"loss": 0.34973425, "grad_norm": 2.38615799, "learning_rate": 8.272e-05, "token_acc": 0.88815789, "epoch": 3.09336333, "global_step/max_steps": "2750/8890", "percentage": "30.93%", "elapsed_time": "58m 38s", "remaining_time": "2h 10m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781593} {"loss": 0.37138921, "grad_norm": 2.21474457, "learning_rate": 8.27e-05, "token_acc": 0.87638376, "epoch": 3.09448819, "global_step/max_steps": "2751/8890", "percentage": "30.94%", "elapsed_time": "58m 39s", "remaining_time": "2h 10m 54s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781616} {"loss": 0.38342977, "grad_norm": 2.48172379, "learning_rate": 8.269e-05, "token_acc": 0.87393162, "epoch": 3.09561305, "global_step/max_steps": "2752/8890", "percentage": "30.96%", "elapsed_time": "58m 40s", "remaining_time": "2h 10m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781652} {"loss": 0.34745544, "grad_norm": 2.34823394, "learning_rate": 8.268e-05, "token_acc": 0.88656195, "epoch": 3.09673791, "global_step/max_steps": "2753/8890", "percentage": "30.97%", "elapsed_time": "58m 41s", "remaining_time": "2h 10m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78168} {"loss": 0.35089886, "grad_norm": 2.37085271, "learning_rate": 8.266e-05, "token_acc": 0.89853659, "epoch": 3.09786277, "global_step/max_steps": "2754/8890", "percentage": "30.98%", "elapsed_time": "58m 43s", "remaining_time": "2h 10m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781707} {"loss": 0.26236534, "grad_norm": 2.56389236, "learning_rate": 8.265e-05, "token_acc": 0.91994751, "epoch": 3.09898763, "global_step/max_steps": "2755/8890", "percentage": "30.99%", "elapsed_time": "58m 44s", "remaining_time": "2h 10m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781741} {"loss": 0.27182263, "grad_norm": 2.63119459, "learning_rate": 8.263e-05, "token_acc": 0.90458015, "epoch": 3.10011249, "global_step/max_steps": "2756/8890", "percentage": "31.00%", "elapsed_time": "58m 45s", "remaining_time": "2h 10m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781776} {"loss": 0.242512, "grad_norm": 2.31084085, "learning_rate": 8.262e-05, "token_acc": 0.92087312, "epoch": 3.10123735, "global_step/max_steps": "2757/8890", "percentage": "31.01%", "elapsed_time": "58m 46s", "remaining_time": "2h 10m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781821} {"loss": 0.42411408, "grad_norm": 2.73598886, "learning_rate": 8.26e-05, "token_acc": 0.86464646, "epoch": 3.1023622, "global_step/max_steps": "2758/8890", "percentage": "31.02%", "elapsed_time": "58m 47s", "remaining_time": "2h 10m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781873} {"loss": 0.26081952, "grad_norm": 2.39222288, "learning_rate": 8.259e-05, "token_acc": 0.91263441, "epoch": 3.10348706, "global_step/max_steps": "2759/8890", "percentage": "31.03%", "elapsed_time": "58m 48s", "remaining_time": "2h 10m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781908} {"loss": 0.34746963, "grad_norm": 2.19905663, "learning_rate": 8.258e-05, "token_acc": 0.89818548, "epoch": 3.10461192, "global_step/max_steps": "2760/8890", "percentage": "31.05%", "elapsed_time": "58m 49s", "remaining_time": "2h 10m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781935} {"loss": 0.38153011, "grad_norm": 2.16958547, "learning_rate": 8.256e-05, "token_acc": 0.87563884, "epoch": 3.10573678, "global_step/max_steps": "2761/8890", "percentage": "31.06%", "elapsed_time": "58m 51s", "remaining_time": "2h 10m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78193} {"loss": 0.23568334, "grad_norm": 2.16798735, "learning_rate": 8.255e-05, "token_acc": 0.91521739, "epoch": 3.10686164, "global_step/max_steps": "2762/8890", "percentage": "31.07%", "elapsed_time": "58m 52s", "remaining_time": "2h 10m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781979} {"loss": 0.2951107, "grad_norm": 2.32779384, "learning_rate": 8.253e-05, "token_acc": 0.91457286, "epoch": 3.1079865, "global_step/max_steps": "2763/8890", "percentage": "31.08%", "elapsed_time": "58m 53s", "remaining_time": "2h 10m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782006} {"loss": 0.32392719, "grad_norm": 2.61872411, "learning_rate": 8.252e-05, "token_acc": 0.89769821, "epoch": 3.10911136, "global_step/max_steps": "2764/8890", "percentage": "31.09%", "elapsed_time": "58m 54s", "remaining_time": "2h 10m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782058} {"loss": 0.43427551, "grad_norm": 2.60313082, "learning_rate": 8.251e-05, "token_acc": 0.87052551, "epoch": 3.11023622, "global_step/max_steps": "2765/8890", "percentage": "31.10%", "elapsed_time": "58m 55s", "remaining_time": "2h 10m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782059} {"loss": 0.43739629, "grad_norm": 2.59874082, "learning_rate": 8.249e-05, "token_acc": 0.86576041, "epoch": 3.11136108, "global_step/max_steps": "2766/8890", "percentage": "31.11%", "elapsed_time": "58m 56s", "remaining_time": "2h 10m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782087} {"loss": 0.38485843, "grad_norm": 2.30074883, "learning_rate": 8.248e-05, "token_acc": 0.87912088, "epoch": 3.11248594, "global_step/max_steps": "2767/8890", "percentage": "31.12%", "elapsed_time": "58m 58s", "remaining_time": "2h 10m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782064} {"loss": 0.36547542, "grad_norm": 2.65563607, "learning_rate": 8.246e-05, "token_acc": 0.87835052, "epoch": 3.1136108, "global_step/max_steps": "2768/8890", "percentage": "31.14%", "elapsed_time": "58m 59s", "remaining_time": "2h 10m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782096} {"loss": 0.34154469, "grad_norm": 2.52834916, "learning_rate": 8.245e-05, "token_acc": 0.89433962, "epoch": 3.11473566, "global_step/max_steps": "2769/8890", "percentage": "31.15%", "elapsed_time": "59m 0s", "remaining_time": "2h 10m 26s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782124} {"loss": 0.35712188, "grad_norm": 2.57409286, "learning_rate": 8.244e-05, "token_acc": 0.8949115, "epoch": 3.11586052, "global_step/max_steps": "2770/8890", "percentage": "31.16%", "elapsed_time": "59m 1s", "remaining_time": "2h 10m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782159} {"loss": 0.31383669, "grad_norm": 2.68595004, "learning_rate": 8.242e-05, "token_acc": 0.88624339, "epoch": 3.11698538, "global_step/max_steps": "2771/8890", "percentage": "31.17%", "elapsed_time": "59m 2s", "remaining_time": "2h 10m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78219} {"loss": 0.25985956, "grad_norm": 2.32168937, "learning_rate": 8.241e-05, "token_acc": 0.91486658, "epoch": 3.11811024, "global_step/max_steps": "2772/8890", "percentage": "31.18%", "elapsed_time": "59m 3s", "remaining_time": "2h 10m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78222} {"loss": 0.31522858, "grad_norm": 2.40205455, "learning_rate": 8.239e-05, "token_acc": 0.88501742, "epoch": 3.1192351, "global_step/max_steps": "2773/8890", "percentage": "31.19%", "elapsed_time": "59m 4s", "remaining_time": "2h 10m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782245} {"loss": 0.27128863, "grad_norm": 2.26053739, "learning_rate": 8.238e-05, "token_acc": 0.91205674, "epoch": 3.12035996, "global_step/max_steps": "2774/8890", "percentage": "31.20%", "elapsed_time": "59m 6s", "remaining_time": "2h 10m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782271} {"loss": 0.29120269, "grad_norm": 2.31040168, "learning_rate": 8.236e-05, "token_acc": 0.91102123, "epoch": 3.12148481, "global_step/max_steps": "2775/8890", "percentage": "31.21%", "elapsed_time": "59m 7s", "remaining_time": "2h 10m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.7823} {"loss": 0.30132821, "grad_norm": 2.33602834, "learning_rate": 8.235e-05, "token_acc": 0.90311005, "epoch": 3.12260967, "global_step/max_steps": "2776/8890", "percentage": "31.23%", "elapsed_time": "59m 8s", "remaining_time": "2h 10m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782371} {"loss": 0.36914676, "grad_norm": 2.43335056, "learning_rate": 8.234e-05, "token_acc": 0.88685015, "epoch": 3.12373453, "global_step/max_steps": "2777/8890", "percentage": "31.24%", "elapsed_time": "59m 9s", "remaining_time": "2h 10m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782401} {"loss": 0.32413059, "grad_norm": 2.39521503, "learning_rate": 8.232e-05, "token_acc": 0.89375, "epoch": 3.12485939, "global_step/max_steps": "2778/8890", "percentage": "31.25%", "elapsed_time": "59m 10s", "remaining_time": "2h 10m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782447} {"loss": 0.26183218, "grad_norm": 2.56065369, "learning_rate": 8.231e-05, "token_acc": 0.92372881, "epoch": 3.12598425, "global_step/max_steps": "2779/8890", "percentage": "31.26%", "elapsed_time": "59m 11s", "remaining_time": "2h 10m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782501} {"loss": 0.33625191, "grad_norm": 2.3192842, "learning_rate": 8.229e-05, "token_acc": 0.88765088, "epoch": 3.12710911, "global_step/max_steps": "2780/8890", "percentage": "31.27%", "elapsed_time": "59m 12s", "remaining_time": "2h 10m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78255} {"loss": 0.33001539, "grad_norm": 2.57118678, "learning_rate": 8.228e-05, "token_acc": 0.88946684, "epoch": 3.12823397, "global_step/max_steps": "2781/8890", "percentage": "31.28%", "elapsed_time": "59m 13s", "remaining_time": "2h 10m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.7826} {"loss": 0.27733466, "grad_norm": 2.23598695, "learning_rate": 8.227e-05, "token_acc": 0.91091314, "epoch": 3.12935883, "global_step/max_steps": "2782/8890", "percentage": "31.29%", "elapsed_time": "59m 14s", "remaining_time": "2h 10m 4s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782626} {"loss": 0.26695108, "grad_norm": 2.29380584, "learning_rate": 8.225e-05, "token_acc": 0.92339833, "epoch": 3.13048369, "global_step/max_steps": "2783/8890", "percentage": "31.30%", "elapsed_time": "59m 15s", "remaining_time": "2h 10m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782654} {"loss": 0.4114337, "grad_norm": 2.53086376, "learning_rate": 8.224e-05, "token_acc": 0.87361624, "epoch": 3.13160855, "global_step/max_steps": "2784/8890", "percentage": "31.32%", "elapsed_time": "59m 16s", "remaining_time": "2h 10m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782691} {"loss": 0.21044126, "grad_norm": 2.25588131, "learning_rate": 8.222e-05, "token_acc": 0.93121019, "epoch": 3.13273341, "global_step/max_steps": "2785/8890", "percentage": "31.33%", "elapsed_time": "59m 17s", "remaining_time": "2h 9m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782749} {"loss": 0.26053131, "grad_norm": 2.45590448, "learning_rate": 8.221e-05, "token_acc": 0.91099476, "epoch": 3.13385827, "global_step/max_steps": "2786/8890", "percentage": "31.34%", "elapsed_time": "59m 19s", "remaining_time": "2h 9m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.7828} {"loss": 0.27361155, "grad_norm": 1.93778455, "learning_rate": 8.219e-05, "token_acc": 0.91548043, "epoch": 3.13498313, "global_step/max_steps": "2787/8890", "percentage": "31.35%", "elapsed_time": "59m 20s", "remaining_time": "2h 9m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782824} {"loss": 0.29957098, "grad_norm": 2.25014019, "learning_rate": 8.218e-05, "token_acc": 0.89567148, "epoch": 3.13610799, "global_step/max_steps": "2788/8890", "percentage": "31.36%", "elapsed_time": "59m 21s", "remaining_time": "2h 9m 54s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782822} {"loss": 0.30540836, "grad_norm": 2.32087636, "learning_rate": 8.217e-05, "token_acc": 0.89845475, "epoch": 3.13723285, "global_step/max_steps": "2789/8890", "percentage": "31.37%", "elapsed_time": "59m 22s", "remaining_time": "2h 9m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782867} {"loss": 0.31286597, "grad_norm": 2.66518259, "learning_rate": 8.215e-05, "token_acc": 0.88843537, "epoch": 3.13835771, "global_step/max_steps": "2790/8890", "percentage": "31.38%", "elapsed_time": "59m 23s", "remaining_time": "2h 9m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782923} {"loss": 0.25478131, "grad_norm": 1.84698582, "learning_rate": 8.214e-05, "token_acc": 0.92004264, "epoch": 3.13948256, "global_step/max_steps": "2791/8890", "percentage": "31.39%", "elapsed_time": "59m 24s", "remaining_time": "2h 9m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782952} {"loss": 0.27300131, "grad_norm": 2.57615447, "learning_rate": 8.212e-05, "token_acc": 0.91152503, "epoch": 3.14060742, "global_step/max_steps": "2792/8890", "percentage": "31.41%", "elapsed_time": "59m 25s", "remaining_time": "2h 9m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782983} {"loss": 0.30547625, "grad_norm": 2.53777933, "learning_rate": 8.211e-05, "token_acc": 0.90646651, "epoch": 3.14173228, "global_step/max_steps": "2793/8890", "percentage": "31.42%", "elapsed_time": "59m 26s", "remaining_time": "2h 9m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783013} {"loss": 0.31402344, "grad_norm": 2.58993053, "learning_rate": 8.209e-05, "token_acc": 0.89929078, "epoch": 3.14285714, "global_step/max_steps": "2794/8890", "percentage": "31.43%", "elapsed_time": "59m 28s", "remaining_time": "2h 9m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783065} {"loss": 0.37591094, "grad_norm": 2.55380011, "learning_rate": 8.208e-05, "token_acc": 0.88110138, "epoch": 3.143982, "global_step/max_steps": "2795/8890", "percentage": "31.44%", "elapsed_time": "59m 28s", "remaining_time": "2h 9m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783174} {"loss": 0.37292102, "grad_norm": 2.74035144, "learning_rate": 8.207e-05, "token_acc": 0.88148148, "epoch": 3.14510686, "global_step/max_steps": "2796/8890", "percentage": "31.45%", "elapsed_time": "59m 29s", "remaining_time": "2h 9m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783223} {"loss": 0.27272877, "grad_norm": 2.46698356, "learning_rate": 8.205e-05, "token_acc": 0.9027611, "epoch": 3.14623172, "global_step/max_steps": "2797/8890", "percentage": "31.46%", "elapsed_time": "59m 30s", "remaining_time": "2h 9m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783273} {"loss": 0.33956113, "grad_norm": 2.21006656, "learning_rate": 8.204e-05, "token_acc": 0.88629032, "epoch": 3.14735658, "global_step/max_steps": "2798/8890", "percentage": "31.47%", "elapsed_time": "59m 32s", "remaining_time": "2h 9m 37s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783264} {"loss": 0.32033688, "grad_norm": 2.49278665, "learning_rate": 8.202e-05, "token_acc": 0.89863326, "epoch": 3.14848144, "global_step/max_steps": "2799/8890", "percentage": "31.48%", "elapsed_time": "59m 33s", "remaining_time": "2h 9m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783291} {"loss": 0.47145981, "grad_norm": 2.71186709, "learning_rate": 8.201e-05, "token_acc": 0.85140187, "epoch": 3.1496063, "global_step/max_steps": "2800/8890", "percentage": "31.50%", "elapsed_time": "59m 34s", "remaining_time": "2h 9m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78329} {"loss": 0.25416663, "grad_norm": 2.13930297, "learning_rate": 8.199e-05, "token_acc": 0.91592483, "epoch": 3.15073116, "global_step/max_steps": "2801/8890", "percentage": "31.51%", "elapsed_time": "59m 35s", "remaining_time": "2h 9m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783318} {"loss": 0.28914061, "grad_norm": 2.54485416, "learning_rate": 8.198e-05, "token_acc": 0.89340813, "epoch": 3.15185602, "global_step/max_steps": "2802/8890", "percentage": "31.52%", "elapsed_time": "59m 36s", "remaining_time": "2h 9m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783352} {"loss": 0.39368275, "grad_norm": 2.72261405, "learning_rate": 8.197e-05, "token_acc": 0.87793953, "epoch": 3.15298088, "global_step/max_steps": "2803/8890", "percentage": "31.53%", "elapsed_time": "59m 37s", "remaining_time": "2h 9m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783426} {"loss": 0.41252401, "grad_norm": 2.63268638, "learning_rate": 8.195e-05, "token_acc": 0.87512291, "epoch": 3.15410574, "global_step/max_steps": "2804/8890", "percentage": "31.54%", "elapsed_time": "59m 39s", "remaining_time": "2h 9m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783456} {"loss": 0.35573679, "grad_norm": 2.84115791, "learning_rate": 8.194e-05, "token_acc": 0.885, "epoch": 3.1552306, "global_step/max_steps": "2805/8890", "percentage": "31.55%", "elapsed_time": "59m 40s", "remaining_time": "2h 9m 26s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783483} {"loss": 0.31651506, "grad_norm": 2.89952087, "learning_rate": 8.192e-05, "token_acc": 0.89241379, "epoch": 3.15635546, "global_step/max_steps": "2806/8890", "percentage": "31.56%", "elapsed_time": "59m 41s", "remaining_time": "2h 9m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783529} {"loss": 0.38636833, "grad_norm": 2.63467717, "learning_rate": 8.191e-05, "token_acc": 0.88514226, "epoch": 3.15748031, "global_step/max_steps": "2807/8890", "percentage": "31.57%", "elapsed_time": "59m 42s", "remaining_time": "2h 9m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783556} {"loss": 0.3575103, "grad_norm": 2.51716518, "learning_rate": 8.189e-05, "token_acc": 0.88787879, "epoch": 3.15860517, "global_step/max_steps": "2808/8890", "percentage": "31.59%", "elapsed_time": "59m 43s", "remaining_time": "2h 9m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783584} {"loss": 0.3527512, "grad_norm": 2.23892879, "learning_rate": 8.188e-05, "token_acc": 0.90345528, "epoch": 3.15973003, "global_step/max_steps": "2809/8890", "percentage": "31.60%", "elapsed_time": "59m 44s", "remaining_time": "2h 9m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783584} {"loss": 0.30560374, "grad_norm": 1.9682374, "learning_rate": 8.187e-05, "token_acc": 0.89451827, "epoch": 3.16085489, "global_step/max_steps": "2810/8890", "percentage": "31.61%", "elapsed_time": "59m 45s", "remaining_time": "2h 9m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783615} {"loss": 0.43026745, "grad_norm": 2.416677, "learning_rate": 8.185e-05, "token_acc": 0.86580517, "epoch": 3.16197975, "global_step/max_steps": "2811/8890", "percentage": "31.62%", "elapsed_time": "59m 47s", "remaining_time": "2h 9m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783613} {"loss": 0.32351094, "grad_norm": 2.55596781, "learning_rate": 8.184e-05, "token_acc": 0.89307412, "epoch": 3.16310461, "global_step/max_steps": "2812/8890", "percentage": "31.63%", "elapsed_time": "59m 48s", "remaining_time": "2h 9m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783656} {"loss": 0.26931646, "grad_norm": 2.30812144, "learning_rate": 8.182e-05, "token_acc": 0.91775457, "epoch": 3.16422947, "global_step/max_steps": "2813/8890", "percentage": "31.64%", "elapsed_time": "59m 49s", "remaining_time": "2h 9m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783716} {"loss": 0.34515142, "grad_norm": 2.61144352, "learning_rate": 8.181e-05, "token_acc": 0.88355167, "epoch": 3.16535433, "global_step/max_steps": "2814/8890", "percentage": "31.65%", "elapsed_time": "59m 50s", "remaining_time": "2h 9m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783766} {"loss": 0.29225162, "grad_norm": 2.52013993, "learning_rate": 8.179e-05, "token_acc": 0.90612245, "epoch": 3.16647919, "global_step/max_steps": "2815/8890", "percentage": "31.66%", "elapsed_time": "59m 51s", "remaining_time": "2h 9m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783782} {"loss": 0.28172255, "grad_norm": 2.27058339, "learning_rate": 8.178e-05, "token_acc": 0.9078105, "epoch": 3.16760405, "global_step/max_steps": "2816/8890", "percentage": "31.68%", "elapsed_time": "59m 52s", "remaining_time": "2h 9m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783808} {"loss": 0.32691026, "grad_norm": 2.44652724, "learning_rate": 8.177e-05, "token_acc": 0.90301003, "epoch": 3.16872891, "global_step/max_steps": "2817/8890", "percentage": "31.69%", "elapsed_time": "59m 53s", "remaining_time": "2h 9m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78384} {"loss": 0.29434735, "grad_norm": 2.47426605, "learning_rate": 8.175e-05, "token_acc": 0.89731622, "epoch": 3.16985377, "global_step/max_steps": "2818/8890", "percentage": "31.70%", "elapsed_time": "59m 55s", "remaining_time": "2h 9m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783866} {"loss": 0.28720537, "grad_norm": 2.74255157, "learning_rate": 8.174e-05, "token_acc": 0.90185676, "epoch": 3.17097863, "global_step/max_steps": "2819/8890", "percentage": "31.71%", "elapsed_time": "59m 56s", "remaining_time": "2h 9m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78386} {"loss": 0.1862396, "grad_norm": 2.18382001, "learning_rate": 8.172e-05, "token_acc": 0.9237013, "epoch": 3.17210349, "global_step/max_steps": "2820/8890", "percentage": "31.72%", "elapsed_time": "59m 57s", "remaining_time": "2h 9m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783905} {"loss": 0.30342436, "grad_norm": 2.30728912, "learning_rate": 8.171e-05, "token_acc": 0.90485629, "epoch": 3.17322835, "global_step/max_steps": "2821/8890", "percentage": "31.73%", "elapsed_time": "59m 58s", "remaining_time": "2h 9m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783977} {"loss": 0.50098002, "grad_norm": 3.00755429, "learning_rate": 8.169e-05, "token_acc": 0.84140436, "epoch": 3.17435321, "global_step/max_steps": "2822/8890", "percentage": "31.74%", "elapsed_time": "59m 59s", "remaining_time": "2h 8m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78401} {"loss": 0.26650757, "grad_norm": 1.9236691, "learning_rate": 8.168e-05, "token_acc": 0.91011236, "epoch": 3.17547807, "global_step/max_steps": "2823/8890", "percentage": "31.75%", "elapsed_time": "1h 0m 0s", "remaining_time": "2h 8m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784053} {"loss": 0.26430014, "grad_norm": 2.10311127, "learning_rate": 8.166e-05, "token_acc": 0.91035219, "epoch": 3.17660292, "global_step/max_steps": "2824/8890", "percentage": "31.77%", "elapsed_time": "1h 0m 1s", "remaining_time": "2h 8m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784087} {"loss": 0.24288788, "grad_norm": 2.09194446, "learning_rate": 8.165e-05, "token_acc": 0.9248497, "epoch": 3.17772778, "global_step/max_steps": "2825/8890", "percentage": "31.78%", "elapsed_time": "1h 0m 2s", "remaining_time": "2h 8m 54s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784118} {"loss": 0.4015941, "grad_norm": 2.53655815, "learning_rate": 8.164e-05, "token_acc": 0.86477733, "epoch": 3.17885264, "global_step/max_steps": "2826/8890", "percentage": "31.79%", "elapsed_time": "1h 0m 3s", "remaining_time": "2h 8m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784144} {"loss": 0.37980682, "grad_norm": 2.17874122, "learning_rate": 8.162e-05, "token_acc": 0.89581395, "epoch": 3.1799775, "global_step/max_steps": "2827/8890", "percentage": "31.80%", "elapsed_time": "1h 0m 5s", "remaining_time": "2h 8m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784136} {"loss": 0.36215714, "grad_norm": 2.92346263, "learning_rate": 8.161e-05, "token_acc": 0.88478747, "epoch": 3.18110236, "global_step/max_steps": "2828/8890", "percentage": "31.81%", "elapsed_time": "1h 0m 6s", "remaining_time": "2h 8m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784137} {"loss": 0.35749474, "grad_norm": 2.50443506, "learning_rate": 8.159e-05, "token_acc": 0.88935722, "epoch": 3.18222722, "global_step/max_steps": "2829/8890", "percentage": "31.82%", "elapsed_time": "1h 0m 7s", "remaining_time": "2h 8m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784162} {"loss": 0.33992803, "grad_norm": 2.42444706, "learning_rate": 8.158e-05, "token_acc": 0.90481651, "epoch": 3.18335208, "global_step/max_steps": "2830/8890", "percentage": "31.83%", "elapsed_time": "1h 0m 8s", "remaining_time": "2h 8m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784187} {"loss": 0.31655622, "grad_norm": 2.29311061, "learning_rate": 8.156e-05, "token_acc": 0.8886619, "epoch": 3.18447694, "global_step/max_steps": "2831/8890", "percentage": "31.84%", "elapsed_time": "1h 0m 9s", "remaining_time": "2h 8m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784213} {"loss": 0.33123741, "grad_norm": 2.83948565, "learning_rate": 8.155e-05, "token_acc": 0.89290883, "epoch": 3.1856018, "global_step/max_steps": "2832/8890", "percentage": "31.86%", "elapsed_time": "1h 0m 11s", "remaining_time": "2h 8m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784248} {"loss": 0.3182041, "grad_norm": 2.19462299, "learning_rate": 8.153e-05, "token_acc": 0.89344262, "epoch": 3.18672666, "global_step/max_steps": "2833/8890", "percentage": "31.87%", "elapsed_time": "1h 0m 12s", "remaining_time": "2h 8m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784296} {"loss": 0.40563759, "grad_norm": 2.29626155, "learning_rate": 8.152e-05, "token_acc": 0.8685446, "epoch": 3.18785152, "global_step/max_steps": "2834/8890", "percentage": "31.88%", "elapsed_time": "1h 0m 13s", "remaining_time": "2h 8m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784287} {"loss": 0.33934987, "grad_norm": 2.69386744, "learning_rate": 8.151e-05, "token_acc": 0.88374596, "epoch": 3.18897638, "global_step/max_steps": "2835/8890", "percentage": "31.89%", "elapsed_time": "1h 0m 14s", "remaining_time": "2h 8m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784312} {"loss": 0.31358576, "grad_norm": 2.42969608, "learning_rate": 8.149e-05, "token_acc": 0.8968254, "epoch": 3.19010124, "global_step/max_steps": "2836/8890", "percentage": "31.90%", "elapsed_time": "1h 0m 15s", "remaining_time": "2h 8m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784342} {"loss": 0.28218734, "grad_norm": 2.34370565, "learning_rate": 8.148e-05, "token_acc": 0.91176471, "epoch": 3.1912261, "global_step/max_steps": "2837/8890", "percentage": "31.91%", "elapsed_time": "1h 0m 16s", "remaining_time": "2h 8m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784424} {"loss": 0.37247813, "grad_norm": 2.72751737, "learning_rate": 8.146e-05, "token_acc": 0.87994723, "epoch": 3.19235096, "global_step/max_steps": "2838/8890", "percentage": "31.92%", "elapsed_time": "1h 0m 17s", "remaining_time": "2h 8m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784531} {"loss": 0.29471397, "grad_norm": 2.27548814, "learning_rate": 8.145e-05, "token_acc": 0.90387016, "epoch": 3.19347582, "global_step/max_steps": "2839/8890", "percentage": "31.93%", "elapsed_time": "1h 0m 18s", "remaining_time": "2h 8m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784557} {"loss": 0.37141055, "grad_norm": 2.14401841, "learning_rate": 8.143e-05, "token_acc": 0.88520801, "epoch": 3.19460067, "global_step/max_steps": "2840/8890", "percentage": "31.95%", "elapsed_time": "1h 0m 19s", "remaining_time": "2h 8m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784555} {"loss": 0.35422629, "grad_norm": 2.65364075, "learning_rate": 8.142e-05, "token_acc": 0.88521401, "epoch": 3.19572553, "global_step/max_steps": "2841/8890", "percentage": "31.96%", "elapsed_time": "1h 0m 21s", "remaining_time": "2h 8m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784548} {"loss": 0.35453364, "grad_norm": 2.6093452, "learning_rate": 8.14e-05, "token_acc": 0.88235294, "epoch": 3.19685039, "global_step/max_steps": "2842/8890", "percentage": "31.97%", "elapsed_time": "1h 0m 22s", "remaining_time": "2h 8m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784571} {"loss": 0.34032175, "grad_norm": 2.76689005, "learning_rate": 8.139e-05, "token_acc": 0.88565022, "epoch": 3.19797525, "global_step/max_steps": "2843/8890", "percentage": "31.98%", "elapsed_time": "1h 0m 23s", "remaining_time": "2h 8m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784596} {"loss": 0.25560325, "grad_norm": 2.32109928, "learning_rate": 8.138e-05, "token_acc": 0.914446, "epoch": 3.19910011, "global_step/max_steps": "2844/8890", "percentage": "31.99%", "elapsed_time": "1h 0m 24s", "remaining_time": "2h 8m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784647} {"loss": 0.29936957, "grad_norm": 2.42186308, "learning_rate": 8.136e-05, "token_acc": 0.90130152, "epoch": 3.20022497, "global_step/max_steps": "2845/8890", "percentage": "32.00%", "elapsed_time": "1h 0m 25s", "remaining_time": "2h 8m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784679} {"loss": 0.32082576, "grad_norm": 2.80431032, "learning_rate": 8.135e-05, "token_acc": 0.88757396, "epoch": 3.20134983, "global_step/max_steps": "2846/8890", "percentage": "32.01%", "elapsed_time": "1h 0m 26s", "remaining_time": "2h 8m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78473} {"loss": 0.37206945, "grad_norm": 2.69077229, "learning_rate": 8.133e-05, "token_acc": 0.88409371, "epoch": 3.20247469, "global_step/max_steps": "2847/8890", "percentage": "32.02%", "elapsed_time": "1h 0m 27s", "remaining_time": "2h 8m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784759} {"loss": 0.44749379, "grad_norm": 2.7202034, "learning_rate": 8.132e-05, "token_acc": 0.86070111, "epoch": 3.20359955, "global_step/max_steps": "2848/8890", "percentage": "32.04%", "elapsed_time": "1h 0m 28s", "remaining_time": "2h 8m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784793} {"loss": 0.35128778, "grad_norm": 2.97021055, "learning_rate": 8.13e-05, "token_acc": 0.88587732, "epoch": 3.20472441, "global_step/max_steps": "2849/8890", "percentage": "32.05%", "elapsed_time": "1h 0m 30s", "remaining_time": "2h 8m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784826} {"loss": 0.38436538, "grad_norm": 2.7848897, "learning_rate": 8.129e-05, "token_acc": 0.88504326, "epoch": 3.20584927, "global_step/max_steps": "2850/8890", "percentage": "32.06%", "elapsed_time": "1h 0m 31s", "remaining_time": "2h 8m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784876} {"loss": 0.30422604, "grad_norm": 2.29673028, "learning_rate": 8.127e-05, "token_acc": 0.92316927, "epoch": 3.20697413, "global_step/max_steps": "2851/8890", "percentage": "32.07%", "elapsed_time": "1h 0m 32s", "remaining_time": "2h 8m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784946} {"loss": 0.359173, "grad_norm": 2.52925086, "learning_rate": 8.126e-05, "token_acc": 0.90124858, "epoch": 3.20809899, "global_step/max_steps": "2852/8890", "percentage": "32.08%", "elapsed_time": "1h 0m 33s", "remaining_time": "2h 8m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784975} {"loss": 0.26135218, "grad_norm": 2.16673064, "learning_rate": 8.125e-05, "token_acc": 0.90574713, "epoch": 3.20922385, "global_step/max_steps": "2853/8890", "percentage": "32.09%", "elapsed_time": "1h 0m 34s", "remaining_time": "2h 8m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785026} {"loss": 0.37492257, "grad_norm": 2.59005022, "learning_rate": 8.123e-05, "token_acc": 0.87108014, "epoch": 3.21034871, "global_step/max_steps": "2854/8890", "percentage": "32.10%", "elapsed_time": "1h 0m 35s", "remaining_time": "2h 8m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785051} {"loss": 0.31179473, "grad_norm": 2.70733666, "learning_rate": 8.122e-05, "token_acc": 0.89281211, "epoch": 3.21147357, "global_step/max_steps": "2855/8890", "percentage": "32.11%", "elapsed_time": "1h 0m 36s", "remaining_time": "2h 8m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785085} {"loss": 0.30864027, "grad_norm": 2.77306914, "learning_rate": 8.12e-05, "token_acc": 0.90656566, "epoch": 3.21259843, "global_step/max_steps": "2856/8890", "percentage": "32.13%", "elapsed_time": "1h 0m 37s", "remaining_time": "2h 8m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785134} {"loss": 0.289096, "grad_norm": 2.53812099, "learning_rate": 8.119e-05, "token_acc": 0.89473684, "epoch": 3.21372328, "global_step/max_steps": "2857/8890", "percentage": "32.14%", "elapsed_time": "1h 0m 38s", "remaining_time": "2h 8m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785177} {"loss": 0.34765121, "grad_norm": 2.55361652, "learning_rate": 8.117e-05, "token_acc": 0.89122373, "epoch": 3.21484814, "global_step/max_steps": "2858/8890", "percentage": "32.15%", "elapsed_time": "1h 0m 39s", "remaining_time": "2h 8m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785208} {"loss": 0.29801479, "grad_norm": 2.20778847, "learning_rate": 8.116e-05, "token_acc": 0.8967587, "epoch": 3.215973, "global_step/max_steps": "2859/8890", "percentage": "32.16%", "elapsed_time": "1h 0m 40s", "remaining_time": "2h 8m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785238} {"loss": 0.42537987, "grad_norm": 2.6426847, "learning_rate": 8.114e-05, "token_acc": 0.86552073, "epoch": 3.21709786, "global_step/max_steps": "2860/8890", "percentage": "32.17%", "elapsed_time": "1h 0m 42s", "remaining_time": "2h 7m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785223} {"loss": 0.26932532, "grad_norm": 2.11456871, "learning_rate": 8.113e-05, "token_acc": 0.92094017, "epoch": 3.21822272, "global_step/max_steps": "2861/8890", "percentage": "32.18%", "elapsed_time": "1h 0m 43s", "remaining_time": "2h 7m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785247} {"loss": 0.35479099, "grad_norm": 2.49526024, "learning_rate": 8.111e-05, "token_acc": 0.87946429, "epoch": 3.21934758, "global_step/max_steps": "2862/8890", "percentage": "32.19%", "elapsed_time": "1h 0m 44s", "remaining_time": "2h 7m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785271} {"loss": 0.30939984, "grad_norm": 2.53313422, "learning_rate": 8.11e-05, "token_acc": 0.893796, "epoch": 3.22047244, "global_step/max_steps": "2863/8890", "percentage": "32.20%", "elapsed_time": "1h 0m 45s", "remaining_time": "2h 7m 54s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785292} {"loss": 0.40860936, "grad_norm": 2.43114758, "learning_rate": 8.109e-05, "token_acc": 0.88192552, "epoch": 3.2215973, "global_step/max_steps": "2864/8890", "percentage": "32.22%", "elapsed_time": "1h 0m 46s", "remaining_time": "2h 7m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785312} {"loss": 0.26995718, "grad_norm": 2.27169538, "learning_rate": 8.107e-05, "token_acc": 0.91533181, "epoch": 3.22272216, "global_step/max_steps": "2865/8890", "percentage": "32.23%", "elapsed_time": "1h 0m 47s", "remaining_time": "2h 7m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785411} {"loss": 0.33864653, "grad_norm": 2.81256318, "learning_rate": 8.106e-05, "token_acc": 0.88732394, "epoch": 3.22384702, "global_step/max_steps": "2866/8890", "percentage": "32.24%", "elapsed_time": "1h 0m 48s", "remaining_time": "2h 7m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785452} {"loss": 0.29622778, "grad_norm": 2.83922815, "learning_rate": 8.104e-05, "token_acc": 0.89411765, "epoch": 3.22497188, "global_step/max_steps": "2867/8890", "percentage": "32.25%", "elapsed_time": "1h 0m 49s", "remaining_time": "2h 7m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785505} {"loss": 0.39187998, "grad_norm": 2.86702228, "learning_rate": 8.103e-05, "token_acc": 0.86780105, "epoch": 3.22609674, "global_step/max_steps": "2868/8890", "percentage": "32.26%", "elapsed_time": "1h 0m 50s", "remaining_time": "2h 7m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785615} {"loss": 0.36252239, "grad_norm": 2.24972725, "learning_rate": 8.101e-05, "token_acc": 0.88757396, "epoch": 3.2272216, "global_step/max_steps": "2869/8890", "percentage": "32.27%", "elapsed_time": "1h 0m 51s", "remaining_time": "2h 7m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785653} {"loss": 0.30602333, "grad_norm": 2.68420982, "learning_rate": 8.1e-05, "token_acc": 0.90043764, "epoch": 3.22834646, "global_step/max_steps": "2870/8890", "percentage": "32.28%", "elapsed_time": "1h 0m 52s", "remaining_time": "2h 7m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785703} {"loss": 0.27672219, "grad_norm": 2.12778234, "learning_rate": 8.098e-05, "token_acc": 0.90961761, "epoch": 3.22947132, "global_step/max_steps": "2871/8890", "percentage": "32.29%", "elapsed_time": "1h 0m 53s", "remaining_time": "2h 7m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785736} {"loss": 0.37702492, "grad_norm": 2.70445561, "learning_rate": 8.097e-05, "token_acc": 0.87039563, "epoch": 3.23059618, "global_step/max_steps": "2872/8890", "percentage": "32.31%", "elapsed_time": "1h 0m 55s", "remaining_time": "2h 7m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78577} {"loss": 0.36940229, "grad_norm": 2.59776688, "learning_rate": 8.095e-05, "token_acc": 0.87280702, "epoch": 3.23172103, "global_step/max_steps": "2873/8890", "percentage": "32.32%", "elapsed_time": "1h 0m 56s", "remaining_time": "2h 7m 37s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785796} {"loss": 0.34075969, "grad_norm": 2.11392808, "learning_rate": 8.094e-05, "token_acc": 0.89655172, "epoch": 3.23284589, "global_step/max_steps": "2874/8890", "percentage": "32.33%", "elapsed_time": "1h 0m 57s", "remaining_time": "2h 7m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785875} {"loss": 0.33686581, "grad_norm": 2.29431486, "learning_rate": 8.092e-05, "token_acc": 0.88382688, "epoch": 3.23397075, "global_step/max_steps": "2875/8890", "percentage": "32.34%", "elapsed_time": "1h 0m 58s", "remaining_time": "2h 7m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785926} {"loss": 0.35743219, "grad_norm": 2.81898165, "learning_rate": 8.091e-05, "token_acc": 0.89042821, "epoch": 3.23509561, "global_step/max_steps": "2876/8890", "percentage": "32.35%", "elapsed_time": "1h 0m 59s", "remaining_time": "2h 7m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785947} {"loss": 0.36294341, "grad_norm": 3.12372732, "learning_rate": 8.09e-05, "token_acc": 0.87563025, "epoch": 3.23622047, "global_step/max_steps": "2877/8890", "percentage": "32.36%", "elapsed_time": "1h 1m 0s", "remaining_time": "2h 7m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785977} {"loss": 0.32778475, "grad_norm": 2.50572181, "learning_rate": 8.088e-05, "token_acc": 0.89306358, "epoch": 3.23734533, "global_step/max_steps": "2878/8890", "percentage": "32.37%", "elapsed_time": "1h 1m 1s", "remaining_time": "2h 7m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786} {"loss": 0.36403227, "grad_norm": 2.54446578, "learning_rate": 8.087e-05, "token_acc": 0.90533188, "epoch": 3.23847019, "global_step/max_steps": "2879/8890", "percentage": "32.38%", "elapsed_time": "1h 1m 2s", "remaining_time": "2h 7m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786049} {"loss": 0.31323287, "grad_norm": 2.27298379, "learning_rate": 8.085e-05, "token_acc": 0.90731707, "epoch": 3.23959505, "global_step/max_steps": "2880/8890", "percentage": "32.40%", "elapsed_time": "1h 1m 3s", "remaining_time": "2h 7m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786075} {"loss": 0.31298423, "grad_norm": 2.37979865, "learning_rate": 8.084e-05, "token_acc": 0.90851334, "epoch": 3.24071991, "global_step/max_steps": "2881/8890", "percentage": "32.41%", "elapsed_time": "1h 1m 4s", "remaining_time": "2h 7m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786176} {"loss": 0.37017739, "grad_norm": 2.42900825, "learning_rate": 8.082e-05, "token_acc": 0.89489194, "epoch": 3.24184477, "global_step/max_steps": "2882/8890", "percentage": "32.42%", "elapsed_time": "1h 1m 5s", "remaining_time": "2h 7m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786208} {"loss": 0.35932285, "grad_norm": 2.75936651, "learning_rate": 8.081e-05, "token_acc": 0.8856383, "epoch": 3.24296963, "global_step/max_steps": "2883/8890", "percentage": "32.43%", "elapsed_time": "1h 1m 6s", "remaining_time": "2h 7m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786233} {"loss": 0.32343546, "grad_norm": 2.53484416, "learning_rate": 8.079e-05, "token_acc": 0.89174705, "epoch": 3.24409449, "global_step/max_steps": "2884/8890", "percentage": "32.44%", "elapsed_time": "1h 1m 8s", "remaining_time": "2h 7m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786258} {"loss": 0.33579975, "grad_norm": 2.73654675, "learning_rate": 8.078e-05, "token_acc": 0.88782816, "epoch": 3.24521935, "global_step/max_steps": "2885/8890", "percentage": "32.45%", "elapsed_time": "1h 1m 9s", "remaining_time": "2h 7m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786277} {"loss": 0.37871334, "grad_norm": 2.58111501, "learning_rate": 8.076e-05, "token_acc": 0.87748344, "epoch": 3.24634421, "global_step/max_steps": "2886/8890", "percentage": "32.46%", "elapsed_time": "1h 1m 10s", "remaining_time": "2h 7m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786311} {"loss": 0.30330449, "grad_norm": 2.48104072, "learning_rate": 8.075e-05, "token_acc": 0.89124668, "epoch": 3.24746907, "global_step/max_steps": "2887/8890", "percentage": "32.47%", "elapsed_time": "1h 1m 11s", "remaining_time": "2h 7m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786413} {"loss": 0.2992551, "grad_norm": 2.9014132, "learning_rate": 8.073e-05, "token_acc": 0.89545455, "epoch": 3.24859393, "global_step/max_steps": "2888/8890", "percentage": "32.49%", "elapsed_time": "1h 1m 12s", "remaining_time": "2h 7m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786445} {"loss": 0.33826482, "grad_norm": 2.30684948, "learning_rate": 8.072e-05, "token_acc": 0.89665971, "epoch": 3.24971879, "global_step/max_steps": "2889/8890", "percentage": "32.50%", "elapsed_time": "1h 1m 13s", "remaining_time": "2h 7m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786467} {"loss": 0.38656646, "grad_norm": 2.3165915, "learning_rate": 8.07e-05, "token_acc": 0.88035714, "epoch": 3.25084364, "global_step/max_steps": "2890/8890", "percentage": "32.51%", "elapsed_time": "1h 1m 14s", "remaining_time": "2h 7m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786494} {"loss": 0.2532866, "grad_norm": 2.3559494, "learning_rate": 8.069e-05, "token_acc": 0.92196532, "epoch": 3.2519685, "global_step/max_steps": "2891/8890", "percentage": "32.52%", "elapsed_time": "1h 1m 15s", "remaining_time": "2h 7m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786548} {"loss": 0.33953303, "grad_norm": 2.4798255, "learning_rate": 8.068e-05, "token_acc": 0.88809766, "epoch": 3.25309336, "global_step/max_steps": "2892/8890", "percentage": "32.53%", "elapsed_time": "1h 1m 16s", "remaining_time": "2h 7m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786564} {"loss": 0.28532639, "grad_norm": 2.43787909, "learning_rate": 8.066e-05, "token_acc": 0.90361446, "epoch": 3.25421822, "global_step/max_steps": "2893/8890", "percentage": "32.54%", "elapsed_time": "1h 1m 17s", "remaining_time": "2h 7m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786597} {"loss": 0.26380187, "grad_norm": 2.08001232, "learning_rate": 8.065e-05, "token_acc": 0.91396648, "epoch": 3.25534308, "global_step/max_steps": "2894/8890", "percentage": "32.55%", "elapsed_time": "1h 1m 19s", "remaining_time": "2h 7m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786621} {"loss": 0.44644645, "grad_norm": 2.46200395, "learning_rate": 8.063e-05, "token_acc": 0.86335404, "epoch": 3.25646794, "global_step/max_steps": "2895/8890", "percentage": "32.56%", "elapsed_time": "1h 1m 20s", "remaining_time": "2h 7m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786602} {"loss": 0.37586462, "grad_norm": 2.46688199, "learning_rate": 8.062e-05, "token_acc": 0.87959866, "epoch": 3.2575928, "global_step/max_steps": "2896/8890", "percentage": "32.58%", "elapsed_time": "1h 1m 21s", "remaining_time": "2h 6m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786624} {"loss": 0.35352618, "grad_norm": 2.64887762, "learning_rate": 8.06e-05, "token_acc": 0.88950276, "epoch": 3.25871766, "global_step/max_steps": "2897/8890", "percentage": "32.59%", "elapsed_time": "1h 1m 22s", "remaining_time": "2h 6m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78664} {"loss": 0.33818945, "grad_norm": 2.96441317, "learning_rate": 8.059e-05, "token_acc": 0.89326557, "epoch": 3.25984252, "global_step/max_steps": "2898/8890", "percentage": "32.60%", "elapsed_time": "1h 1m 23s", "remaining_time": "2h 6m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786697} {"loss": 0.2753635, "grad_norm": 2.41377425, "learning_rate": 8.057e-05, "token_acc": 0.90985325, "epoch": 3.26096738, "global_step/max_steps": "2899/8890", "percentage": "32.61%", "elapsed_time": "1h 1m 24s", "remaining_time": "2h 6m 54s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786747} {"loss": 0.26534352, "grad_norm": 2.24842834, "learning_rate": 8.056e-05, "token_acc": 0.90740741, "epoch": 3.26209224, "global_step/max_steps": "2900/8890", "percentage": "32.62%", "elapsed_time": "1h 1m 25s", "remaining_time": "2h 6m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786772} {"loss": 0.38202438, "grad_norm": 3.28960466, "learning_rate": 8.054e-05, "token_acc": 0.85673759, "epoch": 3.2632171, "global_step/max_steps": "2901/8890", "percentage": "32.63%", "elapsed_time": "1h 1m 27s", "remaining_time": "2h 6m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786796} {"loss": 0.35727388, "grad_norm": 2.55370522, "learning_rate": 8.053e-05, "token_acc": 0.89274448, "epoch": 3.26434196, "global_step/max_steps": "2902/8890", "percentage": "32.64%", "elapsed_time": "1h 1m 28s", "remaining_time": "2h 6m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78682} {"loss": 0.32512119, "grad_norm": 2.48224187, "learning_rate": 8.051e-05, "token_acc": 0.8974359, "epoch": 3.26546682, "global_step/max_steps": "2903/8890", "percentage": "32.65%", "elapsed_time": "1h 1m 29s", "remaining_time": "2h 6m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786852} {"loss": 0.32980502, "grad_norm": 2.67796588, "learning_rate": 8.05e-05, "token_acc": 0.88364435, "epoch": 3.26659168, "global_step/max_steps": "2904/8890", "percentage": "32.67%", "elapsed_time": "1h 1m 30s", "remaining_time": "2h 6m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78688} {"loss": 0.37885082, "grad_norm": 2.95105338, "learning_rate": 8.048e-05, "token_acc": 0.875, "epoch": 3.26771654, "global_step/max_steps": "2905/8890", "percentage": "32.68%", "elapsed_time": "1h 1m 31s", "remaining_time": "2h 6m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786914} {"loss": 0.34791607, "grad_norm": 3.01547694, "learning_rate": 8.047e-05, "token_acc": 0.88598901, "epoch": 3.26884139, "global_step/max_steps": "2906/8890", "percentage": "32.69%", "elapsed_time": "1h 1m 32s", "remaining_time": "2h 6m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78702} {"loss": 0.26855516, "grad_norm": 1.99374664, "learning_rate": 8.045e-05, "token_acc": 0.91208791, "epoch": 3.26996625, "global_step/max_steps": "2907/8890", "percentage": "32.70%", "elapsed_time": "1h 1m 33s", "remaining_time": "2h 6m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787063} {"loss": 0.32178447, "grad_norm": 2.2965374, "learning_rate": 8.044e-05, "token_acc": 0.89902569, "epoch": 3.27109111, "global_step/max_steps": "2908/8890", "percentage": "32.71%", "elapsed_time": "1h 1m 34s", "remaining_time": "2h 6m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787087} {"loss": 0.24171314, "grad_norm": 2.68203259, "learning_rate": 8.043e-05, "token_acc": 0.91531532, "epoch": 3.27221597, "global_step/max_steps": "2909/8890", "percentage": "32.72%", "elapsed_time": "1h 1m 35s", "remaining_time": "2h 6m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787136} {"loss": 0.2257659, "grad_norm": 2.36003923, "learning_rate": 8.041e-05, "token_acc": 0.91468531, "epoch": 3.27334083, "global_step/max_steps": "2910/8890", "percentage": "32.73%", "elapsed_time": "1h 1m 36s", "remaining_time": "2h 6m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787175} {"loss": 0.35147029, "grad_norm": 2.86753535, "learning_rate": 8.04e-05, "token_acc": 0.90184922, "epoch": 3.27446569, "global_step/max_steps": "2911/8890", "percentage": "32.74%", "elapsed_time": "1h 1m 37s", "remaining_time": "2h 6m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787241} {"loss": 0.3270773, "grad_norm": 2.6667738, "learning_rate": 8.038e-05, "token_acc": 0.9113608, "epoch": 3.27559055, "global_step/max_steps": "2912/8890", "percentage": "32.76%", "elapsed_time": "1h 1m 38s", "remaining_time": "2h 6m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787275} {"loss": 0.25960726, "grad_norm": 2.55790257, "learning_rate": 8.037e-05, "token_acc": 0.90657439, "epoch": 3.27671541, "global_step/max_steps": "2913/8890", "percentage": "32.77%", "elapsed_time": "1h 1m 39s", "remaining_time": "2h 6m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787324} {"loss": 0.42289221, "grad_norm": 2.52640963, "learning_rate": 8.035e-05, "token_acc": 0.86314761, "epoch": 3.27784027, "global_step/max_steps": "2914/8890", "percentage": "32.78%", "elapsed_time": "1h 1m 41s", "remaining_time": "2h 6m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787316} {"loss": 0.36688519, "grad_norm": 2.62985873, "learning_rate": 8.034e-05, "token_acc": 0.88726043, "epoch": 3.27896513, "global_step/max_steps": "2915/8890", "percentage": "32.79%", "elapsed_time": "1h 1m 42s", "remaining_time": "2h 6m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787373} {"loss": 0.41293544, "grad_norm": 2.54543018, "learning_rate": 8.032e-05, "token_acc": 0.86842105, "epoch": 3.28008999, "global_step/max_steps": "2916/8890", "percentage": "32.80%", "elapsed_time": "1h 1m 43s", "remaining_time": "2h 6m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787399} {"loss": 0.29536206, "grad_norm": 2.71381807, "learning_rate": 8.031e-05, "token_acc": 0.90860215, "epoch": 3.28121485, "global_step/max_steps": "2917/8890", "percentage": "32.81%", "elapsed_time": "1h 1m 44s", "remaining_time": "2h 6m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787427} {"loss": 0.48215976, "grad_norm": 2.63121128, "learning_rate": 8.029e-05, "token_acc": 0.84818482, "epoch": 3.28233971, "global_step/max_steps": "2918/8890", "percentage": "32.82%", "elapsed_time": "1h 1m 45s", "remaining_time": "2h 6m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787456} {"loss": 0.35170251, "grad_norm": 2.48006225, "learning_rate": 8.028e-05, "token_acc": 0.88302277, "epoch": 3.28346457, "global_step/max_steps": "2919/8890", "percentage": "32.83%", "elapsed_time": "1h 1m 46s", "remaining_time": "2h 6m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787503} {"loss": 0.33387133, "grad_norm": 2.6473701, "learning_rate": 8.026e-05, "token_acc": 0.88235294, "epoch": 3.28458943, "global_step/max_steps": "2920/8890", "percentage": "32.85%", "elapsed_time": "1h 1m 47s", "remaining_time": "2h 6m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787536} {"loss": 0.30911869, "grad_norm": 2.98256111, "learning_rate": 8.025e-05, "token_acc": 0.90088757, "epoch": 3.28571429, "global_step/max_steps": "2921/8890", "percentage": "32.86%", "elapsed_time": "1h 1m 48s", "remaining_time": "2h 6m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787568} {"loss": 0.38196546, "grad_norm": 2.42240715, "learning_rate": 8.023e-05, "token_acc": 0.8725065, "epoch": 3.28683915, "global_step/max_steps": "2922/8890", "percentage": "32.87%", "elapsed_time": "1h 1m 50s", "remaining_time": "2h 6m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787568} {"loss": 0.30350152, "grad_norm": 3.15083289, "learning_rate": 8.022e-05, "token_acc": 0.90580848, "epoch": 3.287964, "global_step/max_steps": "2923/8890", "percentage": "32.88%", "elapsed_time": "1h 1m 51s", "remaining_time": "2h 6m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787595} {"loss": 0.39321131, "grad_norm": 2.62730789, "learning_rate": 8.02e-05, "token_acc": 0.88072417, "epoch": 3.28908886, "global_step/max_steps": "2924/8890", "percentage": "32.89%", "elapsed_time": "1h 1m 52s", "remaining_time": "2h 6m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787675} {"loss": 0.35990947, "grad_norm": 2.6801455, "learning_rate": 8.019e-05, "token_acc": 0.87303665, "epoch": 3.29021372, "global_step/max_steps": "2925/8890", "percentage": "32.90%", "elapsed_time": "1h 1m 53s", "remaining_time": "2h 6m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787719} {"loss": 0.33785641, "grad_norm": 2.11823654, "learning_rate": 8.017e-05, "token_acc": 0.8914956, "epoch": 3.29133858, "global_step/max_steps": "2926/8890", "percentage": "32.91%", "elapsed_time": "1h 1m 54s", "remaining_time": "2h 6m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787751} {"loss": 0.36120531, "grad_norm": 2.51228285, "learning_rate": 8.016e-05, "token_acc": 0.8902439, "epoch": 3.29246344, "global_step/max_steps": "2927/8890", "percentage": "32.92%", "elapsed_time": "1h 1m 55s", "remaining_time": "2h 6m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787781} {"loss": 0.37451696, "grad_norm": 2.69332981, "learning_rate": 8.014e-05, "token_acc": 0.88128342, "epoch": 3.2935883, "global_step/max_steps": "2928/8890", "percentage": "32.94%", "elapsed_time": "1h 1m 56s", "remaining_time": "2h 6m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787813} {"loss": 0.37168682, "grad_norm": 2.5269599, "learning_rate": 8.013e-05, "token_acc": 0.88378378, "epoch": 3.29471316, "global_step/max_steps": "2929/8890", "percentage": "32.95%", "elapsed_time": "1h 1m 57s", "remaining_time": "2h 6m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787861} {"loss": 0.39157474, "grad_norm": 2.05712104, "learning_rate": 8.011e-05, "token_acc": 0.89058524, "epoch": 3.29583802, "global_step/max_steps": "2930/8890", "percentage": "32.96%", "elapsed_time": "1h 1m 58s", "remaining_time": "2h 6m 4s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787881} {"loss": 0.35061979, "grad_norm": 2.64063811, "learning_rate": 8.01e-05, "token_acc": 0.8824164, "epoch": 3.29696288, "global_step/max_steps": "2931/8890", "percentage": "32.97%", "elapsed_time": "1h 1m 59s", "remaining_time": "2h 6m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787903} {"loss": 0.31003886, "grad_norm": 2.44335556, "learning_rate": 8.008e-05, "token_acc": 0.89769821, "epoch": 3.29808774, "global_step/max_steps": "2932/8890", "percentage": "32.98%", "elapsed_time": "1h 2m 1s", "remaining_time": "2h 6m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787929} {"loss": 0.3066622, "grad_norm": 2.72168398, "learning_rate": 8.007e-05, "token_acc": 0.89380531, "epoch": 3.2992126, "global_step/max_steps": "2933/8890", "percentage": "32.99%", "elapsed_time": "1h 2m 2s", "remaining_time": "2h 6m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787956} {"loss": 0.36170745, "grad_norm": 2.54886484, "learning_rate": 8.005e-05, "token_acc": 0.88297872, "epoch": 3.30033746, "global_step/max_steps": "2934/8890", "percentage": "33.00%", "elapsed_time": "1h 2m 3s", "remaining_time": "2h 5m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787984} {"loss": 0.42473346, "grad_norm": 2.99647236, "learning_rate": 8.004e-05, "token_acc": 0.875, "epoch": 3.30146232, "global_step/max_steps": "2935/8890", "percentage": "33.01%", "elapsed_time": "1h 2m 4s", "remaining_time": "2h 5m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788008} {"loss": 0.34876853, "grad_norm": 2.52633548, "learning_rate": 8.003e-05, "token_acc": 0.89204545, "epoch": 3.30258718, "global_step/max_steps": "2936/8890", "percentage": "33.03%", "elapsed_time": "1h 2m 5s", "remaining_time": "2h 5m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78803} {"loss": 0.41107231, "grad_norm": 2.28233528, "learning_rate": 8.001e-05, "token_acc": 0.87272727, "epoch": 3.30371204, "global_step/max_steps": "2937/8890", "percentage": "33.04%", "elapsed_time": "1h 2m 6s", "remaining_time": "2h 5m 54s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78805} {"loss": 0.36289757, "grad_norm": 2.10855961, "learning_rate": 8e-05, "token_acc": 0.86974444, "epoch": 3.3048369, "global_step/max_steps": "2938/8890", "percentage": "33.05%", "elapsed_time": "1h 2m 8s", "remaining_time": "2h 5m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788078} {"loss": 0.41826081, "grad_norm": 2.80185151, "learning_rate": 7.998e-05, "token_acc": 0.86923077, "epoch": 3.30596175, "global_step/max_steps": "2939/8890", "percentage": "33.06%", "elapsed_time": "1h 2m 9s", "remaining_time": "2h 5m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788102} {"loss": 0.27123055, "grad_norm": 2.42965555, "learning_rate": 7.997e-05, "token_acc": 0.90714286, "epoch": 3.30708661, "global_step/max_steps": "2940/8890", "percentage": "33.07%", "elapsed_time": "1h 2m 10s", "remaining_time": "2h 5m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788122} {"loss": 0.28520134, "grad_norm": 2.15164852, "learning_rate": 7.995e-05, "token_acc": 0.91340018, "epoch": 3.30821147, "global_step/max_steps": "2941/8890", "percentage": "33.08%", "elapsed_time": "1h 2m 11s", "remaining_time": "2h 5m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788169} {"loss": 0.25229487, "grad_norm": 2.14076376, "learning_rate": 7.994e-05, "token_acc": 0.91504018, "epoch": 3.30933633, "global_step/max_steps": "2942/8890", "percentage": "33.09%", "elapsed_time": "1h 2m 12s", "remaining_time": "2h 5m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788193} {"loss": 0.36174291, "grad_norm": 2.70469236, "learning_rate": 7.992e-05, "token_acc": 0.87875289, "epoch": 3.31046119, "global_step/max_steps": "2943/8890", "percentage": "33.10%", "elapsed_time": "1h 2m 13s", "remaining_time": "2h 5m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788215} {"loss": 0.21452504, "grad_norm": 2.4246881, "learning_rate": 7.991e-05, "token_acc": 0.91734198, "epoch": 3.31158605, "global_step/max_steps": "2944/8890", "percentage": "33.12%", "elapsed_time": "1h 2m 14s", "remaining_time": "2h 5m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788244} {"loss": 0.34421957, "grad_norm": 2.4293623, "learning_rate": 7.989e-05, "token_acc": 0.88876014, "epoch": 3.31271091, "global_step/max_steps": "2945/8890", "percentage": "33.13%", "elapsed_time": "1h 2m 16s", "remaining_time": "2h 5m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78824} {"loss": 0.32361692, "grad_norm": 2.47837567, "learning_rate": 7.988e-05, "token_acc": 0.88923395, "epoch": 3.31383577, "global_step/max_steps": "2946/8890", "percentage": "33.14%", "elapsed_time": "1h 2m 17s", "remaining_time": "2h 5m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78824} {"loss": 0.36081129, "grad_norm": 2.44819927, "learning_rate": 7.986e-05, "token_acc": 0.88541667, "epoch": 3.31496063, "global_step/max_steps": "2947/8890", "percentage": "33.15%", "elapsed_time": "1h 2m 18s", "remaining_time": "2h 5m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78826} {"loss": 0.25469577, "grad_norm": 2.60990357, "learning_rate": 7.985e-05, "token_acc": 0.91461837, "epoch": 3.31608549, "global_step/max_steps": "2948/8890", "percentage": "33.16%", "elapsed_time": "1h 2m 19s", "remaining_time": "2h 5m 37s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788288} {"loss": 0.28143203, "grad_norm": 2.66937327, "learning_rate": 7.983e-05, "token_acc": 0.91610738, "epoch": 3.31721035, "global_step/max_steps": "2949/8890", "percentage": "33.17%", "elapsed_time": "1h 2m 20s", "remaining_time": "2h 5m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788353} {"loss": 0.3129153, "grad_norm": 2.5068686, "learning_rate": 7.982e-05, "token_acc": 0.88824214, "epoch": 3.31833521, "global_step/max_steps": "2950/8890", "percentage": "33.18%", "elapsed_time": "1h 2m 21s", "remaining_time": "2h 5m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788376} {"loss": 0.29985774, "grad_norm": 2.27317786, "learning_rate": 7.98e-05, "token_acc": 0.90992018, "epoch": 3.31946007, "global_step/max_steps": "2951/8890", "percentage": "33.19%", "elapsed_time": "1h 2m 23s", "remaining_time": "2h 5m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788394} {"loss": 0.38091579, "grad_norm": 2.98280478, "learning_rate": 7.979e-05, "token_acc": 0.87096774, "epoch": 3.32058493, "global_step/max_steps": "2952/8890", "percentage": "33.21%", "elapsed_time": "1h 2m 24s", "remaining_time": "2h 5m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788439} {"loss": 0.3830018, "grad_norm": 3.0394783, "learning_rate": 7.977e-05, "token_acc": 0.85180412, "epoch": 3.32170979, "global_step/max_steps": "2953/8890", "percentage": "33.22%", "elapsed_time": "1h 2m 25s", "remaining_time": "2h 5m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788464} {"loss": 0.27534643, "grad_norm": 2.37307119, "learning_rate": 7.976e-05, "token_acc": 0.91009989, "epoch": 3.32283465, "global_step/max_steps": "2954/8890", "percentage": "33.23%", "elapsed_time": "1h 2m 26s", "remaining_time": "2h 5m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788489} {"loss": 0.3221817, "grad_norm": 2.68704891, "learning_rate": 7.974e-05, "token_acc": 0.88608981, "epoch": 3.32395951, "global_step/max_steps": "2955/8890", "percentage": "33.24%", "elapsed_time": "1h 2m 27s", "remaining_time": "2h 5m 26s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788517} {"loss": 0.31219721, "grad_norm": 2.83031392, "learning_rate": 7.973e-05, "token_acc": 0.89749702, "epoch": 3.32508436, "global_step/max_steps": "2956/8890", "percentage": "33.25%", "elapsed_time": "1h 2m 28s", "remaining_time": "2h 5m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788545} {"loss": 0.28768504, "grad_norm": 2.15921354, "learning_rate": 7.971e-05, "token_acc": 0.91317671, "epoch": 3.32620922, "global_step/max_steps": "2957/8890", "percentage": "33.26%", "elapsed_time": "1h 2m 29s", "remaining_time": "2h 5m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788564} {"loss": 0.25929224, "grad_norm": 2.36693835, "learning_rate": 7.97e-05, "token_acc": 0.90586932, "epoch": 3.32733408, "global_step/max_steps": "2958/8890", "percentage": "33.27%", "elapsed_time": "1h 2m 30s", "remaining_time": "2h 5m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788591} {"loss": 0.40290493, "grad_norm": 2.74963975, "learning_rate": 7.968e-05, "token_acc": 0.88100962, "epoch": 3.32845894, "global_step/max_steps": "2959/8890", "percentage": "33.28%", "elapsed_time": "1h 2m 32s", "remaining_time": "2h 5m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788614} {"loss": 0.41457814, "grad_norm": 2.50856543, "learning_rate": 7.967e-05, "token_acc": 0.85784314, "epoch": 3.3295838, "global_step/max_steps": "2960/8890", "percentage": "33.30%", "elapsed_time": "1h 2m 33s", "remaining_time": "2h 5m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788643} {"loss": 0.41121638, "grad_norm": 2.45583534, "learning_rate": 7.965e-05, "token_acc": 0.87615148, "epoch": 3.33070866, "global_step/max_steps": "2961/8890", "percentage": "33.31%", "elapsed_time": "1h 2m 34s", "remaining_time": "2h 5m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788655} {"loss": 0.32643735, "grad_norm": 2.77744794, "learning_rate": 7.964e-05, "token_acc": 0.88362652, "epoch": 3.33183352, "global_step/max_steps": "2962/8890", "percentage": "33.32%", "elapsed_time": "1h 2m 35s", "remaining_time": "2h 5m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788679} {"loss": 0.3350549, "grad_norm": 2.26674032, "learning_rate": 7.962e-05, "token_acc": 0.89042448, "epoch": 3.33295838, "global_step/max_steps": "2963/8890", "percentage": "33.33%", "elapsed_time": "1h 2m 36s", "remaining_time": "2h 5m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788704} {"loss": 0.37652013, "grad_norm": 2.64535356, "learning_rate": 7.961e-05, "token_acc": 0.87748691, "epoch": 3.33408324, "global_step/max_steps": "2964/8890", "percentage": "33.34%", "elapsed_time": "1h 2m 37s", "remaining_time": "2h 5m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788724} {"loss": 0.38336021, "grad_norm": 2.42632627, "learning_rate": 7.959e-05, "token_acc": 0.88306011, "epoch": 3.3352081, "global_step/max_steps": "2965/8890", "percentage": "33.35%", "elapsed_time": "1h 2m 39s", "remaining_time": "2h 5m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788751} {"loss": 0.29787067, "grad_norm": 2.36741877, "learning_rate": 7.958e-05, "token_acc": 0.89684814, "epoch": 3.33633296, "global_step/max_steps": "2966/8890", "percentage": "33.36%", "elapsed_time": "1h 2m 40s", "remaining_time": "2h 5m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788782} {"loss": 0.36038789, "grad_norm": 2.36923933, "learning_rate": 7.956e-05, "token_acc": 0.88414055, "epoch": 3.33745782, "global_step/max_steps": "2967/8890", "percentage": "33.37%", "elapsed_time": "1h 2m 41s", "remaining_time": "2h 5m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788812} {"loss": 0.42396125, "grad_norm": 2.57045794, "learning_rate": 7.955e-05, "token_acc": 0.87219512, "epoch": 3.33858268, "global_step/max_steps": "2968/8890", "percentage": "33.39%", "elapsed_time": "1h 2m 42s", "remaining_time": "2h 5m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788843} {"loss": 0.36572078, "grad_norm": 3.0386076, "learning_rate": 7.953e-05, "token_acc": 0.87591241, "epoch": 3.33970754, "global_step/max_steps": "2969/8890", "percentage": "33.40%", "elapsed_time": "1h 2m 43s", "remaining_time": "2h 5m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788863} {"loss": 0.43216932, "grad_norm": 2.45961952, "learning_rate": 7.952e-05, "token_acc": 0.87010078, "epoch": 3.3408324, "global_step/max_steps": "2970/8890", "percentage": "33.41%", "elapsed_time": "1h 2m 44s", "remaining_time": "2h 5m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788928} {"loss": 0.32619041, "grad_norm": 2.20455623, "learning_rate": 7.95e-05, "token_acc": 0.90224719, "epoch": 3.34195726, "global_step/max_steps": "2971/8890", "percentage": "33.42%", "elapsed_time": "1h 2m 45s", "remaining_time": "2h 5m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788955} {"loss": 0.33980882, "grad_norm": 2.45837975, "learning_rate": 7.949e-05, "token_acc": 0.89208633, "epoch": 3.34308211, "global_step/max_steps": "2972/8890", "percentage": "33.43%", "elapsed_time": "1h 2m 46s", "remaining_time": "2h 5m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788983} {"loss": 0.3635096, "grad_norm": 2.43337893, "learning_rate": 7.947e-05, "token_acc": 0.88140704, "epoch": 3.34420697, "global_step/max_steps": "2973/8890", "percentage": "33.44%", "elapsed_time": "1h 2m 48s", "remaining_time": "2h 4m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789008} {"loss": 0.43692353, "grad_norm": 2.44952559, "learning_rate": 7.946e-05, "token_acc": 0.86536485, "epoch": 3.34533183, "global_step/max_steps": "2974/8890", "percentage": "33.45%", "elapsed_time": "1h 2m 49s", "remaining_time": "2h 4m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788991} {"loss": 0.36846316, "grad_norm": 2.66459966, "learning_rate": 7.944e-05, "token_acc": 0.8776267, "epoch": 3.34645669, "global_step/max_steps": "2975/8890", "percentage": "33.46%", "elapsed_time": "1h 2m 50s", "remaining_time": "2h 4m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789014} {"loss": 0.35399139, "grad_norm": 2.34491825, "learning_rate": 7.943e-05, "token_acc": 0.87962963, "epoch": 3.34758155, "global_step/max_steps": "2976/8890", "percentage": "33.48%", "elapsed_time": "1h 2m 51s", "remaining_time": "2h 4m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789034} {"loss": 0.40336698, "grad_norm": 2.47324538, "learning_rate": 7.941e-05, "token_acc": 0.86666667, "epoch": 3.34870641, "global_step/max_steps": "2977/8890", "percentage": "33.49%", "elapsed_time": "1h 2m 52s", "remaining_time": "2h 4m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789033} {"loss": 0.41116244, "grad_norm": 2.69134402, "learning_rate": 7.94e-05, "token_acc": 0.87982833, "epoch": 3.34983127, "global_step/max_steps": "2978/8890", "percentage": "33.50%", "elapsed_time": "1h 2m 54s", "remaining_time": "2h 4m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789055} {"loss": 0.38027665, "grad_norm": 2.42167234, "learning_rate": 7.938e-05, "token_acc": 0.88864865, "epoch": 3.35095613, "global_step/max_steps": "2979/8890", "percentage": "33.51%", "elapsed_time": "1h 2m 55s", "remaining_time": "2h 4m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789103} {"loss": 0.28329796, "grad_norm": 2.36805964, "learning_rate": 7.937e-05, "token_acc": 0.90898749, "epoch": 3.35208099, "global_step/max_steps": "2980/8890", "percentage": "33.52%", "elapsed_time": "1h 2m 56s", "remaining_time": "2h 4m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789148} {"loss": 0.30038744, "grad_norm": 2.32279849, "learning_rate": 7.935e-05, "token_acc": 0.91125, "epoch": 3.35320585, "global_step/max_steps": "2981/8890", "percentage": "33.53%", "elapsed_time": "1h 2m 57s", "remaining_time": "2h 4m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789197} {"loss": 0.29829198, "grad_norm": 2.37220621, "learning_rate": 7.934e-05, "token_acc": 0.89189189, "epoch": 3.35433071, "global_step/max_steps": "2982/8890", "percentage": "33.54%", "elapsed_time": "1h 2m 58s", "remaining_time": "2h 4m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789226} {"loss": 0.26639932, "grad_norm": 2.06821108, "learning_rate": 7.932e-05, "token_acc": 0.90720632, "epoch": 3.35545557, "global_step/max_steps": "2983/8890", "percentage": "33.55%", "elapsed_time": "1h 2m 59s", "remaining_time": "2h 4m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789248} {"loss": 0.35345817, "grad_norm": 2.35752916, "learning_rate": 7.931e-05, "token_acc": 0.87086446, "epoch": 3.35658043, "global_step/max_steps": "2984/8890", "percentage": "33.57%", "elapsed_time": "1h 3m 0s", "remaining_time": "2h 4m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789294} {"loss": 0.35904545, "grad_norm": 2.38788915, "learning_rate": 7.929e-05, "token_acc": 0.8808554, "epoch": 3.35770529, "global_step/max_steps": "2985/8890", "percentage": "33.58%", "elapsed_time": "1h 3m 1s", "remaining_time": "2h 4m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789291} {"loss": 0.23043965, "grad_norm": 2.40357924, "learning_rate": 7.928e-05, "token_acc": 0.90897436, "epoch": 3.35883015, "global_step/max_steps": "2986/8890", "percentage": "33.59%", "elapsed_time": "1h 3m 3s", "remaining_time": "2h 4m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789314} {"loss": 0.31810886, "grad_norm": 1.95504534, "learning_rate": 7.926e-05, "token_acc": 0.90133333, "epoch": 3.35995501, "global_step/max_steps": "2987/8890", "percentage": "33.60%", "elapsed_time": "1h 3m 4s", "remaining_time": "2h 4m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789333} {"loss": 0.25837833, "grad_norm": 2.15034485, "learning_rate": 7.925e-05, "token_acc": 0.92821782, "epoch": 3.36107987, "global_step/max_steps": "2988/8890", "percentage": "33.61%", "elapsed_time": "1h 3m 5s", "remaining_time": "2h 4m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789359} {"loss": 0.3408629, "grad_norm": 2.65190697, "learning_rate": 7.923e-05, "token_acc": 0.89817792, "epoch": 3.36220472, "global_step/max_steps": "2989/8890", "percentage": "33.62%", "elapsed_time": "1h 3m 6s", "remaining_time": "2h 4m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789377} {"loss": 0.36230642, "grad_norm": 2.40336609, "learning_rate": 7.922e-05, "token_acc": 0.87925357, "epoch": 3.36332958, "global_step/max_steps": "2990/8890", "percentage": "33.63%", "elapsed_time": "1h 3m 7s", "remaining_time": "2h 4m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789409} {"loss": 0.18234247, "grad_norm": 2.08429599, "learning_rate": 7.92e-05, "token_acc": 0.93463143, "epoch": 3.36445444, "global_step/max_steps": "2991/8890", "percentage": "33.64%", "elapsed_time": "1h 3m 8s", "remaining_time": "2h 4m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789459} {"loss": 0.34179586, "grad_norm": 2.49554086, "learning_rate": 7.919e-05, "token_acc": 0.89740082, "epoch": 3.3655793, "global_step/max_steps": "2992/8890", "percentage": "33.66%", "elapsed_time": "1h 3m 9s", "remaining_time": "2h 4m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789513} {"loss": 0.33175159, "grad_norm": 2.83513236, "learning_rate": 7.917e-05, "token_acc": 0.89262613, "epoch": 3.36670416, "global_step/max_steps": "2993/8890", "percentage": "33.67%", "elapsed_time": "1h 3m 10s", "remaining_time": "2h 4m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789535} {"loss": 0.46985173, "grad_norm": 2.56379485, "learning_rate": 7.916e-05, "token_acc": 0.86142322, "epoch": 3.36782902, "global_step/max_steps": "2994/8890", "percentage": "33.68%", "elapsed_time": "1h 3m 12s", "remaining_time": "2h 4m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789557} {"loss": 0.31709939, "grad_norm": 2.77692485, "learning_rate": 7.914e-05, "token_acc": 0.90568655, "epoch": 3.36895388, "global_step/max_steps": "2995/8890", "percentage": "33.69%", "elapsed_time": "1h 3m 13s", "remaining_time": "2h 4m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789589} {"loss": 0.30070862, "grad_norm": 2.27939939, "learning_rate": 7.913e-05, "token_acc": 0.88432836, "epoch": 3.37007874, "global_step/max_steps": "2996/8890", "percentage": "33.70%", "elapsed_time": "1h 3m 14s", "remaining_time": "2h 4m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78961} {"loss": 0.3090288, "grad_norm": 2.24226522, "learning_rate": 7.911e-05, "token_acc": 0.88987217, "epoch": 3.3712036, "global_step/max_steps": "2997/8890", "percentage": "33.71%", "elapsed_time": "1h 3m 15s", "remaining_time": "2h 4m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789637} {"loss": 0.35085613, "grad_norm": 2.61869907, "learning_rate": 7.91e-05, "token_acc": 0.88599752, "epoch": 3.37232846, "global_step/max_steps": "2998/8890", "percentage": "33.72%", "elapsed_time": "1h 3m 16s", "remaining_time": "2h 4m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789655} {"loss": 0.38398141, "grad_norm": 2.41648531, "learning_rate": 7.908e-05, "token_acc": 0.8765324, "epoch": 3.37345332, "global_step/max_steps": "2999/8890", "percentage": "33.73%", "elapsed_time": "1h 3m 17s", "remaining_time": "2h 4m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789681} {"loss": 0.33147377, "grad_norm": 2.67635155, "learning_rate": 7.906e-05, "token_acc": 0.89111111, "epoch": 3.37457818, "global_step/max_steps": "3000/8890", "percentage": "33.75%", "elapsed_time": "1h 3m 18s", "remaining_time": "2h 4m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.789708} {"eval_loss": 1.10436714, "eval_runtime": 31.6985, "eval_samples_per_second": 25.332, "eval_steps_per_second": 3.186, "eval_token_acc": 0.73520968, "epoch": 3.37457818, "global_step/max_steps": "3000/8890", "percentage": "33.75%", "elapsed_time": "1h 3m 50s", "remaining_time": "2h 5m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783171} {"loss": 0.33475125, "grad_norm": 2.7133553, "learning_rate": 7.905e-05, "token_acc": 0.89467312, "epoch": 3.37570304, "global_step/max_steps": "3001/8890", "percentage": "33.76%", "elapsed_time": "1h 4m 5s", "remaining_time": "2h 5m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780345} {"loss": 0.30058444, "grad_norm": 2.31801629, "learning_rate": 7.903e-05, "token_acc": 0.8935743, "epoch": 3.3768279, "global_step/max_steps": "3002/8890", "percentage": "33.77%", "elapsed_time": "1h 4m 6s", "remaining_time": "2h 5m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780375} {"loss": 0.34543017, "grad_norm": 2.91888928, "learning_rate": 7.902e-05, "token_acc": 0.88073394, "epoch": 3.37795276, "global_step/max_steps": "3003/8890", "percentage": "33.78%", "elapsed_time": "1h 4m 8s", "remaining_time": "2h 5m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780381} {"loss": 0.35899711, "grad_norm": 2.56002927, "learning_rate": 7.9e-05, "token_acc": 0.88591385, "epoch": 3.37907762, "global_step/max_steps": "3004/8890", "percentage": "33.79%", "elapsed_time": "1h 4m 9s", "remaining_time": "2h 5m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780412} {"loss": 0.30888003, "grad_norm": 3.38726354, "learning_rate": 7.899e-05, "token_acc": 0.89456869, "epoch": 3.38020247, "global_step/max_steps": "3005/8890", "percentage": "33.80%", "elapsed_time": "1h 4m 10s", "remaining_time": "2h 5m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780464} {"loss": 0.43770686, "grad_norm": 3.01143742, "learning_rate": 7.897e-05, "token_acc": 0.86357786, "epoch": 3.38132733, "global_step/max_steps": "3006/8890", "percentage": "33.81%", "elapsed_time": "1h 4m 11s", "remaining_time": "2h 5m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780512} {"loss": 0.35457775, "grad_norm": 2.46459365, "learning_rate": 7.896e-05, "token_acc": 0.88850967, "epoch": 3.38245219, "global_step/max_steps": "3007/8890", "percentage": "33.82%", "elapsed_time": "1h 4m 12s", "remaining_time": "2h 5m 37s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780534} {"loss": 0.45437384, "grad_norm": 2.42058516, "learning_rate": 7.894e-05, "token_acc": 0.8630363, "epoch": 3.38357705, "global_step/max_steps": "3008/8890", "percentage": "33.84%", "elapsed_time": "1h 4m 13s", "remaining_time": "2h 5m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780556} {"loss": 0.45103824, "grad_norm": 2.46220875, "learning_rate": 7.893e-05, "token_acc": 0.87025596, "epoch": 3.38470191, "global_step/max_steps": "3009/8890", "percentage": "33.85%", "elapsed_time": "1h 4m 14s", "remaining_time": "2h 5m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780565} {"loss": 0.28819594, "grad_norm": 2.17556429, "learning_rate": 7.891e-05, "token_acc": 0.89956332, "epoch": 3.38582677, "global_step/max_steps": "3010/8890", "percentage": "33.86%", "elapsed_time": "1h 4m 16s", "remaining_time": "2h 5m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780597} {"loss": 0.24684979, "grad_norm": 2.27973962, "learning_rate": 7.89e-05, "token_acc": 0.91331924, "epoch": 3.38695163, "global_step/max_steps": "3011/8890", "percentage": "33.87%", "elapsed_time": "1h 4m 17s", "remaining_time": "2h 5m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780646} {"loss": 0.33314553, "grad_norm": 2.63256073, "learning_rate": 7.888e-05, "token_acc": 0.87727825, "epoch": 3.38807649, "global_step/max_steps": "3012/8890", "percentage": "33.88%", "elapsed_time": "1h 4m 18s", "remaining_time": "2h 5m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780671} {"loss": 0.44603005, "grad_norm": 2.59487104, "learning_rate": 7.887e-05, "token_acc": 0.86536485, "epoch": 3.38920135, "global_step/max_steps": "3013/8890", "percentage": "33.89%", "elapsed_time": "1h 4m 19s", "remaining_time": "2h 5m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780697} {"loss": 0.29996198, "grad_norm": 2.3366003, "learning_rate": 7.885e-05, "token_acc": 0.90760234, "epoch": 3.39032621, "global_step/max_steps": "3014/8890", "percentage": "33.90%", "elapsed_time": "1h 4m 20s", "remaining_time": "2h 5m 26s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780709} {"loss": 0.35782585, "grad_norm": 2.39305663, "learning_rate": 7.884e-05, "token_acc": 0.88636364, "epoch": 3.39145107, "global_step/max_steps": "3015/8890", "percentage": "33.91%", "elapsed_time": "1h 4m 21s", "remaining_time": "2h 5m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780765} {"loss": 0.32553264, "grad_norm": 2.37282372, "learning_rate": 7.882e-05, "token_acc": 0.89142857, "epoch": 3.39257593, "global_step/max_steps": "3016/8890", "percentage": "33.93%", "elapsed_time": "1h 4m 22s", "remaining_time": "2h 5m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780787} {"loss": 0.44621035, "grad_norm": 2.74577522, "learning_rate": 7.881e-05, "token_acc": 0.84576613, "epoch": 3.39370079, "global_step/max_steps": "3017/8890", "percentage": "33.94%", "elapsed_time": "1h 4m 23s", "remaining_time": "2h 5m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780807} {"loss": 0.28353542, "grad_norm": 2.41965079, "learning_rate": 7.879e-05, "token_acc": 0.89311164, "epoch": 3.39482565, "global_step/max_steps": "3018/8890", "percentage": "33.95%", "elapsed_time": "1h 4m 25s", "remaining_time": "2h 5m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780841} {"loss": 0.35390806, "grad_norm": 2.68079925, "learning_rate": 7.878e-05, "token_acc": 0.89460477, "epoch": 3.39595051, "global_step/max_steps": "3019/8890", "percentage": "33.96%", "elapsed_time": "1h 4m 26s", "remaining_time": "2h 5m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780865} {"loss": 0.29196534, "grad_norm": 2.18300843, "learning_rate": 7.876e-05, "token_acc": 0.9063786, "epoch": 3.39707537, "global_step/max_steps": "3020/8890", "percentage": "33.97%", "elapsed_time": "1h 4m 27s", "remaining_time": "2h 5m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780853} {"loss": 0.32844207, "grad_norm": 2.23435974, "learning_rate": 7.875e-05, "token_acc": 0.89818182, "epoch": 3.39820022, "global_step/max_steps": "3021/8890", "percentage": "33.98%", "elapsed_time": "1h 4m 28s", "remaining_time": "2h 5m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780856} {"loss": 0.33959594, "grad_norm": 2.31963086, "learning_rate": 7.873e-05, "token_acc": 0.89944134, "epoch": 3.39932508, "global_step/max_steps": "3022/8890", "percentage": "33.99%", "elapsed_time": "1h 4m 29s", "remaining_time": "2h 5m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780908} {"loss": 0.3417924, "grad_norm": 2.67455149, "learning_rate": 7.872e-05, "token_acc": 0.89158163, "epoch": 3.40044994, "global_step/max_steps": "3023/8890", "percentage": "34.00%", "elapsed_time": "1h 4m 31s", "remaining_time": "2h 5m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780934} {"loss": 0.34244147, "grad_norm": 2.58516264, "learning_rate": 7.87e-05, "token_acc": 0.89620536, "epoch": 3.4015748, "global_step/max_steps": "3024/8890", "percentage": "34.02%", "elapsed_time": "1h 4m 32s", "remaining_time": "2h 5m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780965} {"loss": 0.36925197, "grad_norm": 2.52550578, "learning_rate": 7.869e-05, "token_acc": 0.88836773, "epoch": 3.40269966, "global_step/max_steps": "3025/8890", "percentage": "34.03%", "elapsed_time": "1h 4m 33s", "remaining_time": "2h 5m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78096} {"loss": 0.27572975, "grad_norm": 2.01264882, "learning_rate": 7.867e-05, "token_acc": 0.90030519, "epoch": 3.40382452, "global_step/max_steps": "3026/8890", "percentage": "34.04%", "elapsed_time": "1h 4m 34s", "remaining_time": "2h 5m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780982} {"loss": 0.39812237, "grad_norm": 2.84541011, "learning_rate": 7.865e-05, "token_acc": 0.87679426, "epoch": 3.40494938, "global_step/max_steps": "3027/8890", "percentage": "34.05%", "elapsed_time": "1h 4m 35s", "remaining_time": "2h 5m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781004} {"loss": 0.38241357, "grad_norm": 2.61289668, "learning_rate": 7.864e-05, "token_acc": 0.89189189, "epoch": 3.40607424, "global_step/max_steps": "3028/8890", "percentage": "34.06%", "elapsed_time": "1h 4m 36s", "remaining_time": "2h 5m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78103} {"loss": 0.25637466, "grad_norm": 2.9663136, "learning_rate": 7.862e-05, "token_acc": 0.92060491, "epoch": 3.4071991, "global_step/max_steps": "3029/8890", "percentage": "34.07%", "elapsed_time": "1h 4m 37s", "remaining_time": "2h 5m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78108} {"loss": 0.31567639, "grad_norm": 2.46122336, "learning_rate": 7.861e-05, "token_acc": 0.90578158, "epoch": 3.40832396, "global_step/max_steps": "3030/8890", "percentage": "34.08%", "elapsed_time": "1h 4m 39s", "remaining_time": "2h 5m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781119} {"loss": 0.24778123, "grad_norm": 2.51854515, "learning_rate": 7.859e-05, "token_acc": 0.91770186, "epoch": 3.40944882, "global_step/max_steps": "3031/8890", "percentage": "34.09%", "elapsed_time": "1h 4m 40s", "remaining_time": "2h 5m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781173} {"loss": 0.28293735, "grad_norm": 2.29684782, "learning_rate": 7.858e-05, "token_acc": 0.91239892, "epoch": 3.41057368, "global_step/max_steps": "3032/8890", "percentage": "34.11%", "elapsed_time": "1h 4m 40s", "remaining_time": "2h 4m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78125} {"loss": 0.34470338, "grad_norm": 2.67375255, "learning_rate": 7.856e-05, "token_acc": 0.88929889, "epoch": 3.41169854, "global_step/max_steps": "3033/8890", "percentage": "34.12%", "elapsed_time": "1h 4m 42s", "remaining_time": "2h 4m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781274} {"loss": 0.2387338, "grad_norm": 2.41706491, "learning_rate": 7.855e-05, "token_acc": 0.91851852, "epoch": 3.4128234, "global_step/max_steps": "3034/8890", "percentage": "34.13%", "elapsed_time": "1h 4m 43s", "remaining_time": "2h 4m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781302} {"loss": 0.3114692, "grad_norm": 2.59701514, "learning_rate": 7.853e-05, "token_acc": 0.8974359, "epoch": 3.41394826, "global_step/max_steps": "3035/8890", "percentage": "34.14%", "elapsed_time": "1h 4m 44s", "remaining_time": "2h 4m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781341} {"loss": 0.36440903, "grad_norm": 2.36131096, "learning_rate": 7.852e-05, "token_acc": 0.88828968, "epoch": 3.41507312, "global_step/max_steps": "3036/8890", "percentage": "34.15%", "elapsed_time": "1h 4m 45s", "remaining_time": "2h 4m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78137} {"loss": 0.35454518, "grad_norm": 2.42506409, "learning_rate": 7.85e-05, "token_acc": 0.88457808, "epoch": 3.41619798, "global_step/max_steps": "3037/8890", "percentage": "34.16%", "elapsed_time": "1h 4m 46s", "remaining_time": "2h 4m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781398} {"loss": 0.34659675, "grad_norm": 2.84565902, "learning_rate": 7.849e-05, "token_acc": 0.88823529, "epoch": 3.41732283, "global_step/max_steps": "3038/8890", "percentage": "34.17%", "elapsed_time": "1h 4m 47s", "remaining_time": "2h 4m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781431} {"loss": 0.26204571, "grad_norm": 2.32812428, "learning_rate": 7.847e-05, "token_acc": 0.9055794, "epoch": 3.41844769, "global_step/max_steps": "3039/8890", "percentage": "34.18%", "elapsed_time": "1h 4m 49s", "remaining_time": "2h 4m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781427} {"loss": 0.30300921, "grad_norm": 2.87173867, "learning_rate": 7.846e-05, "token_acc": 0.8742236, "epoch": 3.41957255, "global_step/max_steps": "3040/8890", "percentage": "34.20%", "elapsed_time": "1h 4m 50s", "remaining_time": "2h 4m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781456} {"loss": 0.23565724, "grad_norm": 2.09920025, "learning_rate": 7.844e-05, "token_acc": 0.92630502, "epoch": 3.42069741, "global_step/max_steps": "3041/8890", "percentage": "34.21%", "elapsed_time": "1h 4m 51s", "remaining_time": "2h 4m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781482} {"loss": 0.4059363, "grad_norm": 3.32669401, "learning_rate": 7.843e-05, "token_acc": 0.86997319, "epoch": 3.42182227, "global_step/max_steps": "3042/8890", "percentage": "34.22%", "elapsed_time": "1h 4m 52s", "remaining_time": "2h 4m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781529} {"loss": 0.26332718, "grad_norm": 2.14136934, "learning_rate": 7.841e-05, "token_acc": 0.90599295, "epoch": 3.42294713, "global_step/max_steps": "3043/8890", "percentage": "34.23%", "elapsed_time": "1h 4m 53s", "remaining_time": "2h 4m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781555} {"loss": 0.39254647, "grad_norm": 2.41487718, "learning_rate": 7.839e-05, "token_acc": 0.87689713, "epoch": 3.42407199, "global_step/max_steps": "3044/8890", "percentage": "34.24%", "elapsed_time": "1h 4m 54s", "remaining_time": "2h 4m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781576} {"loss": 0.31208092, "grad_norm": 2.91446424, "learning_rate": 7.838e-05, "token_acc": 0.88522013, "epoch": 3.42519685, "global_step/max_steps": "3045/8890", "percentage": "34.25%", "elapsed_time": "1h 4m 55s", "remaining_time": "2h 4m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781606} {"loss": 0.28252202, "grad_norm": 2.43377709, "learning_rate": 7.836e-05, "token_acc": 0.90899001, "epoch": 3.42632171, "global_step/max_steps": "3046/8890", "percentage": "34.26%", "elapsed_time": "1h 4m 56s", "remaining_time": "2h 4m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781631} {"loss": 0.35621834, "grad_norm": 2.66377401, "learning_rate": 7.835e-05, "token_acc": 0.87093023, "epoch": 3.42744657, "global_step/max_steps": "3047/8890", "percentage": "34.27%", "elapsed_time": "1h 4m 58s", "remaining_time": "2h 4m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781678} {"loss": 0.29114485, "grad_norm": 2.10241127, "learning_rate": 7.833e-05, "token_acc": 0.89846154, "epoch": 3.42857143, "global_step/max_steps": "3048/8890", "percentage": "34.29%", "elapsed_time": "1h 4m 59s", "remaining_time": "2h 4m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781707} {"loss": 0.29279244, "grad_norm": 2.6727674, "learning_rate": 7.832e-05, "token_acc": 0.9057377, "epoch": 3.42969629, "global_step/max_steps": "3049/8890", "percentage": "34.30%", "elapsed_time": "1h 5m 0s", "remaining_time": "2h 4m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781746} {"loss": 0.41060972, "grad_norm": 2.91655564, "learning_rate": 7.83e-05, "token_acc": 0.89042553, "epoch": 3.43082115, "global_step/max_steps": "3050/8890", "percentage": "34.31%", "elapsed_time": "1h 5m 1s", "remaining_time": "2h 4m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.7818} {"loss": 0.35495779, "grad_norm": 2.80410123, "learning_rate": 7.829e-05, "token_acc": 0.8800905, "epoch": 3.43194601, "global_step/max_steps": "3051/8890", "percentage": "34.32%", "elapsed_time": "1h 5m 2s", "remaining_time": "2h 4m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781828} {"loss": 0.33052349, "grad_norm": 2.84669757, "learning_rate": 7.827e-05, "token_acc": 0.89228296, "epoch": 3.43307087, "global_step/max_steps": "3052/8890", "percentage": "34.33%", "elapsed_time": "1h 5m 3s", "remaining_time": "2h 4m 26s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781857} {"loss": 0.27545792, "grad_norm": 2.63756895, "learning_rate": 7.826e-05, "token_acc": 0.89458689, "epoch": 3.43419573, "global_step/max_steps": "3053/8890", "percentage": "34.34%", "elapsed_time": "1h 5m 4s", "remaining_time": "2h 4m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781914} {"loss": 0.34527472, "grad_norm": 2.71223664, "learning_rate": 7.824e-05, "token_acc": 0.89029004, "epoch": 3.43532058, "global_step/max_steps": "3054/8890", "percentage": "34.35%", "elapsed_time": "1h 5m 5s", "remaining_time": "2h 4m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78193} {"loss": 0.23312855, "grad_norm": 2.42189026, "learning_rate": 7.823e-05, "token_acc": 0.91714614, "epoch": 3.43644544, "global_step/max_steps": "3055/8890", "percentage": "34.36%", "elapsed_time": "1h 5m 6s", "remaining_time": "2h 4m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781966} {"loss": 0.41184932, "grad_norm": 3.04880118, "learning_rate": 7.821e-05, "token_acc": 0.87281796, "epoch": 3.4375703, "global_step/max_steps": "3056/8890", "percentage": "34.38%", "elapsed_time": "1h 5m 7s", "remaining_time": "2h 4m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781989} {"loss": 0.25323924, "grad_norm": 2.45875096, "learning_rate": 7.82e-05, "token_acc": 0.91327064, "epoch": 3.43869516, "global_step/max_steps": "3057/8890", "percentage": "34.39%", "elapsed_time": "1h 5m 9s", "remaining_time": "2h 4m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782015} {"loss": 0.40528333, "grad_norm": 3.39323235, "learning_rate": 7.818e-05, "token_acc": 0.8683274, "epoch": 3.43982002, "global_step/max_steps": "3058/8890", "percentage": "34.40%", "elapsed_time": "1h 5m 10s", "remaining_time": "2h 4m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782091} {"loss": 0.27339417, "grad_norm": 2.49442863, "learning_rate": 7.816e-05, "token_acc": 0.91075795, "epoch": 3.44094488, "global_step/max_steps": "3059/8890", "percentage": "34.41%", "elapsed_time": "1h 5m 11s", "remaining_time": "2h 4m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782144} {"loss": 0.29697543, "grad_norm": 2.27277708, "learning_rate": 7.815e-05, "token_acc": 0.90103093, "epoch": 3.44206974, "global_step/max_steps": "3060/8890", "percentage": "34.42%", "elapsed_time": "1h 5m 12s", "remaining_time": "2h 4m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782147} {"loss": 0.39890176, "grad_norm": 2.79858494, "learning_rate": 7.813e-05, "token_acc": 0.87839433, "epoch": 3.4431946, "global_step/max_steps": "3061/8890", "percentage": "34.43%", "elapsed_time": "1h 5m 13s", "remaining_time": "2h 4m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782178} {"loss": 0.31904927, "grad_norm": 2.656883, "learning_rate": 7.812e-05, "token_acc": 0.89978214, "epoch": 3.44431946, "global_step/max_steps": "3062/8890", "percentage": "34.44%", "elapsed_time": "1h 5m 14s", "remaining_time": "2h 4m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782226} {"loss": 0.26177868, "grad_norm": 2.62880588, "learning_rate": 7.81e-05, "token_acc": 0.92006033, "epoch": 3.44544432, "global_step/max_steps": "3063/8890", "percentage": "34.45%", "elapsed_time": "1h 5m 15s", "remaining_time": "2h 4m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782303} {"loss": 0.26604459, "grad_norm": 2.24952054, "learning_rate": 7.809e-05, "token_acc": 0.90789474, "epoch": 3.44656918, "global_step/max_steps": "3064/8890", "percentage": "34.47%", "elapsed_time": "1h 5m 16s", "remaining_time": "2h 4m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782344} {"loss": 0.36638591, "grad_norm": 2.89797235, "learning_rate": 7.807e-05, "token_acc": 0.87531172, "epoch": 3.44769404, "global_step/max_steps": "3065/8890", "percentage": "34.48%", "elapsed_time": "1h 5m 17s", "remaining_time": "2h 4m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782368} {"loss": 0.29613194, "grad_norm": 2.53026676, "learning_rate": 7.806e-05, "token_acc": 0.89819588, "epoch": 3.4488189, "global_step/max_steps": "3066/8890", "percentage": "34.49%", "elapsed_time": "1h 5m 18s", "remaining_time": "2h 4m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782417} {"loss": 0.36053801, "grad_norm": 2.50951552, "learning_rate": 7.804e-05, "token_acc": 0.88739669, "epoch": 3.44994376, "global_step/max_steps": "3067/8890", "percentage": "34.50%", "elapsed_time": "1h 5m 19s", "remaining_time": "2h 4m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78244} {"loss": 0.32516822, "grad_norm": 2.43275523, "learning_rate": 7.803e-05, "token_acc": 0.89840881, "epoch": 3.45106862, "global_step/max_steps": "3068/8890", "percentage": "34.51%", "elapsed_time": "1h 5m 20s", "remaining_time": "2h 4m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782472} {"loss": 0.37064239, "grad_norm": 2.31786656, "learning_rate": 7.801e-05, "token_acc": 0.87301587, "epoch": 3.45219348, "global_step/max_steps": "3069/8890", "percentage": "34.52%", "elapsed_time": "1h 5m 22s", "remaining_time": "2h 3m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782494} {"loss": 0.37783176, "grad_norm": 2.16525626, "learning_rate": 7.8e-05, "token_acc": 0.89216512, "epoch": 3.45331834, "global_step/max_steps": "3070/8890", "percentage": "34.53%", "elapsed_time": "1h 5m 23s", "remaining_time": "2h 3m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782519} {"loss": 0.36118859, "grad_norm": 2.93664551, "learning_rate": 7.798e-05, "token_acc": 0.87808418, "epoch": 3.45444319, "global_step/max_steps": "3071/8890", "percentage": "34.54%", "elapsed_time": "1h 5m 24s", "remaining_time": "2h 3m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782558} {"loss": 0.2579768, "grad_norm": 2.26790929, "learning_rate": 7.796e-05, "token_acc": 0.90315315, "epoch": 3.45556805, "global_step/max_steps": "3072/8890", "percentage": "34.56%", "elapsed_time": "1h 5m 25s", "remaining_time": "2h 3m 54s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78262} {"loss": 0.20484303, "grad_norm": 2.0737803, "learning_rate": 7.795e-05, "token_acc": 0.9283237, "epoch": 3.45669291, "global_step/max_steps": "3073/8890", "percentage": "34.57%", "elapsed_time": "1h 5m 26s", "remaining_time": "2h 3m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782648} {"loss": 0.36656117, "grad_norm": 1.98529649, "learning_rate": 7.793e-05, "token_acc": 0.87850467, "epoch": 3.45781777, "global_step/max_steps": "3074/8890", "percentage": "34.58%", "elapsed_time": "1h 5m 27s", "remaining_time": "2h 3m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782604} {"loss": 0.31658557, "grad_norm": 2.74964237, "learning_rate": 7.792e-05, "token_acc": 0.90494792, "epoch": 3.45894263, "global_step/max_steps": "3075/8890", "percentage": "34.59%", "elapsed_time": "1h 5m 28s", "remaining_time": "2h 3m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782649} {"loss": 0.30632985, "grad_norm": 2.30934525, "learning_rate": 7.79e-05, "token_acc": 0.89708738, "epoch": 3.46006749, "global_step/max_steps": "3076/8890", "percentage": "34.60%", "elapsed_time": "1h 5m 30s", "remaining_time": "2h 3m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782672} {"loss": 0.30532423, "grad_norm": 2.38930488, "learning_rate": 7.789e-05, "token_acc": 0.89818548, "epoch": 3.46119235, "global_step/max_steps": "3077/8890", "percentage": "34.61%", "elapsed_time": "1h 5m 31s", "remaining_time": "2h 3m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782718} {"loss": 0.38041392, "grad_norm": 2.71245313, "learning_rate": 7.787e-05, "token_acc": 0.89297659, "epoch": 3.46231721, "global_step/max_steps": "3078/8890", "percentage": "34.62%", "elapsed_time": "1h 5m 32s", "remaining_time": "2h 3m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782745} {"loss": 0.31851113, "grad_norm": 2.72399235, "learning_rate": 7.786e-05, "token_acc": 0.8960177, "epoch": 3.46344207, "global_step/max_steps": "3079/8890", "percentage": "34.63%", "elapsed_time": "1h 5m 33s", "remaining_time": "2h 3m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782789} {"loss": 0.31242269, "grad_norm": 2.04075503, "learning_rate": 7.784e-05, "token_acc": 0.90276539, "epoch": 3.46456693, "global_step/max_steps": "3080/8890", "percentage": "34.65%", "elapsed_time": "1h 5m 34s", "remaining_time": "2h 3m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782786} {"loss": 0.318809, "grad_norm": 2.88442564, "learning_rate": 7.783e-05, "token_acc": 0.90384615, "epoch": 3.46569179, "global_step/max_steps": "3081/8890", "percentage": "34.66%", "elapsed_time": "1h 5m 35s", "remaining_time": "2h 3m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782814} {"loss": 0.34285975, "grad_norm": 2.7249856, "learning_rate": 7.781e-05, "token_acc": 0.89108911, "epoch": 3.46681665, "global_step/max_steps": "3082/8890", "percentage": "34.67%", "elapsed_time": "1h 5m 36s", "remaining_time": "2h 3m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782862} {"loss": 0.2899493, "grad_norm": 2.20307255, "learning_rate": 7.779e-05, "token_acc": 0.90402844, "epoch": 3.46794151, "global_step/max_steps": "3083/8890", "percentage": "34.68%", "elapsed_time": "1h 5m 37s", "remaining_time": "2h 3m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782934} {"loss": 0.25987887, "grad_norm": 2.36335206, "learning_rate": 7.778e-05, "token_acc": 0.90481283, "epoch": 3.46906637, "global_step/max_steps": "3084/8890", "percentage": "34.69%", "elapsed_time": "1h 5m 39s", "remaining_time": "2h 3m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782832} {"loss": 0.35724396, "grad_norm": 2.67155123, "learning_rate": 7.776e-05, "token_acc": 0.88704663, "epoch": 3.47019123, "global_step/max_steps": "3085/8890", "percentage": "34.70%", "elapsed_time": "1h 5m 40s", "remaining_time": "2h 3m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782855} {"loss": 0.31253821, "grad_norm": 2.61877894, "learning_rate": 7.775e-05, "token_acc": 0.88865096, "epoch": 3.47131609, "global_step/max_steps": "3086/8890", "percentage": "34.71%", "elapsed_time": "1h 5m 41s", "remaining_time": "2h 3m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782862} {"loss": 0.37348962, "grad_norm": 2.45209193, "learning_rate": 7.773e-05, "token_acc": 0.88597376, "epoch": 3.47244094, "global_step/max_steps": "3087/8890", "percentage": "34.72%", "elapsed_time": "1h 5m 43s", "remaining_time": "2h 3m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782889} {"loss": 0.31557655, "grad_norm": 2.65716529, "learning_rate": 7.772e-05, "token_acc": 0.8946213, "epoch": 3.4735658, "global_step/max_steps": "3088/8890", "percentage": "34.74%", "elapsed_time": "1h 5m 44s", "remaining_time": "2h 3m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782922} {"loss": 0.31052113, "grad_norm": 2.51203847, "learning_rate": 7.77e-05, "token_acc": 0.88769415, "epoch": 3.47469066, "global_step/max_steps": "3089/8890", "percentage": "34.75%", "elapsed_time": "1h 5m 45s", "remaining_time": "2h 3m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782951} {"loss": 0.30667371, "grad_norm": 2.28893948, "learning_rate": 7.769e-05, "token_acc": 0.90585975, "epoch": 3.47581552, "global_step/max_steps": "3090/8890", "percentage": "34.76%", "elapsed_time": "1h 5m 46s", "remaining_time": "2h 3m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782979} {"loss": 0.34518829, "grad_norm": 2.76401854, "learning_rate": 7.767e-05, "token_acc": 0.89290495, "epoch": 3.47694038, "global_step/max_steps": "3091/8890", "percentage": "34.77%", "elapsed_time": "1h 5m 47s", "remaining_time": "2h 3m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78302} {"loss": 0.34660238, "grad_norm": 2.35937667, "learning_rate": 7.766e-05, "token_acc": 0.88888889, "epoch": 3.47806524, "global_step/max_steps": "3092/8890", "percentage": "34.78%", "elapsed_time": "1h 5m 48s", "remaining_time": "2h 3m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783007} {"loss": 0.33567321, "grad_norm": 2.30957723, "learning_rate": 7.764e-05, "token_acc": 0.88837209, "epoch": 3.4791901, "global_step/max_steps": "3093/8890", "percentage": "34.79%", "elapsed_time": "1h 5m 49s", "remaining_time": "2h 3m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783058} {"loss": 0.25810477, "grad_norm": 1.98148203, "learning_rate": 7.762e-05, "token_acc": 0.9049049, "epoch": 3.48031496, "global_step/max_steps": "3094/8890", "percentage": "34.80%", "elapsed_time": "1h 5m 51s", "remaining_time": "2h 3m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783086} {"loss": 0.25209844, "grad_norm": 2.36905193, "learning_rate": 7.761e-05, "token_acc": 0.92771084, "epoch": 3.48143982, "global_step/max_steps": "3095/8890", "percentage": "34.81%", "elapsed_time": "1h 5m 52s", "remaining_time": "2h 3m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783137} {"loss": 0.3397921, "grad_norm": 2.52731538, "learning_rate": 7.759e-05, "token_acc": 0.88276553, "epoch": 3.48256468, "global_step/max_steps": "3096/8890", "percentage": "34.83%", "elapsed_time": "1h 5m 53s", "remaining_time": "2h 3m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783158} {"loss": 0.3919515, "grad_norm": 2.5716548, "learning_rate": 7.758e-05, "token_acc": 0.85953177, "epoch": 3.48368954, "global_step/max_steps": "3097/8890", "percentage": "34.84%", "elapsed_time": "1h 5m 54s", "remaining_time": "2h 3m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783186} {"loss": 0.40886557, "grad_norm": 2.75023651, "learning_rate": 7.756e-05, "token_acc": 0.872, "epoch": 3.4848144, "global_step/max_steps": "3098/8890", "percentage": "34.85%", "elapsed_time": "1h 5m 55s", "remaining_time": "2h 3m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783211} {"loss": 0.32404888, "grad_norm": 2.92364717, "learning_rate": 7.755e-05, "token_acc": 0.87777778, "epoch": 3.48593926, "global_step/max_steps": "3099/8890", "percentage": "34.86%", "elapsed_time": "1h 5m 56s", "remaining_time": "2h 3m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783233} {"loss": 0.40102738, "grad_norm": 2.70394301, "learning_rate": 7.753e-05, "token_acc": 0.87363834, "epoch": 3.48706412, "global_step/max_steps": "3100/8890", "percentage": "34.87%", "elapsed_time": "1h 5m 57s", "remaining_time": "2h 3m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783254} {"loss": 0.30674654, "grad_norm": 2.21271133, "learning_rate": 7.752e-05, "token_acc": 0.905549, "epoch": 3.48818898, "global_step/max_steps": "3101/8890", "percentage": "34.88%", "elapsed_time": "1h 5m 58s", "remaining_time": "2h 3m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783314} {"loss": 0.45304382, "grad_norm": 2.85697794, "learning_rate": 7.75e-05, "token_acc": 0.8701623, "epoch": 3.48931384, "global_step/max_steps": "3102/8890", "percentage": "34.89%", "elapsed_time": "1h 5m 59s", "remaining_time": "2h 3m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783345} {"loss": 0.29898518, "grad_norm": 2.44068265, "learning_rate": 7.748e-05, "token_acc": 0.89555822, "epoch": 3.4904387, "global_step/max_steps": "3103/8890", "percentage": "34.90%", "elapsed_time": "1h 6m 1s", "remaining_time": "2h 3m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783366} {"loss": 0.2528733, "grad_norm": 2.46893454, "learning_rate": 7.747e-05, "token_acc": 0.91180285, "epoch": 3.49156355, "global_step/max_steps": "3104/8890", "percentage": "34.92%", "elapsed_time": "1h 6m 2s", "remaining_time": "2h 3m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783411} {"loss": 0.29271358, "grad_norm": 2.42640734, "learning_rate": 7.745e-05, "token_acc": 0.9054917, "epoch": 3.49268841, "global_step/max_steps": "3105/8890", "percentage": "34.93%", "elapsed_time": "1h 6m 3s", "remaining_time": "2h 3m 4s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78344} {"loss": 0.36131787, "grad_norm": 2.56603479, "learning_rate": 7.744e-05, "token_acc": 0.88410256, "epoch": 3.49381327, "global_step/max_steps": "3106/8890", "percentage": "34.94%", "elapsed_time": "1h 6m 4s", "remaining_time": "2h 3m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78344} {"loss": 0.38830855, "grad_norm": 2.53815126, "learning_rate": 7.742e-05, "token_acc": 0.87596154, "epoch": 3.49493813, "global_step/max_steps": "3107/8890", "percentage": "34.95%", "elapsed_time": "1h 6m 5s", "remaining_time": "2h 3m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783465} {"loss": 0.38278776, "grad_norm": 2.3247242, "learning_rate": 7.741e-05, "token_acc": 0.88045234, "epoch": 3.49606299, "global_step/max_steps": "3108/8890", "percentage": "34.96%", "elapsed_time": "1h 6m 6s", "remaining_time": "2h 2m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783486} {"loss": 0.36449048, "grad_norm": 2.19559336, "learning_rate": 7.739e-05, "token_acc": 0.88442211, "epoch": 3.49718785, "global_step/max_steps": "3109/8890", "percentage": "34.97%", "elapsed_time": "1h 6m 8s", "remaining_time": "2h 2m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783512} {"loss": 0.28285885, "grad_norm": 2.41062784, "learning_rate": 7.738e-05, "token_acc": 0.90483384, "epoch": 3.49831271, "global_step/max_steps": "3110/8890", "percentage": "34.98%", "elapsed_time": "1h 6m 9s", "remaining_time": "2h 2m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78355} {"loss": 0.27734825, "grad_norm": 2.25916886, "learning_rate": 7.736e-05, "token_acc": 0.90714286, "epoch": 3.49943757, "global_step/max_steps": "3111/8890", "percentage": "34.99%", "elapsed_time": "1h 6m 10s", "remaining_time": "2h 2m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783578} {"loss": 0.28155327, "grad_norm": 2.24970841, "learning_rate": 7.734e-05, "token_acc": 0.8989011, "epoch": 3.50056243, "global_step/max_steps": "3112/8890", "percentage": "35.01%", "elapsed_time": "1h 6m 11s", "remaining_time": "2h 2m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783608} {"loss": 0.31926876, "grad_norm": 2.34187794, "learning_rate": 7.733e-05, "token_acc": 0.89281642, "epoch": 3.50168729, "global_step/max_steps": "3113/8890", "percentage": "35.02%", "elapsed_time": "1h 6m 12s", "remaining_time": "2h 2m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783629} {"loss": 0.48456413, "grad_norm": 2.50104499, "learning_rate": 7.731e-05, "token_acc": 0.86166365, "epoch": 3.50281215, "global_step/max_steps": "3114/8890", "percentage": "35.03%", "elapsed_time": "1h 6m 13s", "remaining_time": "2h 2m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783654} {"loss": 0.4218713, "grad_norm": 2.61863685, "learning_rate": 7.73e-05, "token_acc": 0.87488241, "epoch": 3.50393701, "global_step/max_steps": "3115/8890", "percentage": "35.04%", "elapsed_time": "1h 6m 14s", "remaining_time": "2h 2m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783696} {"loss": 0.38309866, "grad_norm": 2.33658051, "learning_rate": 7.728e-05, "token_acc": 0.88280582, "epoch": 3.50506187, "global_step/max_steps": "3116/8890", "percentage": "35.05%", "elapsed_time": "1h 6m 15s", "remaining_time": "2h 2m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783727} {"loss": 0.23530155, "grad_norm": 1.89522362, "learning_rate": 7.727e-05, "token_acc": 0.92300642, "epoch": 3.50618673, "global_step/max_steps": "3117/8890", "percentage": "35.06%", "elapsed_time": "1h 6m 17s", "remaining_time": "2h 2m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783727} {"loss": 0.27772585, "grad_norm": 2.09213996, "learning_rate": 7.725e-05, "token_acc": 0.89296046, "epoch": 3.50731159, "global_step/max_steps": "3118/8890", "percentage": "35.07%", "elapsed_time": "1h 6m 18s", "remaining_time": "2h 2m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783747} {"loss": 0.39758217, "grad_norm": 2.75197959, "learning_rate": 7.724e-05, "token_acc": 0.89312977, "epoch": 3.50843645, "global_step/max_steps": "3119/8890", "percentage": "35.08%", "elapsed_time": "1h 6m 19s", "remaining_time": "2h 2m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783777} {"loss": 0.36493006, "grad_norm": 2.90650868, "learning_rate": 7.722e-05, "token_acc": 0.88163885, "epoch": 3.5095613, "global_step/max_steps": "3120/8890", "percentage": "35.10%", "elapsed_time": "1h 6m 20s", "remaining_time": "2h 2m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783814} {"loss": 0.29746878, "grad_norm": 2.61744452, "learning_rate": 7.72e-05, "token_acc": 0.90615836, "epoch": 3.51068616, "global_step/max_steps": "3121/8890", "percentage": "35.11%", "elapsed_time": "1h 6m 21s", "remaining_time": "2h 2m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783839} {"loss": 0.40779212, "grad_norm": 2.71220303, "learning_rate": 7.719e-05, "token_acc": 0.88421053, "epoch": 3.51181102, "global_step/max_steps": "3122/8890", "percentage": "35.12%", "elapsed_time": "1h 6m 22s", "remaining_time": "2h 2m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783865} {"loss": 0.37495416, "grad_norm": 2.73251534, "learning_rate": 7.717e-05, "token_acc": 0.88108108, "epoch": 3.51293588, "global_step/max_steps": "3123/8890", "percentage": "35.13%", "elapsed_time": "1h 6m 23s", "remaining_time": "2h 2m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783931} {"loss": 0.39475965, "grad_norm": 2.72961926, "learning_rate": 7.716e-05, "token_acc": 0.86847826, "epoch": 3.51406074, "global_step/max_steps": "3124/8890", "percentage": "35.14%", "elapsed_time": "1h 6m 24s", "remaining_time": "2h 2m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783982} {"loss": 0.31257284, "grad_norm": 2.4696486, "learning_rate": 7.714e-05, "token_acc": 0.8997955, "epoch": 3.5151856, "global_step/max_steps": "3125/8890", "percentage": "35.15%", "elapsed_time": "1h 6m 25s", "remaining_time": "2h 2m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784009} {"loss": 0.33616132, "grad_norm": 2.69666171, "learning_rate": 7.713e-05, "token_acc": 0.87270341, "epoch": 3.51631046, "global_step/max_steps": "3126/8890", "percentage": "35.16%", "elapsed_time": "1h 6m 27s", "remaining_time": "2h 2m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784026} {"loss": 0.38429296, "grad_norm": 2.92993069, "learning_rate": 7.711e-05, "token_acc": 0.87019868, "epoch": 3.51743532, "global_step/max_steps": "3127/8890", "percentage": "35.17%", "elapsed_time": "1h 6m 28s", "remaining_time": "2h 2m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784036} {"loss": 0.35263306, "grad_norm": 2.25236964, "learning_rate": 7.71e-05, "token_acc": 0.88245931, "epoch": 3.51856018, "global_step/max_steps": "3128/8890", "percentage": "35.19%", "elapsed_time": "1h 6m 29s", "remaining_time": "2h 2m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78406} {"loss": 0.46244937, "grad_norm": 3.20543933, "learning_rate": 7.708e-05, "token_acc": 0.86675824, "epoch": 3.51968504, "global_step/max_steps": "3129/8890", "percentage": "35.20%", "elapsed_time": "1h 6m 30s", "remaining_time": "2h 2m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784084} {"loss": 0.34663945, "grad_norm": 2.65486503, "learning_rate": 7.706e-05, "token_acc": 0.87436548, "epoch": 3.5208099, "global_step/max_steps": "3130/8890", "percentage": "35.21%", "elapsed_time": "1h 6m 31s", "remaining_time": "2h 2m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784108} {"loss": 0.31543601, "grad_norm": 2.61361909, "learning_rate": 7.705e-05, "token_acc": 0.89387145, "epoch": 3.52193476, "global_step/max_steps": "3131/8890", "percentage": "35.22%", "elapsed_time": "1h 6m 32s", "remaining_time": "2h 2m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784136} {"loss": 0.41143548, "grad_norm": 2.39327979, "learning_rate": 7.703e-05, "token_acc": 0.86754297, "epoch": 3.52305962, "global_step/max_steps": "3132/8890", "percentage": "35.23%", "elapsed_time": "1h 6m 34s", "remaining_time": "2h 2m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784155} {"loss": 0.34286624, "grad_norm": 2.11421704, "learning_rate": 7.702e-05, "token_acc": 0.89426523, "epoch": 3.52418448, "global_step/max_steps": "3133/8890", "percentage": "35.24%", "elapsed_time": "1h 6m 35s", "remaining_time": "2h 2m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784155} {"loss": 0.40355989, "grad_norm": 2.58002424, "learning_rate": 7.7e-05, "token_acc": 0.88187373, "epoch": 3.52530934, "global_step/max_steps": "3134/8890", "percentage": "35.25%", "elapsed_time": "1h 6m 36s", "remaining_time": "2h 2m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784185} {"loss": 0.36738223, "grad_norm": 2.2422061, "learning_rate": 7.699e-05, "token_acc": 0.87945205, "epoch": 3.5264342, "global_step/max_steps": "3135/8890", "percentage": "35.26%", "elapsed_time": "1h 6m 37s", "remaining_time": "2h 2m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784212} {"loss": 0.34380841, "grad_norm": 2.84199953, "learning_rate": 7.697e-05, "token_acc": 0.88522427, "epoch": 3.52755906, "global_step/max_steps": "3136/8890", "percentage": "35.28%", "elapsed_time": "1h 6m 38s", "remaining_time": "2h 2m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78424} {"loss": 0.37085167, "grad_norm": 2.88660955, "learning_rate": 7.695e-05, "token_acc": 0.87388393, "epoch": 3.52868391, "global_step/max_steps": "3137/8890", "percentage": "35.29%", "elapsed_time": "1h 6m 39s", "remaining_time": "2h 2m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784267} {"loss": 0.38639694, "grad_norm": 2.65018821, "learning_rate": 7.694e-05, "token_acc": 0.87903226, "epoch": 3.52980877, "global_step/max_steps": "3138/8890", "percentage": "35.30%", "elapsed_time": "1h 6m 41s", "remaining_time": "2h 2m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784297} {"loss": 0.30890685, "grad_norm": 2.37290549, "learning_rate": 7.692e-05, "token_acc": 0.90735146, "epoch": 3.53093363, "global_step/max_steps": "3139/8890", "percentage": "35.31%", "elapsed_time": "1h 6m 42s", "remaining_time": "2h 2m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784346} {"loss": 0.3445518, "grad_norm": 2.59768105, "learning_rate": 7.691e-05, "token_acc": 0.89185751, "epoch": 3.53205849, "global_step/max_steps": "3140/8890", "percentage": "35.32%", "elapsed_time": "1h 6m 42s", "remaining_time": "2h 2m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784418} {"loss": 0.39955097, "grad_norm": 3.12786984, "learning_rate": 7.689e-05, "token_acc": 0.87284768, "epoch": 3.53318335, "global_step/max_steps": "3141/8890", "percentage": "35.33%", "elapsed_time": "1h 6m 44s", "remaining_time": "2h 2m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784449} {"loss": 0.32930589, "grad_norm": 2.52963328, "learning_rate": 7.688e-05, "token_acc": 0.9009009, "epoch": 3.53430821, "global_step/max_steps": "3142/8890", "percentage": "35.34%", "elapsed_time": "1h 6m 45s", "remaining_time": "2h 2m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784491} {"loss": 0.29992321, "grad_norm": 2.14062476, "learning_rate": 7.686e-05, "token_acc": 0.90115163, "epoch": 3.53543307, "global_step/max_steps": "3143/8890", "percentage": "35.35%", "elapsed_time": "1h 6m 46s", "remaining_time": "2h 2m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78451} {"loss": 0.29351234, "grad_norm": 2.51264668, "learning_rate": 7.684e-05, "token_acc": 0.9009901, "epoch": 3.53655793, "global_step/max_steps": "3144/8890", "percentage": "35.37%", "elapsed_time": "1h 6m 48s", "remaining_time": "2h 2m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784421} {"loss": 0.44468424, "grad_norm": 3.12960386, "learning_rate": 7.683e-05, "token_acc": 0.85846868, "epoch": 3.53768279, "global_step/max_steps": "3145/8890", "percentage": "35.38%", "elapsed_time": "1h 6m 49s", "remaining_time": "2h 2m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784459} {"loss": 0.35393041, "grad_norm": 2.60159206, "learning_rate": 7.681e-05, "token_acc": 0.87704131, "epoch": 3.53880765, "global_step/max_steps": "3146/8890", "percentage": "35.39%", "elapsed_time": "1h 6m 50s", "remaining_time": "2h 2m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784487} {"loss": 0.31548485, "grad_norm": 2.32519937, "learning_rate": 7.68e-05, "token_acc": 0.90049751, "epoch": 3.53993251, "global_step/max_steps": "3147/8890", "percentage": "35.40%", "elapsed_time": "1h 6m 51s", "remaining_time": "2h 2m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784488} {"loss": 0.41558251, "grad_norm": 3.18974733, "learning_rate": 7.678e-05, "token_acc": 0.85160576, "epoch": 3.54105737, "global_step/max_steps": "3148/8890", "percentage": "35.41%", "elapsed_time": "1h 6m 52s", "remaining_time": "2h 1m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784527} {"loss": 0.3376466, "grad_norm": 2.47604179, "learning_rate": 7.677e-05, "token_acc": 0.89195678, "epoch": 3.54218223, "global_step/max_steps": "3149/8890", "percentage": "35.42%", "elapsed_time": "1h 6m 53s", "remaining_time": "2h 1m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784548} {"loss": 0.32459044, "grad_norm": 2.58550763, "learning_rate": 7.675e-05, "token_acc": 0.89632546, "epoch": 3.54330709, "global_step/max_steps": "3150/8890", "percentage": "35.43%", "elapsed_time": "1h 6m 54s", "remaining_time": "2h 1m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.7846} {"loss": 0.33199021, "grad_norm": 2.47172737, "learning_rate": 7.673e-05, "token_acc": 0.89572471, "epoch": 3.54443195, "global_step/max_steps": "3151/8890", "percentage": "35.44%", "elapsed_time": "1h 6m 55s", "remaining_time": "2h 1m 54s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784644} {"loss": 0.34009123, "grad_norm": 2.64083385, "learning_rate": 7.672e-05, "token_acc": 0.88347206, "epoch": 3.54555681, "global_step/max_steps": "3152/8890", "percentage": "35.46%", "elapsed_time": "1h 6m 56s", "remaining_time": "2h 1m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784669} {"loss": 0.35705745, "grad_norm": 3.07255173, "learning_rate": 7.67e-05, "token_acc": 0.87890625, "epoch": 3.54668166, "global_step/max_steps": "3153/8890", "percentage": "35.47%", "elapsed_time": "1h 6m 58s", "remaining_time": "2h 1m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784602} {"loss": 0.31768838, "grad_norm": 2.86651349, "learning_rate": 7.669e-05, "token_acc": 0.89276139, "epoch": 3.54780652, "global_step/max_steps": "3154/8890", "percentage": "35.48%", "elapsed_time": "1h 6m 59s", "remaining_time": "2h 1m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784669} {"loss": 0.36769313, "grad_norm": 2.62007356, "learning_rate": 7.667e-05, "token_acc": 0.88461538, "epoch": 3.54893138, "global_step/max_steps": "3155/8890", "percentage": "35.49%", "elapsed_time": "1h 7m 0s", "remaining_time": "2h 1m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784714} {"loss": 0.32627863, "grad_norm": 2.64446902, "learning_rate": 7.666e-05, "token_acc": 0.89336801, "epoch": 3.55005624, "global_step/max_steps": "3156/8890", "percentage": "35.50%", "elapsed_time": "1h 7m 1s", "remaining_time": "2h 1m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784754} {"loss": 0.42124444, "grad_norm": 2.55825615, "learning_rate": 7.664e-05, "token_acc": 0.87019231, "epoch": 3.5511811, "global_step/max_steps": "3157/8890", "percentage": "35.51%", "elapsed_time": "1h 7m 2s", "remaining_time": "2h 1m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784794} {"loss": 0.27742678, "grad_norm": 2.51112866, "learning_rate": 7.662e-05, "token_acc": 0.89473684, "epoch": 3.55230596, "global_step/max_steps": "3158/8890", "percentage": "35.52%", "elapsed_time": "1h 7m 3s", "remaining_time": "2h 1m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784819} {"loss": 0.30376741, "grad_norm": 2.4639039, "learning_rate": 7.661e-05, "token_acc": 0.90599455, "epoch": 3.55343082, "global_step/max_steps": "3159/8890", "percentage": "35.53%", "elapsed_time": "1h 7m 4s", "remaining_time": "2h 1m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784856} {"loss": 0.33703572, "grad_norm": 2.54499459, "learning_rate": 7.659e-05, "token_acc": 0.8852459, "epoch": 3.55455568, "global_step/max_steps": "3160/8890", "percentage": "35.55%", "elapsed_time": "1h 7m 5s", "remaining_time": "2h 1m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78492} {"loss": 0.42874616, "grad_norm": 2.64886045, "learning_rate": 7.658e-05, "token_acc": 0.86947585, "epoch": 3.55568054, "global_step/max_steps": "3161/8890", "percentage": "35.56%", "elapsed_time": "1h 7m 6s", "remaining_time": "2h 1m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784969} {"loss": 0.30005074, "grad_norm": 3.04766226, "learning_rate": 7.656e-05, "token_acc": 0.89244186, "epoch": 3.5568054, "global_step/max_steps": "3162/8890", "percentage": "35.57%", "elapsed_time": "1h 7m 7s", "remaining_time": "2h 1m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785014} {"loss": 0.24526331, "grad_norm": 2.2679553, "learning_rate": 7.655e-05, "token_acc": 0.91069182, "epoch": 3.55793026, "global_step/max_steps": "3163/8890", "percentage": "35.58%", "elapsed_time": "1h 7m 8s", "remaining_time": "2h 1m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785063} {"loss": 0.38516819, "grad_norm": 2.4790659, "learning_rate": 7.653e-05, "token_acc": 0.87956565, "epoch": 3.55905512, "global_step/max_steps": "3164/8890", "percentage": "35.59%", "elapsed_time": "1h 7m 10s", "remaining_time": "2h 1m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785086} {"loss": 0.25736254, "grad_norm": 2.4340117, "learning_rate": 7.651e-05, "token_acc": 0.91176471, "epoch": 3.56017998, "global_step/max_steps": "3165/8890", "percentage": "35.60%", "elapsed_time": "1h 7m 11s", "remaining_time": "2h 1m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785109} {"loss": 0.33107817, "grad_norm": 2.97059846, "learning_rate": 7.65e-05, "token_acc": 0.88839779, "epoch": 3.56130484, "global_step/max_steps": "3166/8890", "percentage": "35.61%", "elapsed_time": "1h 7m 12s", "remaining_time": "2h 1m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785147} {"loss": 0.3913081, "grad_norm": 3.17849946, "learning_rate": 7.648e-05, "token_acc": 0.8712522, "epoch": 3.5624297, "global_step/max_steps": "3167/8890", "percentage": "35.62%", "elapsed_time": "1h 7m 13s", "remaining_time": "2h 1m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785176} {"loss": 0.34173739, "grad_norm": 2.85287714, "learning_rate": 7.647e-05, "token_acc": 0.89862328, "epoch": 3.56355456, "global_step/max_steps": "3168/8890", "percentage": "35.64%", "elapsed_time": "1h 7m 14s", "remaining_time": "2h 1m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785203} {"loss": 0.32161227, "grad_norm": 2.40844011, "learning_rate": 7.645e-05, "token_acc": 0.896, "epoch": 3.56467942, "global_step/max_steps": "3169/8890", "percentage": "35.65%", "elapsed_time": "1h 7m 15s", "remaining_time": "2h 1m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785231} {"loss": 0.30451277, "grad_norm": 2.53339863, "learning_rate": 7.644e-05, "token_acc": 0.8971831, "epoch": 3.56580427, "global_step/max_steps": "3170/8890", "percentage": "35.66%", "elapsed_time": "1h 7m 16s", "remaining_time": "2h 1m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785261} {"loss": 0.36545461, "grad_norm": 2.33650494, "learning_rate": 7.642e-05, "token_acc": 0.88224956, "epoch": 3.56692913, "global_step/max_steps": "3171/8890", "percentage": "35.67%", "elapsed_time": "1h 7m 18s", "remaining_time": "2h 1m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785286} {"loss": 0.30680829, "grad_norm": 3.06323433, "learning_rate": 7.64e-05, "token_acc": 0.90582192, "epoch": 3.56805399, "global_step/max_steps": "3172/8890", "percentage": "35.68%", "elapsed_time": "1h 7m 19s", "remaining_time": "2h 1m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78531} {"loss": 0.37672901, "grad_norm": 2.90542173, "learning_rate": 7.639e-05, "token_acc": 0.89492326, "epoch": 3.56917885, "global_step/max_steps": "3173/8890", "percentage": "35.69%", "elapsed_time": "1h 7m 19s", "remaining_time": "2h 1m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785403} {"loss": 0.42712379, "grad_norm": 3.09635782, "learning_rate": 7.637e-05, "token_acc": 0.85888889, "epoch": 3.57030371, "global_step/max_steps": "3174/8890", "percentage": "35.70%", "elapsed_time": "1h 7m 21s", "remaining_time": "2h 1m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785421} {"loss": 0.23092815, "grad_norm": 2.39288568, "learning_rate": 7.636e-05, "token_acc": 0.91262136, "epoch": 3.57142857, "global_step/max_steps": "3175/8890", "percentage": "35.71%", "elapsed_time": "1h 7m 22s", "remaining_time": "2h 1m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785468} {"loss": 0.33063895, "grad_norm": 2.67772698, "learning_rate": 7.634e-05, "token_acc": 0.89655172, "epoch": 3.57255343, "global_step/max_steps": "3176/8890", "percentage": "35.73%", "elapsed_time": "1h 7m 23s", "remaining_time": "2h 1m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785494} {"loss": 0.33573651, "grad_norm": 2.77976727, "learning_rate": 7.632e-05, "token_acc": 0.87587822, "epoch": 3.57367829, "global_step/max_steps": "3177/8890", "percentage": "35.74%", "elapsed_time": "1h 7m 24s", "remaining_time": "2h 1m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785522} {"loss": 0.25896162, "grad_norm": 2.0996449, "learning_rate": 7.631e-05, "token_acc": 0.9141791, "epoch": 3.57480315, "global_step/max_steps": "3178/8890", "percentage": "35.75%", "elapsed_time": "1h 7m 25s", "remaining_time": "2h 1m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785567} {"loss": 0.38199902, "grad_norm": 2.60374451, "learning_rate": 7.629e-05, "token_acc": 0.88314176, "epoch": 3.57592801, "global_step/max_steps": "3179/8890", "percentage": "35.76%", "elapsed_time": "1h 7m 26s", "remaining_time": "2h 1m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78559} {"loss": 0.3753562, "grad_norm": 2.2919302, "learning_rate": 7.628e-05, "token_acc": 0.87806943, "epoch": 3.57705287, "global_step/max_steps": "3180/8890", "percentage": "35.77%", "elapsed_time": "1h 7m 27s", "remaining_time": "2h 1m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785578} {"loss": 0.33880639, "grad_norm": 2.86565137, "learning_rate": 7.626e-05, "token_acc": 0.88771466, "epoch": 3.57817773, "global_step/max_steps": "3181/8890", "percentage": "35.78%", "elapsed_time": "1h 7m 29s", "remaining_time": "2h 1m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78562} {"loss": 0.3826406, "grad_norm": 2.75794125, "learning_rate": 7.625e-05, "token_acc": 0.87342908, "epoch": 3.57930259, "global_step/max_steps": "3182/8890", "percentage": "35.79%", "elapsed_time": "1h 7m 30s", "remaining_time": "2h 1m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78564} {"loss": 0.34703219, "grad_norm": 2.45639658, "learning_rate": 7.623e-05, "token_acc": 0.8805668, "epoch": 3.58042745, "global_step/max_steps": "3183/8890", "percentage": "35.80%", "elapsed_time": "1h 7m 31s", "remaining_time": "2h 1m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785665} {"loss": 0.41229615, "grad_norm": 3.23823285, "learning_rate": 7.621e-05, "token_acc": 0.86370158, "epoch": 3.58155231, "global_step/max_steps": "3184/8890", "percentage": "35.82%", "elapsed_time": "1h 7m 32s", "remaining_time": "2h 1m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785767} {"loss": 0.43497574, "grad_norm": 2.89833021, "learning_rate": 7.62e-05, "token_acc": 0.86531585, "epoch": 3.58267717, "global_step/max_steps": "3185/8890", "percentage": "35.83%", "elapsed_time": "1h 7m 33s", "remaining_time": "2h 1m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785794} {"loss": 0.37154502, "grad_norm": 2.65382338, "learning_rate": 7.618e-05, "token_acc": 0.89911308, "epoch": 3.58380202, "global_step/max_steps": "3186/8890", "percentage": "35.84%", "elapsed_time": "1h 7m 34s", "remaining_time": "2h 0m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785816} {"loss": 0.24312592, "grad_norm": 2.62107253, "learning_rate": 7.617e-05, "token_acc": 0.90142672, "epoch": 3.58492688, "global_step/max_steps": "3187/8890", "percentage": "35.85%", "elapsed_time": "1h 7m 35s", "remaining_time": "2h 0m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785845} {"loss": 0.35566324, "grad_norm": 2.02620196, "learning_rate": 7.615e-05, "token_acc": 0.88084795, "epoch": 3.58605174, "global_step/max_steps": "3188/8890", "percentage": "35.86%", "elapsed_time": "1h 7m 36s", "remaining_time": "2h 0m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785833} {"loss": 0.28765178, "grad_norm": 2.18601346, "learning_rate": 7.613e-05, "token_acc": 0.90254237, "epoch": 3.5871766, "global_step/max_steps": "3189/8890", "percentage": "35.87%", "elapsed_time": "1h 7m 38s", "remaining_time": "2h 0m 54s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785853} {"loss": 0.35958654, "grad_norm": 2.68933368, "learning_rate": 7.612e-05, "token_acc": 0.88927739, "epoch": 3.58830146, "global_step/max_steps": "3190/8890", "percentage": "35.88%", "elapsed_time": "1h 7m 39s", "remaining_time": "2h 0m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78588} {"loss": 0.38264304, "grad_norm": 2.28960657, "learning_rate": 7.61e-05, "token_acc": 0.88250883, "epoch": 3.58942632, "global_step/max_steps": "3191/8890", "percentage": "35.89%", "elapsed_time": "1h 7m 40s", "remaining_time": "2h 0m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785924} {"loss": 0.34514615, "grad_norm": 2.50981474, "learning_rate": 7.609e-05, "token_acc": 0.89230769, "epoch": 3.59055118, "global_step/max_steps": "3192/8890", "percentage": "35.91%", "elapsed_time": "1h 7m 41s", "remaining_time": "2h 0m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785952} {"loss": 0.30784106, "grad_norm": 2.69910288, "learning_rate": 7.607e-05, "token_acc": 0.89722222, "epoch": 3.59167604, "global_step/max_steps": "3193/8890", "percentage": "35.92%", "elapsed_time": "1h 7m 42s", "remaining_time": "2h 0m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785992} {"loss": 0.33217639, "grad_norm": 2.70115376, "learning_rate": 7.606e-05, "token_acc": 0.88834356, "epoch": 3.5928009, "global_step/max_steps": "3194/8890", "percentage": "35.93%", "elapsed_time": "1h 7m 43s", "remaining_time": "2h 0m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786016} {"loss": 0.45353031, "grad_norm": 3.56220102, "learning_rate": 7.604e-05, "token_acc": 0.85714286, "epoch": 3.59392576, "global_step/max_steps": "3195/8890", "percentage": "35.94%", "elapsed_time": "1h 7m 44s", "remaining_time": "2h 0m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786072} {"loss": 0.3323583, "grad_norm": 2.13984084, "learning_rate": 7.602e-05, "token_acc": 0.89209856, "epoch": 3.59505062, "global_step/max_steps": "3196/8890", "percentage": "35.95%", "elapsed_time": "1h 7m 45s", "remaining_time": "2h 0m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786096} {"loss": 0.30668554, "grad_norm": 2.68318176, "learning_rate": 7.601e-05, "token_acc": 0.89855072, "epoch": 3.59617548, "global_step/max_steps": "3197/8890", "percentage": "35.96%", "elapsed_time": "1h 7m 46s", "remaining_time": "2h 0m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786125} {"loss": 0.42405844, "grad_norm": 2.31490564, "learning_rate": 7.599e-05, "token_acc": 0.87122736, "epoch": 3.59730034, "global_step/max_steps": "3198/8890", "percentage": "35.97%", "elapsed_time": "1h 7m 48s", "remaining_time": "2h 0m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786119} {"loss": 0.36579102, "grad_norm": 2.29056406, "learning_rate": 7.598e-05, "token_acc": 0.89643268, "epoch": 3.5984252, "global_step/max_steps": "3199/8890", "percentage": "35.98%", "elapsed_time": "1h 7m 49s", "remaining_time": "2h 0m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786151} {"loss": 0.41123223, "grad_norm": 2.44163966, "learning_rate": 7.596e-05, "token_acc": 0.87750792, "epoch": 3.59955006, "global_step/max_steps": "3200/8890", "percentage": "36.00%", "elapsed_time": "1h 7m 50s", "remaining_time": "2h 0m 37s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786146} {"loss": 0.38386357, "grad_norm": 2.57798219, "learning_rate": 7.594e-05, "token_acc": 0.88424437, "epoch": 3.60067492, "global_step/max_steps": "3201/8890", "percentage": "36.01%", "elapsed_time": "1h 7m 51s", "remaining_time": "2h 0m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786172} {"loss": 0.25862157, "grad_norm": 2.94526029, "learning_rate": 7.593e-05, "token_acc": 0.90697674, "epoch": 3.60179978, "global_step/max_steps": "3202/8890", "percentage": "36.02%", "elapsed_time": "1h 7m 52s", "remaining_time": "2h 0m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786219} {"loss": 0.47876698, "grad_norm": 2.99848199, "learning_rate": 7.591e-05, "token_acc": 0.85011442, "epoch": 3.60292463, "global_step/max_steps": "3203/8890", "percentage": "36.03%", "elapsed_time": "1h 7m 53s", "remaining_time": "2h 0m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786282} {"loss": 0.31346452, "grad_norm": 2.59226608, "learning_rate": 7.59e-05, "token_acc": 0.90407359, "epoch": 3.60404949, "global_step/max_steps": "3204/8890", "percentage": "36.04%", "elapsed_time": "1h 7m 54s", "remaining_time": "2h 0m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786306} {"loss": 0.31157678, "grad_norm": 2.69718337, "learning_rate": 7.588e-05, "token_acc": 0.89002933, "epoch": 3.60517435, "global_step/max_steps": "3205/8890", "percentage": "36.05%", "elapsed_time": "1h 7m 55s", "remaining_time": "2h 0m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786396} {"loss": 0.2586576, "grad_norm": 2.10281587, "learning_rate": 7.586e-05, "token_acc": 0.91732283, "epoch": 3.60629921, "global_step/max_steps": "3206/8890", "percentage": "36.06%", "elapsed_time": "1h 7m 56s", "remaining_time": "2h 0m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786417} {"loss": 0.38643751, "grad_norm": 2.27707863, "learning_rate": 7.585e-05, "token_acc": 0.86907021, "epoch": 3.60742407, "global_step/max_steps": "3207/8890", "percentage": "36.07%", "elapsed_time": "1h 7m 57s", "remaining_time": "2h 0m 26s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786417} {"loss": 0.381118, "grad_norm": 2.5266149, "learning_rate": 7.583e-05, "token_acc": 0.88349515, "epoch": 3.60854893, "global_step/max_steps": "3208/8890", "percentage": "36.09%", "elapsed_time": "1h 7m 59s", "remaining_time": "2h 0m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786436} {"loss": 0.38380051, "grad_norm": 2.70685697, "learning_rate": 7.582e-05, "token_acc": 0.87553191, "epoch": 3.60967379, "global_step/max_steps": "3209/8890", "percentage": "36.10%", "elapsed_time": "1h 8m 0s", "remaining_time": "2h 0m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786456} {"loss": 0.36929277, "grad_norm": 2.43845773, "learning_rate": 7.58e-05, "token_acc": 0.89613971, "epoch": 3.61079865, "global_step/max_steps": "3210/8890", "percentage": "36.11%", "elapsed_time": "1h 8m 1s", "remaining_time": "2h 0m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786477} {"loss": 0.40403977, "grad_norm": 3.24241185, "learning_rate": 7.579e-05, "token_acc": 0.85359116, "epoch": 3.61192351, "global_step/max_steps": "3211/8890", "percentage": "36.12%", "elapsed_time": "1h 8m 2s", "remaining_time": "2h 0m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786515} {"loss": 0.3479577, "grad_norm": 2.57983875, "learning_rate": 7.577e-05, "token_acc": 0.88425926, "epoch": 3.61304837, "global_step/max_steps": "3212/8890", "percentage": "36.13%", "elapsed_time": "1h 8m 3s", "remaining_time": "2h 0m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786535} {"loss": 0.44314256, "grad_norm": 2.53633142, "learning_rate": 7.575e-05, "token_acc": 0.84535187, "epoch": 3.61417323, "global_step/max_steps": "3213/8890", "percentage": "36.14%", "elapsed_time": "1h 8m 5s", "remaining_time": "2h 0m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786528} {"loss": 0.32284552, "grad_norm": 2.56090117, "learning_rate": 7.574e-05, "token_acc": 0.89171975, "epoch": 3.61529809, "global_step/max_steps": "3214/8890", "percentage": "36.15%", "elapsed_time": "1h 8m 6s", "remaining_time": "2h 0m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786557} {"loss": 0.28959551, "grad_norm": 2.6717813, "learning_rate": 7.572e-05, "token_acc": 0.90344828, "epoch": 3.61642295, "global_step/max_steps": "3215/8890", "percentage": "36.16%", "elapsed_time": "1h 8m 7s", "remaining_time": "2h 0m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786578} {"loss": 0.30675298, "grad_norm": 2.12098312, "learning_rate": 7.571e-05, "token_acc": 0.8974122, "epoch": 3.61754781, "global_step/max_steps": "3216/8890", "percentage": "36.18%", "elapsed_time": "1h 8m 8s", "remaining_time": "2h 0m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786596} {"loss": 0.3227393, "grad_norm": 2.91125393, "learning_rate": 7.569e-05, "token_acc": 0.88473054, "epoch": 3.61867267, "global_step/max_steps": "3217/8890", "percentage": "36.19%", "elapsed_time": "1h 8m 9s", "remaining_time": "2h 0m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786616} {"loss": 0.25956082, "grad_norm": 2.46701384, "learning_rate": 7.567e-05, "token_acc": 0.91408115, "epoch": 3.61979753, "global_step/max_steps": "3218/8890", "percentage": "36.20%", "elapsed_time": "1h 8m 10s", "remaining_time": "2h 0m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786643} {"loss": 0.27098912, "grad_norm": 2.94407439, "learning_rate": 7.566e-05, "token_acc": 0.92203898, "epoch": 3.62092238, "global_step/max_steps": "3219/8890", "percentage": "36.21%", "elapsed_time": "1h 8m 11s", "remaining_time": "2h 0m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786702} {"loss": 0.35093218, "grad_norm": 2.61336541, "learning_rate": 7.564e-05, "token_acc": 0.89282103, "epoch": 3.62204724, "global_step/max_steps": "3220/8890", "percentage": "36.22%", "elapsed_time": "1h 8m 12s", "remaining_time": "2h 0m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786722} {"loss": 0.53281546, "grad_norm": 3.10527229, "learning_rate": 7.563e-05, "token_acc": 0.84249084, "epoch": 3.6231721, "global_step/max_steps": "3221/8890", "percentage": "36.23%", "elapsed_time": "1h 8m 13s", "remaining_time": "2h 0m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786762} {"loss": 0.35816792, "grad_norm": 2.49301338, "learning_rate": 7.561e-05, "token_acc": 0.88438134, "epoch": 3.62429696, "global_step/max_steps": "3222/8890", "percentage": "36.24%", "elapsed_time": "1h 8m 15s", "remaining_time": "2h 0m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786787} {"loss": 0.41936016, "grad_norm": 2.48866105, "learning_rate": 7.559e-05, "token_acc": 0.86925158, "epoch": 3.62542182, "global_step/max_steps": "3223/8890", "percentage": "36.25%", "elapsed_time": "1h 8m 16s", "remaining_time": "2h 0m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786805} {"loss": 0.22676516, "grad_norm": 2.56793046, "learning_rate": 7.558e-05, "token_acc": 0.91914191, "epoch": 3.62654668, "global_step/max_steps": "3224/8890", "percentage": "36.27%", "elapsed_time": "1h 8m 17s", "remaining_time": "2h 0m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786855} {"loss": 0.28333071, "grad_norm": 2.25984859, "learning_rate": 7.556e-05, "token_acc": 0.92229039, "epoch": 3.62767154, "global_step/max_steps": "3225/8890", "percentage": "36.28%", "elapsed_time": "1h 8m 18s", "remaining_time": "1h 59m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786878} {"loss": 0.35108119, "grad_norm": 2.60997033, "learning_rate": 7.555e-05, "token_acc": 0.89189189, "epoch": 3.6287964, "global_step/max_steps": "3226/8890", "percentage": "36.29%", "elapsed_time": "1h 8m 19s", "remaining_time": "1h 59m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786914} {"loss": 0.32962361, "grad_norm": 2.17656755, "learning_rate": 7.553e-05, "token_acc": 0.89649416, "epoch": 3.62992126, "global_step/max_steps": "3227/8890", "percentage": "36.30%", "elapsed_time": "1h 8m 20s", "remaining_time": "1h 59m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786942} {"loss": 0.30089164, "grad_norm": 2.28863454, "learning_rate": 7.551e-05, "token_acc": 0.90635452, "epoch": 3.63104612, "global_step/max_steps": "3228/8890", "percentage": "36.31%", "elapsed_time": "1h 8m 21s", "remaining_time": "1h 59m 54s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786962} {"loss": 0.40085626, "grad_norm": 2.80153537, "learning_rate": 7.55e-05, "token_acc": 0.8746114, "epoch": 3.63217098, "global_step/max_steps": "3229/8890", "percentage": "36.32%", "elapsed_time": "1h 8m 23s", "remaining_time": "1h 59m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786984} {"loss": 0.33368161, "grad_norm": 2.14672208, "learning_rate": 7.548e-05, "token_acc": 0.89063948, "epoch": 3.63329584, "global_step/max_steps": "3230/8890", "percentage": "36.33%", "elapsed_time": "1h 8m 24s", "remaining_time": "1h 59m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787001} {"loss": 0.26362172, "grad_norm": 2.56711555, "learning_rate": 7.547e-05, "token_acc": 0.90771028, "epoch": 3.6344207, "global_step/max_steps": "3231/8890", "percentage": "36.34%", "elapsed_time": "1h 8m 25s", "remaining_time": "1h 59m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787046} {"loss": 0.37865418, "grad_norm": 2.84115672, "learning_rate": 7.545e-05, "token_acc": 0.87582563, "epoch": 3.63554556, "global_step/max_steps": "3232/8890", "percentage": "36.36%", "elapsed_time": "1h 8m 26s", "remaining_time": "1h 59m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787063} {"loss": 0.35283655, "grad_norm": 2.90685463, "learning_rate": 7.543e-05, "token_acc": 0.87702703, "epoch": 3.63667042, "global_step/max_steps": "3233/8890", "percentage": "36.37%", "elapsed_time": "1h 8m 27s", "remaining_time": "1h 59m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787091} {"loss": 0.31498939, "grad_norm": 2.88078165, "learning_rate": 7.542e-05, "token_acc": 0.88606061, "epoch": 3.63779528, "global_step/max_steps": "3234/8890", "percentage": "36.38%", "elapsed_time": "1h 8m 28s", "remaining_time": "1h 59m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787117} {"loss": 0.35974938, "grad_norm": 2.62798405, "learning_rate": 7.54e-05, "token_acc": 0.88794023, "epoch": 3.63892013, "global_step/max_steps": "3235/8890", "percentage": "36.39%", "elapsed_time": "1h 8m 29s", "remaining_time": "1h 59m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787162} {"loss": 0.32388943, "grad_norm": 2.44641185, "learning_rate": 7.539e-05, "token_acc": 0.9028777, "epoch": 3.64004499, "global_step/max_steps": "3236/8890", "percentage": "36.40%", "elapsed_time": "1h 8m 30s", "remaining_time": "1h 59m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787187} {"loss": 0.46426043, "grad_norm": 2.69451237, "learning_rate": 7.537e-05, "token_acc": 0.86378036, "epoch": 3.64116985, "global_step/max_steps": "3237/8890", "percentage": "36.41%", "elapsed_time": "1h 8m 31s", "remaining_time": "1h 59m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787212} {"loss": 0.34183741, "grad_norm": 2.65543938, "learning_rate": 7.535e-05, "token_acc": 0.89223638, "epoch": 3.64229471, "global_step/max_steps": "3238/8890", "percentage": "36.42%", "elapsed_time": "1h 8m 33s", "remaining_time": "1h 59m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787238} {"loss": 0.36558104, "grad_norm": 2.77504897, "learning_rate": 7.534e-05, "token_acc": 0.87228608, "epoch": 3.64341957, "global_step/max_steps": "3239/8890", "percentage": "36.43%", "elapsed_time": "1h 8m 34s", "remaining_time": "1h 59m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787263} {"loss": 0.32443923, "grad_norm": 2.27917242, "learning_rate": 7.532e-05, "token_acc": 0.88826816, "epoch": 3.64454443, "global_step/max_steps": "3240/8890", "percentage": "36.45%", "elapsed_time": "1h 8m 35s", "remaining_time": "1h 59m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787301} {"loss": 0.32815462, "grad_norm": 2.72665787, "learning_rate": 7.531e-05, "token_acc": 0.88451087, "epoch": 3.64566929, "global_step/max_steps": "3241/8890", "percentage": "36.46%", "elapsed_time": "1h 8m 36s", "remaining_time": "1h 59m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787301} {"loss": 0.31839693, "grad_norm": 2.70174646, "learning_rate": 7.529e-05, "token_acc": 0.88779804, "epoch": 3.64679415, "global_step/max_steps": "3242/8890", "percentage": "36.47%", "elapsed_time": "1h 8m 37s", "remaining_time": "1h 59m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78736} {"loss": 0.28905806, "grad_norm": 2.33665848, "learning_rate": 7.527e-05, "token_acc": 0.91940976, "epoch": 3.64791901, "global_step/max_steps": "3243/8890", "percentage": "36.48%", "elapsed_time": "1h 8m 38s", "remaining_time": "1h 59m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787411} {"loss": 0.31542158, "grad_norm": 2.3955977, "learning_rate": 7.526e-05, "token_acc": 0.89138135, "epoch": 3.64904387, "global_step/max_steps": "3244/8890", "percentage": "36.49%", "elapsed_time": "1h 8m 39s", "remaining_time": "1h 59m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787453} {"loss": 0.31603235, "grad_norm": 2.32049513, "learning_rate": 7.524e-05, "token_acc": 0.89425287, "epoch": 3.65016873, "global_step/max_steps": "3245/8890", "percentage": "36.50%", "elapsed_time": "1h 8m 40s", "remaining_time": "1h 59m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78749} {"loss": 0.2948038, "grad_norm": 2.4522543, "learning_rate": 7.523e-05, "token_acc": 0.89964158, "epoch": 3.65129359, "global_step/max_steps": "3246/8890", "percentage": "36.51%", "elapsed_time": "1h 8m 41s", "remaining_time": "1h 59m 26s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787548} {"loss": 0.31303161, "grad_norm": 2.38406062, "learning_rate": 7.521e-05, "token_acc": 0.89803013, "epoch": 3.65241845, "global_step/max_steps": "3247/8890", "percentage": "36.52%", "elapsed_time": "1h 8m 42s", "remaining_time": "1h 59m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787573} {"loss": 0.43342441, "grad_norm": 2.96337199, "learning_rate": 7.519e-05, "token_acc": 0.8716763, "epoch": 3.65354331, "global_step/max_steps": "3248/8890", "percentage": "36.54%", "elapsed_time": "1h 8m 43s", "remaining_time": "1h 59m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787665} {"loss": 0.26884791, "grad_norm": 2.68415236, "learning_rate": 7.518e-05, "token_acc": 0.91433022, "epoch": 3.65466817, "global_step/max_steps": "3249/8890", "percentage": "36.55%", "elapsed_time": "1h 8m 44s", "remaining_time": "1h 59m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787694} {"loss": 0.35729128, "grad_norm": 3.00426269, "learning_rate": 7.516e-05, "token_acc": 0.86915888, "epoch": 3.65579303, "global_step/max_steps": "3250/8890", "percentage": "36.56%", "elapsed_time": "1h 8m 45s", "remaining_time": "1h 59m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78772} {"loss": 0.42980039, "grad_norm": 2.89135361, "learning_rate": 7.514e-05, "token_acc": 0.86650775, "epoch": 3.65691789, "global_step/max_steps": "3251/8890", "percentage": "36.57%", "elapsed_time": "1h 8m 46s", "remaining_time": "1h 59m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78776} {"loss": 0.32797715, "grad_norm": 2.27868772, "learning_rate": 7.513e-05, "token_acc": 0.8881323, "epoch": 3.65804274, "global_step/max_steps": "3252/8890", "percentage": "36.58%", "elapsed_time": "1h 8m 48s", "remaining_time": "1h 59m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787782} {"loss": 0.4198752, "grad_norm": 3.01680541, "learning_rate": 7.511e-05, "token_acc": 0.8631139, "epoch": 3.6591676, "global_step/max_steps": "3253/8890", "percentage": "36.59%", "elapsed_time": "1h 8m 49s", "remaining_time": "1h 59m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787803} {"loss": 0.30858517, "grad_norm": 3.08427644, "learning_rate": 7.51e-05, "token_acc": 0.875, "epoch": 3.66029246, "global_step/max_steps": "3254/8890", "percentage": "36.60%", "elapsed_time": "1h 8m 50s", "remaining_time": "1h 59m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787845} {"loss": 0.26549709, "grad_norm": 2.47170973, "learning_rate": 7.508e-05, "token_acc": 0.90629371, "epoch": 3.66141732, "global_step/max_steps": "3255/8890", "percentage": "36.61%", "elapsed_time": "1h 8m 51s", "remaining_time": "1h 59m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787865} {"loss": 0.35442206, "grad_norm": 2.90120745, "learning_rate": 7.506e-05, "token_acc": 0.87830688, "epoch": 3.66254218, "global_step/max_steps": "3256/8890", "percentage": "36.63%", "elapsed_time": "1h 8m 52s", "remaining_time": "1h 59m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787886} {"loss": 0.36775532, "grad_norm": 2.756881, "learning_rate": 7.505e-05, "token_acc": 0.87715517, "epoch": 3.66366704, "global_step/max_steps": "3257/8890", "percentage": "36.64%", "elapsed_time": "1h 8m 53s", "remaining_time": "1h 59m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787928} {"loss": 0.44542944, "grad_norm": 2.7419548, "learning_rate": 7.503e-05, "token_acc": 0.86051282, "epoch": 3.6647919, "global_step/max_steps": "3258/8890", "percentage": "36.65%", "elapsed_time": "1h 8m 55s", "remaining_time": "1h 59m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787846} {"loss": 0.3823638, "grad_norm": 2.89528155, "learning_rate": 7.502e-05, "token_acc": 0.87964149, "epoch": 3.66591676, "global_step/max_steps": "3259/8890", "percentage": "36.66%", "elapsed_time": "1h 8m 56s", "remaining_time": "1h 59m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787874} {"loss": 0.30073166, "grad_norm": 2.90377951, "learning_rate": 7.5e-05, "token_acc": 0.88439306, "epoch": 3.66704162, "global_step/max_steps": "3260/8890", "percentage": "36.67%", "elapsed_time": "1h 8m 57s", "remaining_time": "1h 59m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78791} {"loss": 0.3315126, "grad_norm": 2.71455383, "learning_rate": 7.498e-05, "token_acc": 0.90247074, "epoch": 3.66816648, "global_step/max_steps": "3261/8890", "percentage": "36.68%", "elapsed_time": "1h 8m 58s", "remaining_time": "1h 59m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787937} {"loss": 0.2969456, "grad_norm": 2.37918544, "learning_rate": 7.497e-05, "token_acc": 0.88913043, "epoch": 3.66929134, "global_step/max_steps": "3262/8890", "percentage": "36.69%", "elapsed_time": "1h 8m 59s", "remaining_time": "1h 59m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787936} {"loss": 0.33122164, "grad_norm": 2.67747641, "learning_rate": 7.495e-05, "token_acc": 0.88440111, "epoch": 3.6704162, "global_step/max_steps": "3263/8890", "percentage": "36.70%", "elapsed_time": "1h 9m 1s", "remaining_time": "1h 59m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787965} {"loss": 0.32078189, "grad_norm": 2.64027691, "learning_rate": 7.494e-05, "token_acc": 0.89514349, "epoch": 3.67154106, "global_step/max_steps": "3264/8890", "percentage": "36.72%", "elapsed_time": "1h 9m 2s", "remaining_time": "1h 58m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787993} {"loss": 0.25208524, "grad_norm": 2.31909466, "learning_rate": 7.492e-05, "token_acc": 0.91782554, "epoch": 3.67266592, "global_step/max_steps": "3265/8890", "percentage": "36.73%", "elapsed_time": "1h 9m 3s", "remaining_time": "1h 58m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788029} {"loss": 0.31298912, "grad_norm": 2.64749026, "learning_rate": 7.49e-05, "token_acc": 0.90049751, "epoch": 3.67379078, "global_step/max_steps": "3266/8890", "percentage": "36.74%", "elapsed_time": "1h 9m 4s", "remaining_time": "1h 58m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788079} {"loss": 0.32964236, "grad_norm": 2.62147546, "learning_rate": 7.489e-05, "token_acc": 0.89588689, "epoch": 3.67491564, "global_step/max_steps": "3267/8890", "percentage": "36.75%", "elapsed_time": "1h 9m 5s", "remaining_time": "1h 58m 54s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788099} {"loss": 0.33033693, "grad_norm": 2.5332706, "learning_rate": 7.487e-05, "token_acc": 0.88811995, "epoch": 3.67604049, "global_step/max_steps": "3268/8890", "percentage": "36.76%", "elapsed_time": "1h 9m 6s", "remaining_time": "1h 58m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788127} {"loss": 0.35690412, "grad_norm": 2.36319876, "learning_rate": 7.485e-05, "token_acc": 0.87924866, "epoch": 3.67716535, "global_step/max_steps": "3269/8890", "percentage": "36.77%", "elapsed_time": "1h 9m 7s", "remaining_time": "1h 58m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788146} {"loss": 0.30293536, "grad_norm": 2.5814898, "learning_rate": 7.484e-05, "token_acc": 0.88888889, "epoch": 3.67829021, "global_step/max_steps": "3270/8890", "percentage": "36.78%", "elapsed_time": "1h 9m 8s", "remaining_time": "1h 58m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788187} {"loss": 0.32505876, "grad_norm": 2.50756311, "learning_rate": 7.482e-05, "token_acc": 0.88784067, "epoch": 3.67941507, "global_step/max_steps": "3271/8890", "percentage": "36.79%", "elapsed_time": "1h 9m 9s", "remaining_time": "1h 58m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788208} {"loss": 0.40178493, "grad_norm": 2.68590927, "learning_rate": 7.481e-05, "token_acc": 0.87740385, "epoch": 3.68053993, "global_step/max_steps": "3272/8890", "percentage": "36.81%", "elapsed_time": "1h 9m 11s", "remaining_time": "1h 58m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788231} {"loss": 0.28171167, "grad_norm": 2.2930038, "learning_rate": 7.479e-05, "token_acc": 0.90463215, "epoch": 3.68166479, "global_step/max_steps": "3273/8890", "percentage": "36.82%", "elapsed_time": "1h 9m 12s", "remaining_time": "1h 58m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788257} {"loss": 0.27241409, "grad_norm": 2.45855808, "learning_rate": 7.477e-05, "token_acc": 0.90980392, "epoch": 3.68278965, "global_step/max_steps": "3274/8890", "percentage": "36.83%", "elapsed_time": "1h 9m 13s", "remaining_time": "1h 58m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78828} {"loss": 0.32965159, "grad_norm": 2.36540604, "learning_rate": 7.476e-05, "token_acc": 0.89012209, "epoch": 3.68391451, "global_step/max_steps": "3275/8890", "percentage": "36.84%", "elapsed_time": "1h 9m 14s", "remaining_time": "1h 58m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788308} {"loss": 0.30911422, "grad_norm": 2.15803647, "learning_rate": 7.474e-05, "token_acc": 0.89566116, "epoch": 3.68503937, "global_step/max_steps": "3276/8890", "percentage": "36.85%", "elapsed_time": "1h 9m 15s", "remaining_time": "1h 58m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788346} {"loss": 0.39000395, "grad_norm": 2.57720137, "learning_rate": 7.473e-05, "token_acc": 0.86432161, "epoch": 3.68616423, "global_step/max_steps": "3277/8890", "percentage": "36.86%", "elapsed_time": "1h 9m 16s", "remaining_time": "1h 58m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788366} {"loss": 0.35453543, "grad_norm": 2.18901134, "learning_rate": 7.471e-05, "token_acc": 0.88929889, "epoch": 3.68728909, "global_step/max_steps": "3278/8890", "percentage": "36.87%", "elapsed_time": "1h 9m 17s", "remaining_time": "1h 58m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788389} {"loss": 0.38364327, "grad_norm": 2.59692836, "learning_rate": 7.469e-05, "token_acc": 0.87918015, "epoch": 3.68841395, "global_step/max_steps": "3279/8890", "percentage": "36.88%", "elapsed_time": "1h 9m 18s", "remaining_time": "1h 58m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788414} {"loss": 0.3476204, "grad_norm": 2.59554005, "learning_rate": 7.468e-05, "token_acc": 0.89318182, "epoch": 3.68953881, "global_step/max_steps": "3280/8890", "percentage": "36.90%", "elapsed_time": "1h 9m 20s", "remaining_time": "1h 58m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788434} {"loss": 0.35183805, "grad_norm": 2.80911589, "learning_rate": 7.466e-05, "token_acc": 0.886, "epoch": 3.69066367, "global_step/max_steps": "3281/8890", "percentage": "36.91%", "elapsed_time": "1h 9m 21s", "remaining_time": "1h 58m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788453} {"loss": 0.19059521, "grad_norm": 1.9520216, "learning_rate": 7.464e-05, "token_acc": 0.9415656, "epoch": 3.69178853, "global_step/max_steps": "3282/8890", "percentage": "36.92%", "elapsed_time": "1h 9m 22s", "remaining_time": "1h 58m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788446} {"loss": 0.42484403, "grad_norm": 2.64548254, "learning_rate": 7.463e-05, "token_acc": 0.86407767, "epoch": 3.69291339, "global_step/max_steps": "3283/8890", "percentage": "36.93%", "elapsed_time": "1h 9m 23s", "remaining_time": "1h 58m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78847} {"loss": 0.28411716, "grad_norm": 2.44358134, "learning_rate": 7.461e-05, "token_acc": 0.89707928, "epoch": 3.69403825, "global_step/max_steps": "3284/8890", "percentage": "36.94%", "elapsed_time": "1h 9m 24s", "remaining_time": "1h 58m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78852} {"loss": 0.33672398, "grad_norm": 2.32614398, "learning_rate": 7.46e-05, "token_acc": 0.8870822, "epoch": 3.6951631, "global_step/max_steps": "3285/8890", "percentage": "36.95%", "elapsed_time": "1h 9m 25s", "remaining_time": "1h 58m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788538} {"loss": 0.46429491, "grad_norm": 2.86880183, "learning_rate": 7.458e-05, "token_acc": 0.85371179, "epoch": 3.69628796, "global_step/max_steps": "3286/8890", "percentage": "36.96%", "elapsed_time": "1h 9m 26s", "remaining_time": "1h 58m 26s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788607} {"loss": 0.41131499, "grad_norm": 2.74057889, "learning_rate": 7.456e-05, "token_acc": 0.86767896, "epoch": 3.69741282, "global_step/max_steps": "3287/8890", "percentage": "36.97%", "elapsed_time": "1h 9m 27s", "remaining_time": "1h 58m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788634} {"loss": 0.38373393, "grad_norm": 2.6045115, "learning_rate": 7.455e-05, "token_acc": 0.88348416, "epoch": 3.69853768, "global_step/max_steps": "3288/8890", "percentage": "36.99%", "elapsed_time": "1h 9m 29s", "remaining_time": "1h 58m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788658} {"loss": 0.3732684, "grad_norm": 2.89693141, "learning_rate": 7.453e-05, "token_acc": 0.89148352, "epoch": 3.69966254, "global_step/max_steps": "3289/8890", "percentage": "37.00%", "elapsed_time": "1h 9m 30s", "remaining_time": "1h 58m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788696} {"loss": 0.27872741, "grad_norm": 2.15511823, "learning_rate": 7.452e-05, "token_acc": 0.90485075, "epoch": 3.7007874, "global_step/max_steps": "3290/8890", "percentage": "37.01%", "elapsed_time": "1h 9m 31s", "remaining_time": "1h 58m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788715} {"loss": 0.31723648, "grad_norm": 2.37523103, "learning_rate": 7.45e-05, "token_acc": 0.89496718, "epoch": 3.70191226, "global_step/max_steps": "3291/8890", "percentage": "37.02%", "elapsed_time": "1h 9m 32s", "remaining_time": "1h 58m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788708} {"loss": 0.33135641, "grad_norm": 2.16766, "learning_rate": 7.448e-05, "token_acc": 0.90086207, "epoch": 3.70303712, "global_step/max_steps": "3292/8890", "percentage": "37.03%", "elapsed_time": "1h 9m 33s", "remaining_time": "1h 58m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788699} {"loss": 0.30381605, "grad_norm": 2.91681314, "learning_rate": 7.447e-05, "token_acc": 0.89348172, "epoch": 3.70416198, "global_step/max_steps": "3293/8890", "percentage": "37.04%", "elapsed_time": "1h 9m 35s", "remaining_time": "1h 58m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788736} {"loss": 0.40975088, "grad_norm": 2.76482201, "learning_rate": 7.445e-05, "token_acc": 0.86745407, "epoch": 3.70528684, "global_step/max_steps": "3294/8890", "percentage": "37.05%", "elapsed_time": "1h 9m 36s", "remaining_time": "1h 58m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788756} {"loss": 0.32650343, "grad_norm": 2.44632959, "learning_rate": 7.443e-05, "token_acc": 0.90966123, "epoch": 3.7064117, "global_step/max_steps": "3295/8890", "percentage": "37.06%", "elapsed_time": "1h 9m 37s", "remaining_time": "1h 58m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788782} {"loss": 0.38563728, "grad_norm": 2.45019317, "learning_rate": 7.442e-05, "token_acc": 0.87773933, "epoch": 3.70753656, "global_step/max_steps": "3296/8890", "percentage": "37.08%", "elapsed_time": "1h 9m 38s", "remaining_time": "1h 58m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788825} {"loss": 0.37215477, "grad_norm": 2.59607267, "learning_rate": 7.44e-05, "token_acc": 0.87760098, "epoch": 3.70866142, "global_step/max_steps": "3297/8890", "percentage": "37.09%", "elapsed_time": "1h 9m 39s", "remaining_time": "1h 58m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788852} {"loss": 0.29258698, "grad_norm": 2.38147926, "learning_rate": 7.439e-05, "token_acc": 0.90524968, "epoch": 3.70978628, "global_step/max_steps": "3298/8890", "percentage": "37.10%", "elapsed_time": "1h 9m 40s", "remaining_time": "1h 58m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788876} {"loss": 0.36340684, "grad_norm": 2.43085003, "learning_rate": 7.437e-05, "token_acc": 0.87445415, "epoch": 3.71091114, "global_step/max_steps": "3299/8890", "percentage": "37.11%", "elapsed_time": "1h 9m 41s", "remaining_time": "1h 58m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788902} {"loss": 0.28645596, "grad_norm": 2.19285417, "learning_rate": 7.435e-05, "token_acc": 0.9005848, "epoch": 3.712036, "global_step/max_steps": "3300/8890", "percentage": "37.12%", "elapsed_time": "1h 9m 42s", "remaining_time": "1h 58m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.788924} {"eval_loss": 1.06897652, "eval_runtime": 31.7402, "eval_samples_per_second": 25.299, "eval_steps_per_second": 3.182, "eval_token_acc": 0.73587218, "epoch": 3.712036, "global_step/max_steps": "3300/8890", "percentage": "37.12%", "elapsed_time": "1h 10m 14s", "remaining_time": "1h 58m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782981} {"loss": 0.30777052, "grad_norm": 2.4932673, "learning_rate": 7.434e-05, "token_acc": 0.90284974, "epoch": 3.71316085, "global_step/max_steps": "3301/8890", "percentage": "37.13%", "elapsed_time": "1h 10m 30s", "remaining_time": "1h 59m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780363} {"loss": 0.34643719, "grad_norm": 2.28224754, "learning_rate": 7.432e-05, "token_acc": 0.88760504, "epoch": 3.71428571, "global_step/max_steps": "3302/8890", "percentage": "37.14%", "elapsed_time": "1h 10m 31s", "remaining_time": "1h 59m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780401} {"loss": 0.30912516, "grad_norm": 2.52715516, "learning_rate": 7.43e-05, "token_acc": 0.89818689, "epoch": 3.71541057, "global_step/max_steps": "3303/8890", "percentage": "37.15%", "elapsed_time": "1h 10m 32s", "remaining_time": "1h 59m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780431} {"loss": 0.39779598, "grad_norm": 2.27888799, "learning_rate": 7.429e-05, "token_acc": 0.87918871, "epoch": 3.71653543, "global_step/max_steps": "3304/8890", "percentage": "37.17%", "elapsed_time": "1h 10m 33s", "remaining_time": "1h 59m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78048} {"loss": 0.33498004, "grad_norm": 2.92174292, "learning_rate": 7.427e-05, "token_acc": 0.89863843, "epoch": 3.71766029, "global_step/max_steps": "3305/8890", "percentage": "37.18%", "elapsed_time": "1h 10m 34s", "remaining_time": "1h 59m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780503} {"loss": 0.34877235, "grad_norm": 2.37494731, "learning_rate": 7.426e-05, "token_acc": 0.89262372, "epoch": 3.71878515, "global_step/max_steps": "3306/8890", "percentage": "37.19%", "elapsed_time": "1h 10m 35s", "remaining_time": "1h 59m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780505} {"loss": 0.33986425, "grad_norm": 2.12783742, "learning_rate": 7.424e-05, "token_acc": 0.89434276, "epoch": 3.71991001, "global_step/max_steps": "3307/8890", "percentage": "37.20%", "elapsed_time": "1h 10m 36s", "remaining_time": "1h 59m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780532} {"loss": 0.30216151, "grad_norm": 2.78021288, "learning_rate": 7.422e-05, "token_acc": 0.89240506, "epoch": 3.72103487, "global_step/max_steps": "3308/8890", "percentage": "37.21%", "elapsed_time": "1h 10m 38s", "remaining_time": "1h 59m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780556} {"loss": 0.28407472, "grad_norm": 2.60795498, "learning_rate": 7.421e-05, "token_acc": 0.90648379, "epoch": 3.72215973, "global_step/max_steps": "3309/8890", "percentage": "37.22%", "elapsed_time": "1h 10m 39s", "remaining_time": "1h 59m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780488} {"loss": 0.41495049, "grad_norm": 2.74294734, "learning_rate": 7.419e-05, "token_acc": 0.86508876, "epoch": 3.72328459, "global_step/max_steps": "3310/8890", "percentage": "37.23%", "elapsed_time": "1h 10m 40s", "remaining_time": "1h 59m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780538} {"loss": 0.4267199, "grad_norm": 2.72570705, "learning_rate": 7.417e-05, "token_acc": 0.86140351, "epoch": 3.72440945, "global_step/max_steps": "3311/8890", "percentage": "37.24%", "elapsed_time": "1h 10m 41s", "remaining_time": "1h 59m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780569} {"loss": 0.25894618, "grad_norm": 2.54864621, "learning_rate": 7.416e-05, "token_acc": 0.9078341, "epoch": 3.72553431, "global_step/max_steps": "3312/8890", "percentage": "37.26%", "elapsed_time": "1h 10m 42s", "remaining_time": "1h 59m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780592} {"loss": 0.30665168, "grad_norm": 2.04825449, "learning_rate": 7.414e-05, "token_acc": 0.90483619, "epoch": 3.72665917, "global_step/max_steps": "3313/8890", "percentage": "37.27%", "elapsed_time": "1h 10m 44s", "remaining_time": "1h 59m 4s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780609} {"loss": 0.24645692, "grad_norm": 1.99385369, "learning_rate": 7.413e-05, "token_acc": 0.91637324, "epoch": 3.72778403, "global_step/max_steps": "3314/8890", "percentage": "37.28%", "elapsed_time": "1h 10m 45s", "remaining_time": "1h 59m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780637} {"loss": 0.19583258, "grad_norm": 2.36472225, "learning_rate": 7.411e-05, "token_acc": 0.92994746, "epoch": 3.72890889, "global_step/max_steps": "3315/8890", "percentage": "37.29%", "elapsed_time": "1h 10m 46s", "remaining_time": "1h 59m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78068} {"loss": 0.33372504, "grad_norm": 3.14791656, "learning_rate": 7.409e-05, "token_acc": 0.89162562, "epoch": 3.73003375, "global_step/max_steps": "3316/8890", "percentage": "37.30%", "elapsed_time": "1h 10m 47s", "remaining_time": "1h 58m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780693} {"loss": 0.40166509, "grad_norm": 2.59202456, "learning_rate": 7.408e-05, "token_acc": 0.86327345, "epoch": 3.73115861, "global_step/max_steps": "3317/8890", "percentage": "37.31%", "elapsed_time": "1h 10m 48s", "remaining_time": "1h 58m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78071} {"loss": 0.38272396, "grad_norm": 2.67149019, "learning_rate": 7.406e-05, "token_acc": 0.87787056, "epoch": 3.73228346, "global_step/max_steps": "3318/8890", "percentage": "37.32%", "elapsed_time": "1h 10m 49s", "remaining_time": "1h 58m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78071} {"loss": 0.36840141, "grad_norm": 2.65023232, "learning_rate": 7.404e-05, "token_acc": 0.87370838, "epoch": 3.73340832, "global_step/max_steps": "3319/8890", "percentage": "37.33%", "elapsed_time": "1h 10m 51s", "remaining_time": "1h 58m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780737} {"loss": 0.32201219, "grad_norm": 2.35133696, "learning_rate": 7.403e-05, "token_acc": 0.89285714, "epoch": 3.73453318, "global_step/max_steps": "3320/8890", "percentage": "37.35%", "elapsed_time": "1h 10m 52s", "remaining_time": "1h 58m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78078} {"loss": 0.33493438, "grad_norm": 2.53284025, "learning_rate": 7.401e-05, "token_acc": 0.88967972, "epoch": 3.73565804, "global_step/max_steps": "3321/8890", "percentage": "37.36%", "elapsed_time": "1h 10m 53s", "remaining_time": "1h 58m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780822} {"loss": 0.38388768, "grad_norm": 2.74932694, "learning_rate": 7.399e-05, "token_acc": 0.88203018, "epoch": 3.7367829, "global_step/max_steps": "3322/8890", "percentage": "37.37%", "elapsed_time": "1h 10m 54s", "remaining_time": "1h 58m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780846} {"loss": 0.33688983, "grad_norm": 2.80659771, "learning_rate": 7.398e-05, "token_acc": 0.88072122, "epoch": 3.73790776, "global_step/max_steps": "3323/8890", "percentage": "37.38%", "elapsed_time": "1h 10m 55s", "remaining_time": "1h 58m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780874} {"loss": 0.3928594, "grad_norm": 2.84035206, "learning_rate": 7.396e-05, "token_acc": 0.87661406, "epoch": 3.73903262, "global_step/max_steps": "3324/8890", "percentage": "37.39%", "elapsed_time": "1h 10m 56s", "remaining_time": "1h 58m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780902} {"loss": 0.32990098, "grad_norm": 2.39082646, "learning_rate": 7.395e-05, "token_acc": 0.87878788, "epoch": 3.74015748, "global_step/max_steps": "3325/8890", "percentage": "37.40%", "elapsed_time": "1h 10m 57s", "remaining_time": "1h 58m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780906} {"loss": 0.27302116, "grad_norm": 2.16795683, "learning_rate": 7.393e-05, "token_acc": 0.90555015, "epoch": 3.74128234, "global_step/max_steps": "3326/8890", "percentage": "37.41%", "elapsed_time": "1h 10m 59s", "remaining_time": "1h 58m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780923} {"loss": 0.3576653, "grad_norm": 2.57303524, "learning_rate": 7.391e-05, "token_acc": 0.88109394, "epoch": 3.7424072, "global_step/max_steps": "3327/8890", "percentage": "37.42%", "elapsed_time": "1h 11m 0s", "remaining_time": "1h 58m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78096} {"loss": 0.34955159, "grad_norm": 2.64964175, "learning_rate": 7.39e-05, "token_acc": 0.88412017, "epoch": 3.74353206, "global_step/max_steps": "3328/8890", "percentage": "37.44%", "elapsed_time": "1h 11m 1s", "remaining_time": "1h 58m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781004} {"loss": 0.38174212, "grad_norm": 3.03553462, "learning_rate": 7.388e-05, "token_acc": 0.87626263, "epoch": 3.74465692, "global_step/max_steps": "3329/8890", "percentage": "37.45%", "elapsed_time": "1h 11m 2s", "remaining_time": "1h 58m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781046} {"loss": 0.25981134, "grad_norm": 1.94222391, "learning_rate": 7.386e-05, "token_acc": 0.9121813, "epoch": 3.74578178, "global_step/max_steps": "3330/8890", "percentage": "37.46%", "elapsed_time": "1h 11m 3s", "remaining_time": "1h 58m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781078} {"loss": 0.34802902, "grad_norm": 2.16436863, "learning_rate": 7.385e-05, "token_acc": 0.90088322, "epoch": 3.74690664, "global_step/max_steps": "3331/8890", "percentage": "37.47%", "elapsed_time": "1h 11m 4s", "remaining_time": "1h 58m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781122} {"loss": 0.48083043, "grad_norm": 2.9081955, "learning_rate": 7.383e-05, "token_acc": 0.86321839, "epoch": 3.7480315, "global_step/max_steps": "3332/8890", "percentage": "37.48%", "elapsed_time": "1h 11m 5s", "remaining_time": "1h 58m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781144} {"loss": 0.27852613, "grad_norm": 2.43305087, "learning_rate": 7.382e-05, "token_acc": 0.90422535, "epoch": 3.74915636, "global_step/max_steps": "3333/8890", "percentage": "37.49%", "elapsed_time": "1h 11m 6s", "remaining_time": "1h 58m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781168} {"loss": 0.32102811, "grad_norm": 2.57923365, "learning_rate": 7.38e-05, "token_acc": 0.90633609, "epoch": 3.75028121, "global_step/max_steps": "3334/8890", "percentage": "37.50%", "elapsed_time": "1h 11m 7s", "remaining_time": "1h 58m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781196} {"loss": 0.29479539, "grad_norm": 2.73429894, "learning_rate": 7.378e-05, "token_acc": 0.90937997, "epoch": 3.75140607, "global_step/max_steps": "3335/8890", "percentage": "37.51%", "elapsed_time": "1h 11m 8s", "remaining_time": "1h 58m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781221} {"loss": 0.37440562, "grad_norm": 2.88353825, "learning_rate": 7.377e-05, "token_acc": 0.87712418, "epoch": 3.75253093, "global_step/max_steps": "3336/8890", "percentage": "37.53%", "elapsed_time": "1h 11m 10s", "remaining_time": "1h 58m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781245} {"loss": 0.26443031, "grad_norm": 2.36937571, "learning_rate": 7.375e-05, "token_acc": 0.9117984, "epoch": 3.75365579, "global_step/max_steps": "3337/8890", "percentage": "37.54%", "elapsed_time": "1h 11m 11s", "remaining_time": "1h 58m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781284} {"loss": 0.30281767, "grad_norm": 2.59681439, "learning_rate": 7.373e-05, "token_acc": 0.89659686, "epoch": 3.75478065, "global_step/max_steps": "3338/8890", "percentage": "37.55%", "elapsed_time": "1h 11m 12s", "remaining_time": "1h 58m 26s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78131} {"loss": 0.30372572, "grad_norm": 2.234128, "learning_rate": 7.372e-05, "token_acc": 0.90748441, "epoch": 3.75590551, "global_step/max_steps": "3339/8890", "percentage": "37.56%", "elapsed_time": "1h 11m 13s", "remaining_time": "1h 58m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781336} {"loss": 0.34861511, "grad_norm": 2.65601277, "learning_rate": 7.37e-05, "token_acc": 0.89487516, "epoch": 3.75703037, "global_step/max_steps": "3340/8890", "percentage": "37.57%", "elapsed_time": "1h 11m 14s", "remaining_time": "1h 58m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781363} {"loss": 0.31716496, "grad_norm": 2.21111703, "learning_rate": 7.368e-05, "token_acc": 0.90243902, "epoch": 3.75815523, "global_step/max_steps": "3341/8890", "percentage": "37.58%", "elapsed_time": "1h 11m 15s", "remaining_time": "1h 58m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781382} {"loss": 0.40042967, "grad_norm": 2.78977346, "learning_rate": 7.367e-05, "token_acc": 0.86834734, "epoch": 3.75928009, "global_step/max_steps": "3342/8890", "percentage": "37.59%", "elapsed_time": "1h 11m 16s", "remaining_time": "1h 58m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781405} {"loss": 0.24289821, "grad_norm": 2.06392932, "learning_rate": 7.365e-05, "token_acc": 0.91728395, "epoch": 3.76040495, "global_step/max_steps": "3343/8890", "percentage": "37.60%", "elapsed_time": "1h 11m 18s", "remaining_time": "1h 58m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781425} {"loss": 0.34069288, "grad_norm": 2.60037112, "learning_rate": 7.363e-05, "token_acc": 0.89596879, "epoch": 3.76152981, "global_step/max_steps": "3344/8890", "percentage": "37.62%", "elapsed_time": "1h 11m 19s", "remaining_time": "1h 58m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781453} {"loss": 0.34195614, "grad_norm": 2.90674162, "learning_rate": 7.362e-05, "token_acc": 0.88967972, "epoch": 3.76265467, "global_step/max_steps": "3345/8890", "percentage": "37.63%", "elapsed_time": "1h 11m 20s", "remaining_time": "1h 58m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781515} {"loss": 0.37978852, "grad_norm": 2.54240465, "learning_rate": 7.36e-05, "token_acc": 0.87472527, "epoch": 3.76377953, "global_step/max_steps": "3346/8890", "percentage": "37.64%", "elapsed_time": "1h 11m 21s", "remaining_time": "1h 58m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781564} {"loss": 0.35461533, "grad_norm": 2.89201093, "learning_rate": 7.359e-05, "token_acc": 0.8885906, "epoch": 3.76490439, "global_step/max_steps": "3347/8890", "percentage": "37.65%", "elapsed_time": "1h 11m 22s", "remaining_time": "1h 58m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781593} {"loss": 0.34966606, "grad_norm": 2.66787601, "learning_rate": 7.357e-05, "token_acc": 0.87699877, "epoch": 3.76602925, "global_step/max_steps": "3348/8890", "percentage": "37.66%", "elapsed_time": "1h 11m 23s", "remaining_time": "1h 58m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781586} {"loss": 0.34673402, "grad_norm": 2.76530838, "learning_rate": 7.355e-05, "token_acc": 0.87347561, "epoch": 3.76715411, "global_step/max_steps": "3349/8890", "percentage": "37.67%", "elapsed_time": "1h 11m 24s", "remaining_time": "1h 58m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781611} {"loss": 0.2844739, "grad_norm": 2.54262495, "learning_rate": 7.354e-05, "token_acc": 0.89845758, "epoch": 3.76827897, "global_step/max_steps": "3350/8890", "percentage": "37.68%", "elapsed_time": "1h 11m 25s", "remaining_time": "1h 58m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781662} {"loss": 0.43339401, "grad_norm": 2.71768212, "learning_rate": 7.352e-05, "token_acc": 0.85788382, "epoch": 3.76940382, "global_step/max_steps": "3351/8890", "percentage": "37.69%", "elapsed_time": "1h 11m 26s", "remaining_time": "1h 58m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781705} {"loss": 0.42745495, "grad_norm": 2.52938461, "learning_rate": 7.35e-05, "token_acc": 0.85862786, "epoch": 3.77052868, "global_step/max_steps": "3352/8890", "percentage": "37.71%", "elapsed_time": "1h 11m 27s", "remaining_time": "1h 58m 4s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78174} {"loss": 0.36188668, "grad_norm": 2.73436522, "learning_rate": 7.349e-05, "token_acc": 0.87887888, "epoch": 3.77165354, "global_step/max_steps": "3353/8890", "percentage": "37.72%", "elapsed_time": "1h 11m 29s", "remaining_time": "1h 58m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781762} {"loss": 0.34484291, "grad_norm": 2.8434248, "learning_rate": 7.347e-05, "token_acc": 0.88983051, "epoch": 3.7727784, "global_step/max_steps": "3354/8890", "percentage": "37.73%", "elapsed_time": "1h 11m 30s", "remaining_time": "1h 58m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781804} {"loss": 0.35184622, "grad_norm": 2.07632041, "learning_rate": 7.345e-05, "token_acc": 0.88289037, "epoch": 3.77390326, "global_step/max_steps": "3355/8890", "percentage": "37.74%", "elapsed_time": "1h 11m 31s", "remaining_time": "1h 57m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781782} {"loss": 0.34782887, "grad_norm": 2.70022249, "learning_rate": 7.344e-05, "token_acc": 0.88409704, "epoch": 3.77502812, "global_step/max_steps": "3356/8890", "percentage": "37.75%", "elapsed_time": "1h 11m 32s", "remaining_time": "1h 57m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781802} {"loss": 0.39113986, "grad_norm": 2.37526846, "learning_rate": 7.342e-05, "token_acc": 0.8741328, "epoch": 3.77615298, "global_step/max_steps": "3357/8890", "percentage": "37.76%", "elapsed_time": "1h 11m 33s", "remaining_time": "1h 57m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781824} {"loss": 0.35253489, "grad_norm": 2.68954086, "learning_rate": 7.341e-05, "token_acc": 0.88888889, "epoch": 3.77727784, "global_step/max_steps": "3358/8890", "percentage": "37.77%", "elapsed_time": "1h 11m 34s", "remaining_time": "1h 57m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781846} {"loss": 0.30799708, "grad_norm": 2.77164102, "learning_rate": 7.339e-05, "token_acc": 0.89081225, "epoch": 3.7784027, "global_step/max_steps": "3359/8890", "percentage": "37.78%", "elapsed_time": "1h 11m 35s", "remaining_time": "1h 57m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781893} {"loss": 0.25826442, "grad_norm": 2.06451273, "learning_rate": 7.337e-05, "token_acc": 0.91885714, "epoch": 3.77952756, "global_step/max_steps": "3360/8890", "percentage": "37.80%", "elapsed_time": "1h 11m 37s", "remaining_time": "1h 57m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781895} {"loss": 0.34534466, "grad_norm": 3.00440288, "learning_rate": 7.336e-05, "token_acc": 0.88216039, "epoch": 3.78065242, "global_step/max_steps": "3361/8890", "percentage": "37.81%", "elapsed_time": "1h 11m 38s", "remaining_time": "1h 57m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781925} {"loss": 0.29070079, "grad_norm": 2.35920668, "learning_rate": 7.334e-05, "token_acc": 0.90208078, "epoch": 3.78177728, "global_step/max_steps": "3362/8890", "percentage": "37.82%", "elapsed_time": "1h 11m 39s", "remaining_time": "1h 57m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781951} {"loss": 0.28143483, "grad_norm": 2.38927126, "learning_rate": 7.332e-05, "token_acc": 0.90495315, "epoch": 3.78290214, "global_step/max_steps": "3363/8890", "percentage": "37.83%", "elapsed_time": "1h 11m 40s", "remaining_time": "1h 57m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781983} {"loss": 0.35551155, "grad_norm": 2.22788358, "learning_rate": 7.331e-05, "token_acc": 0.88383349, "epoch": 3.784027, "global_step/max_steps": "3364/8890", "percentage": "37.84%", "elapsed_time": "1h 11m 41s", "remaining_time": "1h 57m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782009} {"loss": 0.38774979, "grad_norm": 2.77342677, "learning_rate": 7.329e-05, "token_acc": 0.87236679, "epoch": 3.78515186, "global_step/max_steps": "3365/8890", "percentage": "37.85%", "elapsed_time": "1h 11m 42s", "remaining_time": "1h 57m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782051} {"loss": 0.23398849, "grad_norm": 2.13241529, "learning_rate": 7.327e-05, "token_acc": 0.92377261, "epoch": 3.78627672, "global_step/max_steps": "3366/8890", "percentage": "37.86%", "elapsed_time": "1h 11m 43s", "remaining_time": "1h 57m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78207} {"loss": 0.36670741, "grad_norm": 2.59128857, "learning_rate": 7.326e-05, "token_acc": 0.88169869, "epoch": 3.78740157, "global_step/max_steps": "3367/8890", "percentage": "37.87%", "elapsed_time": "1h 11m 45s", "remaining_time": "1h 57m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782097} {"loss": 0.41274714, "grad_norm": 2.50340557, "learning_rate": 7.324e-05, "token_acc": 0.87245445, "epoch": 3.78852643, "global_step/max_steps": "3368/8890", "percentage": "37.89%", "elapsed_time": "1h 11m 46s", "remaining_time": "1h 57m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78212} {"loss": 0.32885599, "grad_norm": 2.77600527, "learning_rate": 7.322e-05, "token_acc": 0.89558233, "epoch": 3.78965129, "global_step/max_steps": "3369/8890", "percentage": "37.90%", "elapsed_time": "1h 11m 47s", "remaining_time": "1h 57m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782143} {"loss": 0.38411942, "grad_norm": 2.53141427, "learning_rate": 7.321e-05, "token_acc": 0.86834171, "epoch": 3.79077615, "global_step/max_steps": "3370/8890", "percentage": "37.91%", "elapsed_time": "1h 11m 48s", "remaining_time": "1h 57m 37s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78215} {"loss": 0.40819019, "grad_norm": 3.09914088, "learning_rate": 7.319e-05, "token_acc": 0.86535764, "epoch": 3.79190101, "global_step/max_steps": "3371/8890", "percentage": "37.92%", "elapsed_time": "1h 11m 49s", "remaining_time": "1h 57m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782193} {"loss": 0.30249244, "grad_norm": 2.53892803, "learning_rate": 7.317e-05, "token_acc": 0.90199081, "epoch": 3.79302587, "global_step/max_steps": "3372/8890", "percentage": "37.93%", "elapsed_time": "1h 11m 50s", "remaining_time": "1h 57m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782233} {"loss": 0.32476559, "grad_norm": 2.42022562, "learning_rate": 7.316e-05, "token_acc": 0.89132821, "epoch": 3.79415073, "global_step/max_steps": "3373/8890", "percentage": "37.94%", "elapsed_time": "1h 11m 51s", "remaining_time": "1h 57m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782246} {"loss": 0.3483634, "grad_norm": 2.53376269, "learning_rate": 7.314e-05, "token_acc": 0.89016018, "epoch": 3.79527559, "global_step/max_steps": "3374/8890", "percentage": "37.95%", "elapsed_time": "1h 11m 53s", "remaining_time": "1h 57m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782274} {"loss": 0.37886134, "grad_norm": 2.7791667, "learning_rate": 7.313e-05, "token_acc": 0.87533156, "epoch": 3.79640045, "global_step/max_steps": "3375/8890", "percentage": "37.96%", "elapsed_time": "1h 11m 54s", "remaining_time": "1h 57m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782297} {"loss": 0.3964214, "grad_norm": 2.75492954, "learning_rate": 7.311e-05, "token_acc": 0.87593583, "epoch": 3.79752531, "global_step/max_steps": "3376/8890", "percentage": "37.98%", "elapsed_time": "1h 11m 55s", "remaining_time": "1h 57m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782319} {"loss": 0.34467405, "grad_norm": 2.87003851, "learning_rate": 7.309e-05, "token_acc": 0.87931034, "epoch": 3.79865017, "global_step/max_steps": "3377/8890", "percentage": "37.99%", "elapsed_time": "1h 11m 56s", "remaining_time": "1h 57m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782304} {"loss": 0.32490385, "grad_norm": 2.55828285, "learning_rate": 7.308e-05, "token_acc": 0.88590604, "epoch": 3.79977503, "global_step/max_steps": "3378/8890", "percentage": "38.00%", "elapsed_time": "1h 11m 57s", "remaining_time": "1h 57m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78233} {"loss": 0.30210632, "grad_norm": 2.35734439, "learning_rate": 7.306e-05, "token_acc": 0.90382627, "epoch": 3.80089989, "global_step/max_steps": "3379/8890", "percentage": "38.01%", "elapsed_time": "1h 11m 58s", "remaining_time": "1h 57m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78237} {"loss": 0.44423854, "grad_norm": 3.06099796, "learning_rate": 7.304e-05, "token_acc": 0.85180723, "epoch": 3.80202475, "global_step/max_steps": "3380/8890", "percentage": "38.02%", "elapsed_time": "1h 11m 59s", "remaining_time": "1h 57m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782413} {"loss": 0.281434, "grad_norm": 2.60286999, "learning_rate": 7.303e-05, "token_acc": 0.90871935, "epoch": 3.80314961, "global_step/max_steps": "3381/8890", "percentage": "38.03%", "elapsed_time": "1h 12m 1s", "remaining_time": "1h 57m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782454} {"loss": 0.32710108, "grad_norm": 2.7908802, "learning_rate": 7.301e-05, "token_acc": 0.89660266, "epoch": 3.80427447, "global_step/max_steps": "3382/8890", "percentage": "38.04%", "elapsed_time": "1h 12m 2s", "remaining_time": "1h 57m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782494} {"loss": 0.3548283, "grad_norm": 2.42562628, "learning_rate": 7.299e-05, "token_acc": 0.88487805, "epoch": 3.80539933, "global_step/max_steps": "3383/8890", "percentage": "38.05%", "elapsed_time": "1h 12m 3s", "remaining_time": "1h 57m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782522} {"loss": 0.36590195, "grad_norm": 2.7419734, "learning_rate": 7.298e-05, "token_acc": 0.86804657, "epoch": 3.80652418, "global_step/max_steps": "3384/8890", "percentage": "38.07%", "elapsed_time": "1h 12m 4s", "remaining_time": "1h 57m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782569} {"loss": 0.48439497, "grad_norm": 2.34248209, "learning_rate": 7.296e-05, "token_acc": 0.85136078, "epoch": 3.80764904, "global_step/max_steps": "3385/8890", "percentage": "38.08%", "elapsed_time": "1h 12m 5s", "remaining_time": "1h 57m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782565} {"loss": 0.34940755, "grad_norm": 3.15415621, "learning_rate": 7.294e-05, "token_acc": 0.8735119, "epoch": 3.8087739, "global_step/max_steps": "3386/8890", "percentage": "38.09%", "elapsed_time": "1h 12m 6s", "remaining_time": "1h 57m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782602} {"loss": 0.41560334, "grad_norm": 2.82168674, "learning_rate": 7.293e-05, "token_acc": 0.87614679, "epoch": 3.80989876, "global_step/max_steps": "3387/8890", "percentage": "38.10%", "elapsed_time": "1h 12m 7s", "remaining_time": "1h 57m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782626} {"loss": 0.35633451, "grad_norm": 2.24093962, "learning_rate": 7.291e-05, "token_acc": 0.88319672, "epoch": 3.81102362, "global_step/max_steps": "3388/8890", "percentage": "38.11%", "elapsed_time": "1h 12m 8s", "remaining_time": "1h 57m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78265} {"loss": 0.27299333, "grad_norm": 2.44380975, "learning_rate": 7.289e-05, "token_acc": 0.90104773, "epoch": 3.81214848, "global_step/max_steps": "3389/8890", "percentage": "38.12%", "elapsed_time": "1h 12m 10s", "remaining_time": "1h 57m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782672} {"loss": 0.25397956, "grad_norm": 1.98993623, "learning_rate": 7.288e-05, "token_acc": 0.91676867, "epoch": 3.81327334, "global_step/max_steps": "3390/8890", "percentage": "38.13%", "elapsed_time": "1h 12m 11s", "remaining_time": "1h 57m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782701} {"loss": 0.34522301, "grad_norm": 2.39764237, "learning_rate": 7.286e-05, "token_acc": 0.88669439, "epoch": 3.8143982, "global_step/max_steps": "3391/8890", "percentage": "38.14%", "elapsed_time": "1h 12m 12s", "remaining_time": "1h 57m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782723} {"loss": 0.3433111, "grad_norm": 2.42486739, "learning_rate": 7.284e-05, "token_acc": 0.88305253, "epoch": 3.81552306, "global_step/max_steps": "3392/8890", "percentage": "38.16%", "elapsed_time": "1h 12m 13s", "remaining_time": "1h 57m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782748} {"loss": 0.28981787, "grad_norm": 1.97002447, "learning_rate": 7.283e-05, "token_acc": 0.90548204, "epoch": 3.81664792, "global_step/max_steps": "3393/8890", "percentage": "38.17%", "elapsed_time": "1h 12m 14s", "remaining_time": "1h 57m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78275} {"loss": 0.35590136, "grad_norm": 2.677531, "learning_rate": 7.281e-05, "token_acc": 0.87799043, "epoch": 3.81777278, "global_step/max_steps": "3394/8890", "percentage": "38.18%", "elapsed_time": "1h 12m 15s", "remaining_time": "1h 57m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782776} {"loss": 0.3330074, "grad_norm": 2.28914928, "learning_rate": 7.279e-05, "token_acc": 0.89200415, "epoch": 3.81889764, "global_step/max_steps": "3395/8890", "percentage": "38.19%", "elapsed_time": "1h 12m 16s", "remaining_time": "1h 56m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782801} {"loss": 0.36037576, "grad_norm": 2.73059464, "learning_rate": 7.278e-05, "token_acc": 0.86938776, "epoch": 3.8200225, "global_step/max_steps": "3396/8890", "percentage": "38.20%", "elapsed_time": "1h 12m 18s", "remaining_time": "1h 56m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782814} {"loss": 0.31614715, "grad_norm": 2.61696243, "learning_rate": 7.276e-05, "token_acc": 0.8878628, "epoch": 3.82114736, "global_step/max_steps": "3397/8890", "percentage": "38.21%", "elapsed_time": "1h 12m 19s", "remaining_time": "1h 56m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782843} {"loss": 0.29561347, "grad_norm": 2.65465212, "learning_rate": 7.275e-05, "token_acc": 0.89206762, "epoch": 3.82227222, "global_step/max_steps": "3398/8890", "percentage": "38.22%", "elapsed_time": "1h 12m 20s", "remaining_time": "1h 56m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78288} {"loss": 0.38956425, "grad_norm": 2.42113495, "learning_rate": 7.273e-05, "token_acc": 0.88229572, "epoch": 3.82339708, "global_step/max_steps": "3399/8890", "percentage": "38.23%", "elapsed_time": "1h 12m 21s", "remaining_time": "1h 56m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782882} {"loss": 0.45884696, "grad_norm": 2.86153579, "learning_rate": 7.271e-05, "token_acc": 0.85654886, "epoch": 3.82452193, "global_step/max_steps": "3400/8890", "percentage": "38.25%", "elapsed_time": "1h 12m 22s", "remaining_time": "1h 56m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782881} {"loss": 0.35598522, "grad_norm": 2.25587344, "learning_rate": 7.27e-05, "token_acc": 0.88930582, "epoch": 3.82564679, "global_step/max_steps": "3401/8890", "percentage": "38.26%", "elapsed_time": "1h 12m 24s", "remaining_time": "1h 56m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782907} {"loss": 0.31725821, "grad_norm": 3.24849391, "learning_rate": 7.268e-05, "token_acc": 0.89456343, "epoch": 3.82677165, "global_step/max_steps": "3402/8890", "percentage": "38.27%", "elapsed_time": "1h 12m 25s", "remaining_time": "1h 56m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782937} {"loss": 0.29051268, "grad_norm": 2.77843547, "learning_rate": 7.266e-05, "token_acc": 0.90237099, "epoch": 3.82789651, "global_step/max_steps": "3403/8890", "percentage": "38.28%", "elapsed_time": "1h 12m 26s", "remaining_time": "1h 56m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782961} {"loss": 0.35000697, "grad_norm": 2.82582068, "learning_rate": 7.265e-05, "token_acc": 0.89449004, "epoch": 3.82902137, "global_step/max_steps": "3404/8890", "percentage": "38.29%", "elapsed_time": "1h 12m 27s", "remaining_time": "1h 56m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782972} {"loss": 0.33834881, "grad_norm": 2.2160604, "learning_rate": 7.263e-05, "token_acc": 0.90634146, "epoch": 3.83014623, "global_step/max_steps": "3405/8890", "percentage": "38.30%", "elapsed_time": "1h 12m 28s", "remaining_time": "1h 56m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782971} {"loss": 0.40005958, "grad_norm": 2.53901696, "learning_rate": 7.261e-05, "token_acc": 0.87122736, "epoch": 3.83127109, "global_step/max_steps": "3406/8890", "percentage": "38.31%", "elapsed_time": "1h 12m 29s", "remaining_time": "1h 56m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782989} {"loss": 0.26193741, "grad_norm": 2.16980124, "learning_rate": 7.26e-05, "token_acc": 0.9057971, "epoch": 3.83239595, "global_step/max_steps": "3407/8890", "percentage": "38.32%", "elapsed_time": "1h 12m 31s", "remaining_time": "1h 56m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783015} {"loss": 0.38574752, "grad_norm": 2.59836841, "learning_rate": 7.258e-05, "token_acc": 0.87792969, "epoch": 3.83352081, "global_step/max_steps": "3408/8890", "percentage": "38.34%", "elapsed_time": "1h 12m 32s", "remaining_time": "1h 56m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783012} {"loss": 0.37554216, "grad_norm": 2.38057208, "learning_rate": 7.256e-05, "token_acc": 0.8804243, "epoch": 3.83464567, "global_step/max_steps": "3409/8890", "percentage": "38.35%", "elapsed_time": "1h 12m 33s", "remaining_time": "1h 56m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783029} {"loss": 0.38139522, "grad_norm": 2.5261097, "learning_rate": 7.255e-05, "token_acc": 0.87404995, "epoch": 3.83577053, "global_step/max_steps": "3410/8890", "percentage": "38.36%", "elapsed_time": "1h 12m 34s", "remaining_time": "1h 56m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783055} {"loss": 0.38244796, "grad_norm": 3.00618148, "learning_rate": 7.253e-05, "token_acc": 0.86768448, "epoch": 3.83689539, "global_step/max_steps": "3411/8890", "percentage": "38.37%", "elapsed_time": "1h 12m 35s", "remaining_time": "1h 56m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783077} {"loss": 0.36371738, "grad_norm": 2.98516607, "learning_rate": 7.251e-05, "token_acc": 0.87966306, "epoch": 3.83802025, "global_step/max_steps": "3412/8890", "percentage": "38.38%", "elapsed_time": "1h 12m 37s", "remaining_time": "1h 56m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783103} {"loss": 0.31709611, "grad_norm": 3.13516641, "learning_rate": 7.25e-05, "token_acc": 0.89297659, "epoch": 3.83914511, "global_step/max_steps": "3413/8890", "percentage": "38.39%", "elapsed_time": "1h 12m 38s", "remaining_time": "1h 56m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783127} {"loss": 0.33781362, "grad_norm": 2.60630465, "learning_rate": 7.248e-05, "token_acc": 0.89326557, "epoch": 3.84026997, "global_step/max_steps": "3414/8890", "percentage": "38.40%", "elapsed_time": "1h 12m 39s", "remaining_time": "1h 56m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783151} {"loss": 0.34484112, "grad_norm": 2.82310319, "learning_rate": 7.246e-05, "token_acc": 0.87108886, "epoch": 3.84139483, "global_step/max_steps": "3415/8890", "percentage": "38.41%", "elapsed_time": "1h 12m 40s", "remaining_time": "1h 56m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783171} {"loss": 0.36735556, "grad_norm": 2.27346921, "learning_rate": 7.245e-05, "token_acc": 0.87982456, "epoch": 3.84251969, "global_step/max_steps": "3416/8890", "percentage": "38.43%", "elapsed_time": "1h 12m 41s", "remaining_time": "1h 56m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783195} {"loss": 0.29350984, "grad_norm": 2.54645467, "learning_rate": 7.243e-05, "token_acc": 0.9102402, "epoch": 3.84364454, "global_step/max_steps": "3417/8890", "percentage": "38.44%", "elapsed_time": "1h 12m 42s", "remaining_time": "1h 56m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783235} {"loss": 0.35390887, "grad_norm": 2.79260254, "learning_rate": 7.241e-05, "token_acc": 0.88064047, "epoch": 3.8447694, "global_step/max_steps": "3418/8890", "percentage": "38.45%", "elapsed_time": "1h 12m 43s", "remaining_time": "1h 56m 26s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783275} {"loss": 0.30885893, "grad_norm": 2.40559649, "learning_rate": 7.24e-05, "token_acc": 0.91325898, "epoch": 3.84589426, "global_step/max_steps": "3419/8890", "percentage": "38.46%", "elapsed_time": "1h 12m 44s", "remaining_time": "1h 56m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783303} {"loss": 0.33226842, "grad_norm": 2.69394946, "learning_rate": 7.238e-05, "token_acc": 0.89455782, "epoch": 3.84701912, "global_step/max_steps": "3420/8890", "percentage": "38.47%", "elapsed_time": "1h 12m 45s", "remaining_time": "1h 56m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783332} {"loss": 0.35690749, "grad_norm": 2.2014401, "learning_rate": 7.236e-05, "token_acc": 0.88263283, "epoch": 3.84814398, "global_step/max_steps": "3421/8890", "percentage": "38.48%", "elapsed_time": "1h 12m 47s", "remaining_time": "1h 56m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783351} {"loss": 0.2991299, "grad_norm": 4.17640781, "learning_rate": 7.235e-05, "token_acc": 0.8962963, "epoch": 3.84926884, "global_step/max_steps": "3422/8890", "percentage": "38.49%", "elapsed_time": "1h 12m 48s", "remaining_time": "1h 56m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783344} {"loss": 0.44914061, "grad_norm": 2.71670079, "learning_rate": 7.233e-05, "token_acc": 0.85155196, "epoch": 3.8503937, "global_step/max_steps": "3423/8890", "percentage": "38.50%", "elapsed_time": "1h 12m 49s", "remaining_time": "1h 56m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78337} {"loss": 0.30716696, "grad_norm": 2.33869195, "learning_rate": 7.231e-05, "token_acc": 0.88983957, "epoch": 3.85151856, "global_step/max_steps": "3424/8890", "percentage": "38.52%", "elapsed_time": "1h 12m 50s", "remaining_time": "1h 56m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783389} {"loss": 0.3493776, "grad_norm": 2.32035303, "learning_rate": 7.23e-05, "token_acc": 0.88839681, "epoch": 3.85264342, "global_step/max_steps": "3425/8890", "percentage": "38.53%", "elapsed_time": "1h 12m 51s", "remaining_time": "1h 56m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783413} {"loss": 0.23428336, "grad_norm": 2.36195707, "learning_rate": 7.228e-05, "token_acc": 0.92914654, "epoch": 3.85376828, "global_step/max_steps": "3426/8890", "percentage": "38.54%", "elapsed_time": "1h 12m 52s", "remaining_time": "1h 56m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78346} {"loss": 0.25508392, "grad_norm": 2.34281993, "learning_rate": 7.226e-05, "token_acc": 0.91740413, "epoch": 3.85489314, "global_step/max_steps": "3427/8890", "percentage": "38.55%", "elapsed_time": "1h 12m 54s", "remaining_time": "1h 56m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783482} {"loss": 0.3126536, "grad_norm": 2.17947555, "learning_rate": 7.225e-05, "token_acc": 0.88732394, "epoch": 3.856018, "global_step/max_steps": "3428/8890", "percentage": "38.56%", "elapsed_time": "1h 12m 55s", "remaining_time": "1h 56m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783482} {"loss": 0.42045715, "grad_norm": 2.55070281, "learning_rate": 7.223e-05, "token_acc": 0.86567164, "epoch": 3.85714286, "global_step/max_steps": "3429/8890", "percentage": "38.57%", "elapsed_time": "1h 12m 56s", "remaining_time": "1h 56m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783506} {"loss": 0.43773925, "grad_norm": 3.51734066, "learning_rate": 7.221e-05, "token_acc": 0.85185185, "epoch": 3.85826772, "global_step/max_steps": "3430/8890", "percentage": "38.58%", "elapsed_time": "1h 12m 57s", "remaining_time": "1h 56m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783535} {"loss": 0.30238241, "grad_norm": 2.31956816, "learning_rate": 7.22e-05, "token_acc": 0.9005848, "epoch": 3.85939258, "global_step/max_steps": "3431/8890", "percentage": "38.59%", "elapsed_time": "1h 12m 58s", "remaining_time": "1h 56m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78358} {"loss": 0.34679073, "grad_norm": 2.13145638, "learning_rate": 7.218e-05, "token_acc": 0.88691983, "epoch": 3.86051744, "global_step/max_steps": "3432/8890", "percentage": "38.61%", "elapsed_time": "1h 12m 59s", "remaining_time": "1h 56m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783605} {"loss": 0.33156288, "grad_norm": 2.60767198, "learning_rate": 7.216e-05, "token_acc": 0.88681758, "epoch": 3.86164229, "global_step/max_steps": "3433/8890", "percentage": "38.62%", "elapsed_time": "1h 13m 0s", "remaining_time": "1h 56m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783654} {"loss": 0.28364795, "grad_norm": 2.40224934, "learning_rate": 7.215e-05, "token_acc": 0.9015544, "epoch": 3.86276715, "global_step/max_steps": "3434/8890", "percentage": "38.63%", "elapsed_time": "1h 13m 1s", "remaining_time": "1h 56m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783679} {"loss": 0.39887655, "grad_norm": 2.46610165, "learning_rate": 7.213e-05, "token_acc": 0.88516746, "epoch": 3.86389201, "global_step/max_steps": "3435/8890", "percentage": "38.64%", "elapsed_time": "1h 13m 2s", "remaining_time": "1h 56m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78372} {"loss": 0.31412882, "grad_norm": 2.58933997, "learning_rate": 7.211e-05, "token_acc": 0.89968153, "epoch": 3.86501687, "global_step/max_steps": "3436/8890", "percentage": "38.65%", "elapsed_time": "1h 13m 3s", "remaining_time": "1h 55m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783765} {"loss": 0.28880054, "grad_norm": 1.95100939, "learning_rate": 7.21e-05, "token_acc": 0.89925681, "epoch": 3.86614173, "global_step/max_steps": "3437/8890", "percentage": "38.66%", "elapsed_time": "1h 13m 5s", "remaining_time": "1h 55m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783756} {"loss": 0.37989694, "grad_norm": 2.6996913, "learning_rate": 7.208e-05, "token_acc": 0.88078818, "epoch": 3.86726659, "global_step/max_steps": "3438/8890", "percentage": "38.67%", "elapsed_time": "1h 13m 6s", "remaining_time": "1h 55m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783779} {"loss": 0.34151492, "grad_norm": 2.50344825, "learning_rate": 7.206e-05, "token_acc": 0.90122087, "epoch": 3.86839145, "global_step/max_steps": "3439/8890", "percentage": "38.68%", "elapsed_time": "1h 13m 7s", "remaining_time": "1h 55m 54s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783823} {"loss": 0.37648928, "grad_norm": 2.95376492, "learning_rate": 7.205e-05, "token_acc": 0.8920765, "epoch": 3.86951631, "global_step/max_steps": "3440/8890", "percentage": "38.70%", "elapsed_time": "1h 13m 8s", "remaining_time": "1h 55m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783844} {"loss": 0.37376767, "grad_norm": 2.74142814, "learning_rate": 7.203e-05, "token_acc": 0.87008734, "epoch": 3.87064117, "global_step/max_steps": "3441/8890", "percentage": "38.71%", "elapsed_time": "1h 13m 9s", "remaining_time": "1h 55m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783845} {"loss": 0.36117011, "grad_norm": 2.42369723, "learning_rate": 7.201e-05, "token_acc": 0.87571157, "epoch": 3.87176603, "global_step/max_steps": "3442/8890", "percentage": "38.72%", "elapsed_time": "1h 13m 11s", "remaining_time": "1h 55m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783871} {"loss": 0.35903481, "grad_norm": 2.25282335, "learning_rate": 7.2e-05, "token_acc": 0.88153998, "epoch": 3.87289089, "global_step/max_steps": "3443/8890", "percentage": "38.73%", "elapsed_time": "1h 13m 12s", "remaining_time": "1h 55m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783895} {"loss": 0.38070434, "grad_norm": 2.82183528, "learning_rate": 7.198e-05, "token_acc": 0.87419769, "epoch": 3.87401575, "global_step/max_steps": "3444/8890", "percentage": "38.74%", "elapsed_time": "1h 13m 13s", "remaining_time": "1h 55m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783918} {"loss": 0.33480138, "grad_norm": 2.41826725, "learning_rate": 7.196e-05, "token_acc": 0.8970276, "epoch": 3.87514061, "global_step/max_steps": "3445/8890", "percentage": "38.75%", "elapsed_time": "1h 13m 14s", "remaining_time": "1h 55m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783938} {"loss": 0.35697508, "grad_norm": 2.47585177, "learning_rate": 7.195e-05, "token_acc": 0.89466667, "epoch": 3.87626547, "global_step/max_steps": "3446/8890", "percentage": "38.76%", "elapsed_time": "1h 13m 15s", "remaining_time": "1h 55m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783962} {"loss": 0.37154841, "grad_norm": 2.46464992, "learning_rate": 7.193e-05, "token_acc": 0.89052632, "epoch": 3.87739033, "global_step/max_steps": "3447/8890", "percentage": "38.77%", "elapsed_time": "1h 13m 16s", "remaining_time": "1h 55m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783988} {"loss": 0.25862789, "grad_norm": 2.61418247, "learning_rate": 7.191e-05, "token_acc": 0.91937765, "epoch": 3.87851519, "global_step/max_steps": "3448/8890", "percentage": "38.79%", "elapsed_time": "1h 13m 17s", "remaining_time": "1h 55m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78401} {"loss": 0.48043576, "grad_norm": 2.16173148, "learning_rate": 7.19e-05, "token_acc": 0.85480706, "epoch": 3.87964004, "global_step/max_steps": "3449/8890", "percentage": "38.80%", "elapsed_time": "1h 13m 19s", "remaining_time": "1h 55m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78399} {"loss": 0.41501522, "grad_norm": 3.14663529, "learning_rate": 7.188e-05, "token_acc": 0.87140696, "epoch": 3.8807649, "global_step/max_steps": "3450/8890", "percentage": "38.81%", "elapsed_time": "1h 13m 20s", "remaining_time": "1h 55m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784049} {"loss": 0.41909596, "grad_norm": 2.50151372, "learning_rate": 7.186e-05, "token_acc": 0.86901961, "epoch": 3.88188976, "global_step/max_steps": "3451/8890", "percentage": "38.82%", "elapsed_time": "1h 13m 21s", "remaining_time": "1h 55m 37s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784043} {"loss": 0.38063183, "grad_norm": 2.73629022, "learning_rate": 7.185e-05, "token_acc": 0.88574538, "epoch": 3.88301462, "global_step/max_steps": "3452/8890", "percentage": "38.83%", "elapsed_time": "1h 13m 22s", "remaining_time": "1h 55m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784084} {"loss": 0.40020615, "grad_norm": 2.87389088, "learning_rate": 7.183e-05, "token_acc": 0.86724566, "epoch": 3.88413948, "global_step/max_steps": "3453/8890", "percentage": "38.84%", "elapsed_time": "1h 13m 23s", "remaining_time": "1h 55m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784108} {"loss": 0.40885347, "grad_norm": 2.7918222, "learning_rate": 7.181e-05, "token_acc": 0.86623517, "epoch": 3.88526434, "global_step/max_steps": "3454/8890", "percentage": "38.85%", "elapsed_time": "1h 13m 24s", "remaining_time": "1h 55m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784129} {"loss": 0.41440892, "grad_norm": 2.61396217, "learning_rate": 7.18e-05, "token_acc": 0.88630491, "epoch": 3.8863892, "global_step/max_steps": "3455/8890", "percentage": "38.86%", "elapsed_time": "1h 13m 26s", "remaining_time": "1h 55m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784154} {"loss": 0.32698324, "grad_norm": 2.46184802, "learning_rate": 7.178e-05, "token_acc": 0.8788501, "epoch": 3.88751406, "global_step/max_steps": "3456/8890", "percentage": "38.88%", "elapsed_time": "1h 13m 27s", "remaining_time": "1h 55m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784177} {"loss": 0.28182894, "grad_norm": 2.23458958, "learning_rate": 7.176e-05, "token_acc": 0.90401786, "epoch": 3.88863892, "global_step/max_steps": "3457/8890", "percentage": "38.89%", "elapsed_time": "1h 13m 28s", "remaining_time": "1h 55m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784203} {"loss": 0.28896034, "grad_norm": 2.75088, "learning_rate": 7.175e-05, "token_acc": 0.90509915, "epoch": 3.88976378, "global_step/max_steps": "3458/8890", "percentage": "38.90%", "elapsed_time": "1h 13m 29s", "remaining_time": "1h 55m 26s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784227} {"loss": 0.25883365, "grad_norm": 2.3581531, "learning_rate": 7.173e-05, "token_acc": 0.91389914, "epoch": 3.89088864, "global_step/max_steps": "3459/8890", "percentage": "38.91%", "elapsed_time": "1h 13m 30s", "remaining_time": "1h 55m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784243} {"loss": 0.35964084, "grad_norm": 2.70405245, "learning_rate": 7.171e-05, "token_acc": 0.87543253, "epoch": 3.8920135, "global_step/max_steps": "3460/8890", "percentage": "38.92%", "elapsed_time": "1h 13m 31s", "remaining_time": "1h 55m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784282} {"loss": 0.39674792, "grad_norm": 2.58449221, "learning_rate": 7.17e-05, "token_acc": 0.89254109, "epoch": 3.89313836, "global_step/max_steps": "3461/8890", "percentage": "38.93%", "elapsed_time": "1h 13m 32s", "remaining_time": "1h 55m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784306} {"loss": 0.35110873, "grad_norm": 2.69224358, "learning_rate": 7.168e-05, "token_acc": 0.88227848, "epoch": 3.89426322, "global_step/max_steps": "3462/8890", "percentage": "38.94%", "elapsed_time": "1h 13m 33s", "remaining_time": "1h 55m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784331} {"loss": 0.35808492, "grad_norm": 2.25355434, "learning_rate": 7.166e-05, "token_acc": 0.87735078, "epoch": 3.89538808, "global_step/max_steps": "3463/8890", "percentage": "38.95%", "elapsed_time": "1h 13m 35s", "remaining_time": "1h 55m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784353} {"loss": 0.33627209, "grad_norm": 2.79585409, "learning_rate": 7.165e-05, "token_acc": 0.89454545, "epoch": 3.89651294, "global_step/max_steps": "3464/8890", "percentage": "38.97%", "elapsed_time": "1h 13m 36s", "remaining_time": "1h 55m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784396} {"loss": 0.3075372, "grad_norm": 2.75541115, "learning_rate": 7.163e-05, "token_acc": 0.90510949, "epoch": 3.8976378, "global_step/max_steps": "3465/8890", "percentage": "38.98%", "elapsed_time": "1h 13m 37s", "remaining_time": "1h 55m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784419} {"loss": 0.33376968, "grad_norm": 2.37870955, "learning_rate": 7.161e-05, "token_acc": 0.88781431, "epoch": 3.89876265, "global_step/max_steps": "3466/8890", "percentage": "38.99%", "elapsed_time": "1h 13m 38s", "remaining_time": "1h 55m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78442} {"loss": 0.35796058, "grad_norm": 3.17810631, "learning_rate": 7.159e-05, "token_acc": 0.8822606, "epoch": 3.89988751, "global_step/max_steps": "3467/8890", "percentage": "39.00%", "elapsed_time": "1h 13m 39s", "remaining_time": "1h 55m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784447} {"loss": 0.22398847, "grad_norm": 2.03939819, "learning_rate": 7.158e-05, "token_acc": 0.92987512, "epoch": 3.90101237, "global_step/max_steps": "3468/8890", "percentage": "39.01%", "elapsed_time": "1h 13m 40s", "remaining_time": "1h 55m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784473} {"loss": 0.41400349, "grad_norm": 3.0926497, "learning_rate": 7.156e-05, "token_acc": 0.864682, "epoch": 3.90213723, "global_step/max_steps": "3469/8890", "percentage": "39.02%", "elapsed_time": "1h 13m 41s", "remaining_time": "1h 55m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784495} {"loss": 0.33829254, "grad_norm": 2.83451581, "learning_rate": 7.154e-05, "token_acc": 0.8916129, "epoch": 3.90326209, "global_step/max_steps": "3470/8890", "percentage": "39.03%", "elapsed_time": "1h 13m 43s", "remaining_time": "1h 55m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784535} {"loss": 0.2804375, "grad_norm": 2.61896968, "learning_rate": 7.153e-05, "token_acc": 0.90463215, "epoch": 3.90438695, "global_step/max_steps": "3471/8890", "percentage": "39.04%", "elapsed_time": "1h 13m 44s", "remaining_time": "1h 55m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784563} {"loss": 0.36577225, "grad_norm": 2.91383839, "learning_rate": 7.151e-05, "token_acc": 0.88686131, "epoch": 3.90551181, "global_step/max_steps": "3472/8890", "percentage": "39.06%", "elapsed_time": "1h 13m 45s", "remaining_time": "1h 55m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784585} {"loss": 0.34871981, "grad_norm": 2.81879401, "learning_rate": 7.149e-05, "token_acc": 0.88888889, "epoch": 3.90663667, "global_step/max_steps": "3473/8890", "percentage": "39.07%", "elapsed_time": "1h 13m 46s", "remaining_time": "1h 55m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784625} {"loss": 0.33221698, "grad_norm": 2.77487564, "learning_rate": 7.148e-05, "token_acc": 0.89281211, "epoch": 3.90776153, "global_step/max_steps": "3474/8890", "percentage": "39.08%", "elapsed_time": "1h 13m 47s", "remaining_time": "1h 55m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78465} {"loss": 0.30487901, "grad_norm": 2.60384583, "learning_rate": 7.146e-05, "token_acc": 0.90371389, "epoch": 3.90888639, "global_step/max_steps": "3475/8890", "percentage": "39.09%", "elapsed_time": "1h 13m 48s", "remaining_time": "1h 55m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784685} {"loss": 0.33835649, "grad_norm": 2.51404572, "learning_rate": 7.144e-05, "token_acc": 0.88971368, "epoch": 3.91001125, "global_step/max_steps": "3476/8890", "percentage": "39.10%", "elapsed_time": "1h 13m 49s", "remaining_time": "1h 54m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784684} {"loss": 0.35709059, "grad_norm": 2.36298108, "learning_rate": 7.143e-05, "token_acc": 0.89130435, "epoch": 3.91113611, "global_step/max_steps": "3477/8890", "percentage": "39.11%", "elapsed_time": "1h 13m 50s", "remaining_time": "1h 54m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784705} {"loss": 0.32967082, "grad_norm": 2.60889268, "learning_rate": 7.141e-05, "token_acc": 0.89117291, "epoch": 3.91226097, "global_step/max_steps": "3478/8890", "percentage": "39.12%", "elapsed_time": "1h 13m 52s", "remaining_time": "1h 54m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784721} {"loss": 0.33621097, "grad_norm": 2.49062943, "learning_rate": 7.139e-05, "token_acc": 0.88824214, "epoch": 3.91338583, "global_step/max_steps": "3479/8890", "percentage": "39.13%", "elapsed_time": "1h 13m 53s", "remaining_time": "1h 54m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784749} {"loss": 0.34666982, "grad_norm": 2.23342681, "learning_rate": 7.138e-05, "token_acc": 0.88879004, "epoch": 3.91451069, "global_step/max_steps": "3480/8890", "percentage": "39.15%", "elapsed_time": "1h 13m 54s", "remaining_time": "1h 54m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784765} {"loss": 0.35708308, "grad_norm": 2.78451562, "learning_rate": 7.136e-05, "token_acc": 0.87470726, "epoch": 3.91563555, "global_step/max_steps": "3481/8890", "percentage": "39.16%", "elapsed_time": "1h 13m 55s", "remaining_time": "1h 54m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784788} {"loss": 0.40327802, "grad_norm": 2.75765538, "learning_rate": 7.134e-05, "token_acc": 0.86092715, "epoch": 3.9167604, "global_step/max_steps": "3482/8890", "percentage": "39.17%", "elapsed_time": "1h 13m 56s", "remaining_time": "1h 54m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784813} {"loss": 0.4038527, "grad_norm": 2.70506406, "learning_rate": 7.133e-05, "token_acc": 0.86310905, "epoch": 3.91788526, "global_step/max_steps": "3483/8890", "percentage": "39.18%", "elapsed_time": "1h 13m 57s", "remaining_time": "1h 54m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78484} {"loss": 0.31505591, "grad_norm": 2.6725297, "learning_rate": 7.131e-05, "token_acc": 0.89602054, "epoch": 3.91901012, "global_step/max_steps": "3484/8890", "percentage": "39.19%", "elapsed_time": "1h 13m 59s", "remaining_time": "1h 54m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784858} {"loss": 0.34397769, "grad_norm": 2.87412953, "learning_rate": 7.129e-05, "token_acc": 0.88980716, "epoch": 3.92013498, "global_step/max_steps": "3485/8890", "percentage": "39.20%", "elapsed_time": "1h 14m 0s", "remaining_time": "1h 54m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784896} {"loss": 0.36014032, "grad_norm": 3.17259026, "learning_rate": 7.128e-05, "token_acc": 0.88544474, "epoch": 3.92125984, "global_step/max_steps": "3486/8890", "percentage": "39.21%", "elapsed_time": "1h 14m 1s", "remaining_time": "1h 54m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784938} {"loss": 0.33886087, "grad_norm": 2.37750483, "learning_rate": 7.126e-05, "token_acc": 0.8854719, "epoch": 3.9223847, "global_step/max_steps": "3487/8890", "percentage": "39.22%", "elapsed_time": "1h 14m 2s", "remaining_time": "1h 54m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784964} {"loss": 0.26420003, "grad_norm": 2.342237, "learning_rate": 7.124e-05, "token_acc": 0.90784558, "epoch": 3.92350956, "global_step/max_steps": "3488/8890", "percentage": "39.24%", "elapsed_time": "1h 14m 3s", "remaining_time": "1h 54m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784984} {"loss": 0.46170107, "grad_norm": 2.82654834, "learning_rate": 7.122e-05, "token_acc": 0.86012526, "epoch": 3.92463442, "global_step/max_steps": "3489/8890", "percentage": "39.25%", "elapsed_time": "1h 14m 4s", "remaining_time": "1h 54m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785031} {"loss": 0.39305052, "grad_norm": 2.97844315, "learning_rate": 7.121e-05, "token_acc": 0.89114659, "epoch": 3.92575928, "global_step/max_steps": "3490/8890", "percentage": "39.26%", "elapsed_time": "1h 14m 5s", "remaining_time": "1h 54m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785056} {"loss": 0.44266781, "grad_norm": 2.71297455, "learning_rate": 7.119e-05, "token_acc": 0.85925926, "epoch": 3.92688414, "global_step/max_steps": "3491/8890", "percentage": "39.27%", "elapsed_time": "1h 14m 6s", "remaining_time": "1h 54m 37s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785078} {"loss": 0.40098995, "grad_norm": 2.62977195, "learning_rate": 7.117e-05, "token_acc": 0.87383661, "epoch": 3.928009, "global_step/max_steps": "3492/8890", "percentage": "39.28%", "elapsed_time": "1h 14m 7s", "remaining_time": "1h 54m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785099} {"loss": 0.36129558, "grad_norm": 2.58659959, "learning_rate": 7.116e-05, "token_acc": 0.89223058, "epoch": 3.92913386, "global_step/max_steps": "3493/8890", "percentage": "39.29%", "elapsed_time": "1h 14m 8s", "remaining_time": "1h 54m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785138} {"loss": 0.3171559, "grad_norm": 2.92777109, "learning_rate": 7.114e-05, "token_acc": 0.89925373, "epoch": 3.93025872, "global_step/max_steps": "3494/8890", "percentage": "39.30%", "elapsed_time": "1h 14m 9s", "remaining_time": "1h 54m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785178} {"loss": 0.4497228, "grad_norm": 3.04399085, "learning_rate": 7.112e-05, "token_acc": 0.85956965, "epoch": 3.93138358, "global_step/max_steps": "3495/8890", "percentage": "39.31%", "elapsed_time": "1h 14m 11s", "remaining_time": "1h 54m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785205} {"loss": 0.39666364, "grad_norm": 2.52345252, "learning_rate": 7.111e-05, "token_acc": 0.87234043, "epoch": 3.93250844, "global_step/max_steps": "3496/8890", "percentage": "39.33%", "elapsed_time": "1h 14m 12s", "remaining_time": "1h 54m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785221} {"loss": 0.2921046, "grad_norm": 2.05815578, "learning_rate": 7.109e-05, "token_acc": 0.9096099, "epoch": 3.9336333, "global_step/max_steps": "3497/8890", "percentage": "39.34%", "elapsed_time": "1h 14m 13s", "remaining_time": "1h 54m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785205} {"loss": 0.35035202, "grad_norm": 1.99944293, "learning_rate": 7.107e-05, "token_acc": 0.89909091, "epoch": 3.93475816, "global_step/max_steps": "3498/8890", "percentage": "39.35%", "elapsed_time": "1h 14m 14s", "remaining_time": "1h 54m 26s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785231} {"loss": 0.46507448, "grad_norm": 2.66462398, "learning_rate": 7.106e-05, "token_acc": 0.85620915, "epoch": 3.93588301, "global_step/max_steps": "3499/8890", "percentage": "39.36%", "elapsed_time": "1h 14m 16s", "remaining_time": "1h 54m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785231} {"loss": 0.29203516, "grad_norm": 2.10927844, "learning_rate": 7.104e-05, "token_acc": 0.90591966, "epoch": 3.93700787, "global_step/max_steps": "3500/8890", "percentage": "39.37%", "elapsed_time": "1h 14m 17s", "remaining_time": "1h 54m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785223} {"loss": 0.33083844, "grad_norm": 2.63779712, "learning_rate": 7.102e-05, "token_acc": 0.8746594, "epoch": 3.93813273, "global_step/max_steps": "3501/8890", "percentage": "39.38%", "elapsed_time": "1h 14m 18s", "remaining_time": "1h 54m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785249} {"loss": 0.36691487, "grad_norm": 2.51846862, "learning_rate": 7.101e-05, "token_acc": 0.8716707, "epoch": 3.93925759, "global_step/max_steps": "3502/8890", "percentage": "39.39%", "elapsed_time": "1h 14m 19s", "remaining_time": "1h 54m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785272} {"loss": 0.36602086, "grad_norm": 2.47055697, "learning_rate": 7.099e-05, "token_acc": 0.89285714, "epoch": 3.94038245, "global_step/max_steps": "3503/8890", "percentage": "39.40%", "elapsed_time": "1h 14m 20s", "remaining_time": "1h 54m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785354} {"loss": 0.27362216, "grad_norm": 2.40816855, "learning_rate": 7.097e-05, "token_acc": 0.91416894, "epoch": 3.94150731, "global_step/max_steps": "3504/8890", "percentage": "39.42%", "elapsed_time": "1h 14m 22s", "remaining_time": "1h 54m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785259} {"loss": 0.27986786, "grad_norm": 2.27871752, "learning_rate": 7.096e-05, "token_acc": 0.90702948, "epoch": 3.94263217, "global_step/max_steps": "3505/8890", "percentage": "39.43%", "elapsed_time": "1h 14m 23s", "remaining_time": "1h 54m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785261} {"loss": 0.4024514, "grad_norm": 2.42340803, "learning_rate": 7.094e-05, "token_acc": 0.87377691, "epoch": 3.94375703, "global_step/max_steps": "3506/8890", "percentage": "39.44%", "elapsed_time": "1h 14m 24s", "remaining_time": "1h 54m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785283} {"loss": 0.37118602, "grad_norm": 2.38655376, "learning_rate": 7.092e-05, "token_acc": 0.88715953, "epoch": 3.94488189, "global_step/max_steps": "3507/8890", "percentage": "39.45%", "elapsed_time": "1h 14m 25s", "remaining_time": "1h 54m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785305} {"loss": 0.35277992, "grad_norm": 2.33581352, "learning_rate": 7.09e-05, "token_acc": 0.89289872, "epoch": 3.94600675, "global_step/max_steps": "3508/8890", "percentage": "39.46%", "elapsed_time": "1h 14m 26s", "remaining_time": "1h 54m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785322} {"loss": 0.38140377, "grad_norm": 2.63485193, "learning_rate": 7.089e-05, "token_acc": 0.86930456, "epoch": 3.94713161, "global_step/max_steps": "3509/8890", "percentage": "39.47%", "elapsed_time": "1h 14m 28s", "remaining_time": "1h 54m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785344} {"loss": 0.29155049, "grad_norm": 2.43506455, "learning_rate": 7.087e-05, "token_acc": 0.90547264, "epoch": 3.94825647, "global_step/max_steps": "3510/8890", "percentage": "39.48%", "elapsed_time": "1h 14m 29s", "remaining_time": "1h 54m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785379} {"loss": 0.34706023, "grad_norm": 2.90557885, "learning_rate": 7.085e-05, "token_acc": 0.88639551, "epoch": 3.94938133, "global_step/max_steps": "3511/8890", "percentage": "39.49%", "elapsed_time": "1h 14m 30s", "remaining_time": "1h 54m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785422} {"loss": 0.35157982, "grad_norm": 2.91113591, "learning_rate": 7.084e-05, "token_acc": 0.89014085, "epoch": 3.95050619, "global_step/max_steps": "3512/8890", "percentage": "39.51%", "elapsed_time": "1h 14m 31s", "remaining_time": "1h 54m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785463} {"loss": 0.37763104, "grad_norm": 2.87442064, "learning_rate": 7.082e-05, "token_acc": 0.87749667, "epoch": 3.95163105, "global_step/max_steps": "3513/8890", "percentage": "39.52%", "elapsed_time": "1h 14m 32s", "remaining_time": "1h 54m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785519} {"loss": 0.41750747, "grad_norm": 3.30824995, "learning_rate": 7.08e-05, "token_acc": 0.86494689, "epoch": 3.95275591, "global_step/max_steps": "3514/8890", "percentage": "39.53%", "elapsed_time": "1h 14m 33s", "remaining_time": "1h 54m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78554} {"loss": 0.40509564, "grad_norm": 2.58142304, "learning_rate": 7.079e-05, "token_acc": 0.86481802, "epoch": 3.95388076, "global_step/max_steps": "3515/8890", "percentage": "39.54%", "elapsed_time": "1h 14m 34s", "remaining_time": "1h 54m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785538} {"loss": 0.3448202, "grad_norm": 2.78184533, "learning_rate": 7.077e-05, "token_acc": 0.87377451, "epoch": 3.95500562, "global_step/max_steps": "3516/8890", "percentage": "39.55%", "elapsed_time": "1h 14m 35s", "remaining_time": "1h 54m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785561} {"loss": 0.30438343, "grad_norm": 2.73232293, "learning_rate": 7.075e-05, "token_acc": 0.896, "epoch": 3.95613048, "global_step/max_steps": "3517/8890", "percentage": "39.56%", "elapsed_time": "1h 14m 36s", "remaining_time": "1h 53m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785618} {"loss": 0.33852482, "grad_norm": 2.29237962, "learning_rate": 7.074e-05, "token_acc": 0.89199492, "epoch": 3.95725534, "global_step/max_steps": "3518/8890", "percentage": "39.57%", "elapsed_time": "1h 14m 37s", "remaining_time": "1h 53m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78564} {"loss": 0.4120698, "grad_norm": 2.48181558, "learning_rate": 7.072e-05, "token_acc": 0.87740628, "epoch": 3.9583802, "global_step/max_steps": "3519/8890", "percentage": "39.58%", "elapsed_time": "1h 14m 39s", "remaining_time": "1h 53m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785662} {"loss": 0.43072522, "grad_norm": 2.62634254, "learning_rate": 7.07e-05, "token_acc": 0.87546239, "epoch": 3.95950506, "global_step/max_steps": "3520/8890", "percentage": "39.60%", "elapsed_time": "1h 14m 40s", "remaining_time": "1h 53m 54s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785702} {"loss": 0.28911096, "grad_norm": 2.3955934, "learning_rate": 7.068e-05, "token_acc": 0.89762797, "epoch": 3.96062992, "global_step/max_steps": "3521/8890", "percentage": "39.61%", "elapsed_time": "1h 14m 41s", "remaining_time": "1h 53m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785728} {"loss": 0.26518679, "grad_norm": 2.47153878, "learning_rate": 7.067e-05, "token_acc": 0.91404011, "epoch": 3.96175478, "global_step/max_steps": "3522/8890", "percentage": "39.62%", "elapsed_time": "1h 14m 42s", "remaining_time": "1h 53m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785752} {"loss": 0.36007452, "grad_norm": 2.14909649, "learning_rate": 7.065e-05, "token_acc": 0.89121756, "epoch": 3.96287964, "global_step/max_steps": "3523/8890", "percentage": "39.63%", "elapsed_time": "1h 14m 43s", "remaining_time": "1h 53m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78577} {"loss": 0.31865129, "grad_norm": 2.57547832, "learning_rate": 7.063e-05, "token_acc": 0.88848039, "epoch": 3.9640045, "global_step/max_steps": "3524/8890", "percentage": "39.64%", "elapsed_time": "1h 14m 44s", "remaining_time": "1h 53m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785788} {"loss": 0.31596497, "grad_norm": 2.73537016, "learning_rate": 7.062e-05, "token_acc": 0.88306943, "epoch": 3.96512936, "global_step/max_steps": "3525/8890", "percentage": "39.65%", "elapsed_time": "1h 14m 45s", "remaining_time": "1h 53m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785811} {"loss": 0.45898098, "grad_norm": 2.44821692, "learning_rate": 7.06e-05, "token_acc": 0.8554007, "epoch": 3.96625422, "global_step/max_steps": "3526/8890", "percentage": "39.66%", "elapsed_time": "1h 14m 47s", "remaining_time": "1h 53m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785809} {"loss": 0.38740492, "grad_norm": 2.2705698, "learning_rate": 7.058e-05, "token_acc": 0.87478849, "epoch": 3.96737908, "global_step/max_steps": "3527/8890", "percentage": "39.67%", "elapsed_time": "1h 14m 48s", "remaining_time": "1h 53m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785847} {"loss": 0.34495163, "grad_norm": 2.60521197, "learning_rate": 7.057e-05, "token_acc": 0.88322981, "epoch": 3.96850394, "global_step/max_steps": "3528/8890", "percentage": "39.69%", "elapsed_time": "1h 14m 49s", "remaining_time": "1h 53m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785871} {"loss": 0.37857714, "grad_norm": 2.56258106, "learning_rate": 7.055e-05, "token_acc": 0.88263473, "epoch": 3.9696288, "global_step/max_steps": "3529/8890", "percentage": "39.70%", "elapsed_time": "1h 14m 51s", "remaining_time": "1h 53m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785774} {"loss": 0.30740386, "grad_norm": 2.434659, "learning_rate": 7.053e-05, "token_acc": 0.89243499, "epoch": 3.97075366, "global_step/max_steps": "3530/8890", "percentage": "39.71%", "elapsed_time": "1h 14m 52s", "remaining_time": "1h 53m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785797} {"loss": 0.30315545, "grad_norm": 1.95652318, "learning_rate": 7.052e-05, "token_acc": 0.9, "epoch": 3.97187852, "global_step/max_steps": "3531/8890", "percentage": "39.72%", "elapsed_time": "1h 14m 53s", "remaining_time": "1h 53m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785823} {"loss": 0.39688274, "grad_norm": 2.78017497, "learning_rate": 7.05e-05, "token_acc": 0.86856517, "epoch": 3.97300337, "global_step/max_steps": "3532/8890", "percentage": "39.73%", "elapsed_time": "1h 14m 54s", "remaining_time": "1h 53m 37s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785863} {"loss": 0.28879941, "grad_norm": 2.44549513, "learning_rate": 7.048e-05, "token_acc": 0.90470446, "epoch": 3.97412823, "global_step/max_steps": "3533/8890", "percentage": "39.74%", "elapsed_time": "1h 14m 55s", "remaining_time": "1h 53m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785863} {"loss": 0.30246979, "grad_norm": 2.68881559, "learning_rate": 7.046e-05, "token_acc": 0.9015544, "epoch": 3.97525309, "global_step/max_steps": "3534/8890", "percentage": "39.75%", "elapsed_time": "1h 14m 56s", "remaining_time": "1h 53m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78591} {"loss": 0.40617317, "grad_norm": 2.21317649, "learning_rate": 7.045e-05, "token_acc": 0.87436762, "epoch": 3.97637795, "global_step/max_steps": "3535/8890", "percentage": "39.76%", "elapsed_time": "1h 14m 58s", "remaining_time": "1h 53m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785881} {"loss": 0.38092524, "grad_norm": 2.72712159, "learning_rate": 7.043e-05, "token_acc": 0.87356322, "epoch": 3.97750281, "global_step/max_steps": "3536/8890", "percentage": "39.78%", "elapsed_time": "1h 14m 59s", "remaining_time": "1h 53m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785909} {"loss": 0.31013465, "grad_norm": 2.31359959, "learning_rate": 7.041e-05, "token_acc": 0.90097933, "epoch": 3.97862767, "global_step/max_steps": "3537/8890", "percentage": "39.79%", "elapsed_time": "1h 15m 0s", "remaining_time": "1h 53m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785926} {"loss": 0.22172911, "grad_norm": 1.7627846, "learning_rate": 7.04e-05, "token_acc": 0.92940039, "epoch": 3.97975253, "global_step/max_steps": "3538/8890", "percentage": "39.80%", "elapsed_time": "1h 15m 1s", "remaining_time": "1h 53m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785949} {"loss": 0.30490285, "grad_norm": 2.65038943, "learning_rate": 7.038e-05, "token_acc": 0.89248895, "epoch": 3.98087739, "global_step/max_steps": "3539/8890", "percentage": "39.81%", "elapsed_time": "1h 15m 2s", "remaining_time": "1h 53m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785974} {"loss": 0.41230458, "grad_norm": 2.7723124, "learning_rate": 7.036e-05, "token_acc": 0.86613387, "epoch": 3.98200225, "global_step/max_steps": "3540/8890", "percentage": "39.82%", "elapsed_time": "1h 15m 3s", "remaining_time": "1h 53m 26s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785995} {"loss": 0.36247438, "grad_norm": 2.59536958, "learning_rate": 7.035e-05, "token_acc": 0.89226869, "epoch": 3.98312711, "global_step/max_steps": "3541/8890", "percentage": "39.83%", "elapsed_time": "1h 15m 5s", "remaining_time": "1h 53m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786014} {"loss": 0.32397377, "grad_norm": 2.77366519, "learning_rate": 7.033e-05, "token_acc": 0.88010204, "epoch": 3.98425197, "global_step/max_steps": "3542/8890", "percentage": "39.84%", "elapsed_time": "1h 15m 6s", "remaining_time": "1h 53m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786062} {"loss": 0.39535332, "grad_norm": 2.64656091, "learning_rate": 7.031e-05, "token_acc": 0.868676, "epoch": 3.98537683, "global_step/max_steps": "3543/8890", "percentage": "39.85%", "elapsed_time": "1h 15m 7s", "remaining_time": "1h 53m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786061} {"loss": 0.41160446, "grad_norm": 2.77793193, "learning_rate": 7.029e-05, "token_acc": 0.87580994, "epoch": 3.98650169, "global_step/max_steps": "3544/8890", "percentage": "39.87%", "elapsed_time": "1h 15m 8s", "remaining_time": "1h 53m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786097} {"loss": 0.3904559, "grad_norm": 2.84685612, "learning_rate": 7.028e-05, "token_acc": 0.87139108, "epoch": 3.98762655, "global_step/max_steps": "3545/8890", "percentage": "39.88%", "elapsed_time": "1h 15m 9s", "remaining_time": "1h 53m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786122} {"loss": 0.38707459, "grad_norm": 2.55121017, "learning_rate": 7.026e-05, "token_acc": 0.87305122, "epoch": 3.98875141, "global_step/max_steps": "3546/8890", "percentage": "39.89%", "elapsed_time": "1h 15m 10s", "remaining_time": "1h 53m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786149} {"loss": 0.36758071, "grad_norm": 2.55040383, "learning_rate": 7.024e-05, "token_acc": 0.88597285, "epoch": 3.98987627, "global_step/max_steps": "3547/8890", "percentage": "39.90%", "elapsed_time": "1h 15m 11s", "remaining_time": "1h 53m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786171} {"loss": 0.38964906, "grad_norm": 2.45323491, "learning_rate": 7.023e-05, "token_acc": 0.88826816, "epoch": 3.99100112, "global_step/max_steps": "3548/8890", "percentage": "39.91%", "elapsed_time": "1h 15m 13s", "remaining_time": "1h 53m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786172} {"loss": 0.35320976, "grad_norm": 2.79277182, "learning_rate": 7.021e-05, "token_acc": 0.8819242, "epoch": 3.99212598, "global_step/max_steps": "3549/8890", "percentage": "39.92%", "elapsed_time": "1h 15m 14s", "remaining_time": "1h 53m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786208} {"loss": 0.36615384, "grad_norm": 2.80073476, "learning_rate": 7.019e-05, "token_acc": 0.87321937, "epoch": 3.99325084, "global_step/max_steps": "3550/8890", "percentage": "39.93%", "elapsed_time": "1h 15m 15s", "remaining_time": "1h 53m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786235} {"loss": 0.40590411, "grad_norm": 2.56219506, "learning_rate": 7.018e-05, "token_acc": 0.86530612, "epoch": 3.9943757, "global_step/max_steps": "3551/8890", "percentage": "39.94%", "elapsed_time": "1h 15m 16s", "remaining_time": "1h 53m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786223} {"loss": 0.33046606, "grad_norm": 2.51408458, "learning_rate": 7.016e-05, "token_acc": 0.90066225, "epoch": 3.99550056, "global_step/max_steps": "3552/8890", "percentage": "39.96%", "elapsed_time": "1h 15m 17s", "remaining_time": "1h 53m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786248} {"loss": 0.41356027, "grad_norm": 2.78724384, "learning_rate": 7.014e-05, "token_acc": 0.85330347, "epoch": 3.99662542, "global_step/max_steps": "3553/8890", "percentage": "39.97%", "elapsed_time": "1h 15m 18s", "remaining_time": "1h 53m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78627} {"loss": 0.3730917, "grad_norm": 2.71587515, "learning_rate": 7.012e-05, "token_acc": 0.87379808, "epoch": 3.99775028, "global_step/max_steps": "3554/8890", "percentage": "39.98%", "elapsed_time": "1h 15m 19s", "remaining_time": "1h 53m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786289} {"loss": 0.37112886, "grad_norm": 2.48494935, "learning_rate": 7.011e-05, "token_acc": 0.8762215, "epoch": 3.99887514, "global_step/max_steps": "3555/8890", "percentage": "39.99%", "elapsed_time": "1h 15m 21s", "remaining_time": "1h 53m 4s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786308} {"loss": 0.32300007, "grad_norm": 2.37807894, "learning_rate": 7.009e-05, "token_acc": 0.89591568, "epoch": 4.0, "global_step/max_steps": "3556/8890", "percentage": "40.00%", "elapsed_time": "1h 15m 22s", "remaining_time": "1h 53m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78634} {"loss": 0.15970975, "grad_norm": 1.8811003, "learning_rate": 7.007e-05, "token_acc": 0.95079787, "epoch": 4.00112486, "global_step/max_steps": "3557/8890", "percentage": "40.01%", "elapsed_time": "1h 15m 24s", "remaining_time": "1h 53m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786243} {"loss": 0.16966346, "grad_norm": 1.70686197, "learning_rate": 7.006e-05, "token_acc": 0.95382883, "epoch": 4.00224972, "global_step/max_steps": "3558/8890", "percentage": "40.02%", "elapsed_time": "1h 15m 25s", "remaining_time": "1h 53m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786267} {"loss": 0.15070602, "grad_norm": 1.19246614, "learning_rate": 7.004e-05, "token_acc": 0.96375099, "epoch": 4.00337458, "global_step/max_steps": "3559/8890", "percentage": "40.03%", "elapsed_time": "1h 15m 26s", "remaining_time": "1h 52m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786313} {"loss": 0.23501232, "grad_norm": 2.43688917, "learning_rate": 7.002e-05, "token_acc": 0.92824427, "epoch": 4.00449944, "global_step/max_steps": "3560/8890", "percentage": "40.04%", "elapsed_time": "1h 15m 27s", "remaining_time": "1h 52m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786349} {"loss": 0.14508559, "grad_norm": 1.68096435, "learning_rate": 7e-05, "token_acc": 0.96194226, "epoch": 4.0056243, "global_step/max_steps": "3561/8890", "percentage": "40.06%", "elapsed_time": "1h 15m 28s", "remaining_time": "1h 52m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786373} {"loss": 0.1955892, "grad_norm": 1.9253056, "learning_rate": 6.999e-05, "token_acc": 0.94194962, "epoch": 4.00674916, "global_step/max_steps": "3562/8890", "percentage": "40.07%", "elapsed_time": "1h 15m 29s", "remaining_time": "1h 52m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786408} {"loss": 0.17547815, "grad_norm": 1.95691335, "learning_rate": 6.997e-05, "token_acc": 0.94706559, "epoch": 4.00787402, "global_step/max_steps": "3563/8890", "percentage": "40.08%", "elapsed_time": "1h 15m 30s", "remaining_time": "1h 52m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786445} {"loss": 0.1291313, "grad_norm": 1.89500594, "learning_rate": 6.995e-05, "token_acc": 0.96166667, "epoch": 4.00899888, "global_step/max_steps": "3564/8890", "percentage": "40.09%", "elapsed_time": "1h 15m 31s", "remaining_time": "1h 52m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78647} {"loss": 0.15875806, "grad_norm": 1.88025415, "learning_rate": 6.994e-05, "token_acc": 0.94835007, "epoch": 4.01012373, "global_step/max_steps": "3565/8890", "percentage": "40.10%", "elapsed_time": "1h 15m 32s", "remaining_time": "1h 52m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786508} {"loss": 0.27619594, "grad_norm": 1.98100674, "learning_rate": 6.992e-05, "token_acc": 0.91710875, "epoch": 4.01124859, "global_step/max_steps": "3566/8890", "percentage": "40.11%", "elapsed_time": "1h 15m 34s", "remaining_time": "1h 52m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786492} {"loss": 0.18466137, "grad_norm": 2.90383554, "learning_rate": 6.99e-05, "token_acc": 0.94444444, "epoch": 4.01237345, "global_step/max_steps": "3567/8890", "percentage": "40.12%", "elapsed_time": "1h 15m 35s", "remaining_time": "1h 52m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786532} {"loss": 0.1485334, "grad_norm": 2.29164124, "learning_rate": 6.989e-05, "token_acc": 0.95363541, "epoch": 4.01349831, "global_step/max_steps": "3568/8890", "percentage": "40.13%", "elapsed_time": "1h 15m 36s", "remaining_time": "1h 52m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786559} {"loss": 0.26840532, "grad_norm": 2.75740051, "learning_rate": 6.987e-05, "token_acc": 0.91689498, "epoch": 4.01462317, "global_step/max_steps": "3569/8890", "percentage": "40.15%", "elapsed_time": "1h 15m 37s", "remaining_time": "1h 52m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786624} {"loss": 0.21954879, "grad_norm": 2.94959211, "learning_rate": 6.985e-05, "token_acc": 0.92885375, "epoch": 4.01574803, "global_step/max_steps": "3570/8890", "percentage": "40.16%", "elapsed_time": "1h 15m 38s", "remaining_time": "1h 52m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78666} {"loss": 0.11626558, "grad_norm": 2.35535884, "learning_rate": 6.983e-05, "token_acc": 0.95630586, "epoch": 4.01687289, "global_step/max_steps": "3571/8890", "percentage": "40.17%", "elapsed_time": "1h 15m 39s", "remaining_time": "1h 52m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786685} {"loss": 0.22470382, "grad_norm": 2.4337604, "learning_rate": 6.982e-05, "token_acc": 0.91789311, "epoch": 4.01799775, "global_step/max_steps": "3572/8890", "percentage": "40.18%", "elapsed_time": "1h 15m 40s", "remaining_time": "1h 52m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78671} {"loss": 0.14526409, "grad_norm": 2.83629012, "learning_rate": 6.98e-05, "token_acc": 0.95534506, "epoch": 4.01912261, "global_step/max_steps": "3573/8890", "percentage": "40.19%", "elapsed_time": "1h 15m 41s", "remaining_time": "1h 52m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78673} {"loss": 0.10901333, "grad_norm": 2.17428279, "learning_rate": 6.978e-05, "token_acc": 0.96494845, "epoch": 4.02024747, "global_step/max_steps": "3574/8890", "percentage": "40.20%", "elapsed_time": "1h 15m 42s", "remaining_time": "1h 52m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786751} {"loss": 0.19222644, "grad_norm": 2.4856205, "learning_rate": 6.977e-05, "token_acc": 0.94254788, "epoch": 4.02137233, "global_step/max_steps": "3575/8890", "percentage": "40.21%", "elapsed_time": "1h 15m 43s", "remaining_time": "1h 52m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786767} {"loss": 0.15952039, "grad_norm": 2.78961778, "learning_rate": 6.975e-05, "token_acc": 0.94405594, "epoch": 4.02249719, "global_step/max_steps": "3576/8890", "percentage": "40.22%", "elapsed_time": "1h 15m 45s", "remaining_time": "1h 52m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786787} {"loss": 0.14931008, "grad_norm": 2.8084681, "learning_rate": 6.973e-05, "token_acc": 0.95633803, "epoch": 4.02362205, "global_step/max_steps": "3577/8890", "percentage": "40.24%", "elapsed_time": "1h 15m 46s", "remaining_time": "1h 52m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786809} {"loss": 0.17234141, "grad_norm": 2.24893785, "learning_rate": 6.971e-05, "token_acc": 0.94933078, "epoch": 4.02474691, "global_step/max_steps": "3578/8890", "percentage": "40.25%", "elapsed_time": "1h 15m 47s", "remaining_time": "1h 52m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786829} {"loss": 0.22951278, "grad_norm": 3.26565719, "learning_rate": 6.97e-05, "token_acc": 0.9229584, "epoch": 4.02587177, "global_step/max_steps": "3579/8890", "percentage": "40.26%", "elapsed_time": "1h 15m 48s", "remaining_time": "1h 52m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786857} {"loss": 0.14276333, "grad_norm": 2.45043778, "learning_rate": 6.968e-05, "token_acc": 0.95417156, "epoch": 4.02699663, "global_step/max_steps": "3580/8890", "percentage": "40.27%", "elapsed_time": "1h 15m 49s", "remaining_time": "1h 52m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786877} {"loss": 0.13607135, "grad_norm": 2.66334152, "learning_rate": 6.966e-05, "token_acc": 0.96018735, "epoch": 4.02812148, "global_step/max_steps": "3581/8890", "percentage": "40.28%", "elapsed_time": "1h 15m 50s", "remaining_time": "1h 52m 26s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786902} {"loss": 0.17004874, "grad_norm": 2.59764957, "learning_rate": 6.965e-05, "token_acc": 0.94556452, "epoch": 4.02924634, "global_step/max_steps": "3582/8890", "percentage": "40.29%", "elapsed_time": "1h 15m 51s", "remaining_time": "1h 52m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786927} {"loss": 0.12934956, "grad_norm": 2.31173635, "learning_rate": 6.963e-05, "token_acc": 0.94565217, "epoch": 4.0303712, "global_step/max_steps": "3583/8890", "percentage": "40.30%", "elapsed_time": "1h 15m 53s", "remaining_time": "1h 52m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786953} {"loss": 0.15042955, "grad_norm": 2.37762666, "learning_rate": 6.961e-05, "token_acc": 0.94509346, "epoch": 4.03149606, "global_step/max_steps": "3584/8890", "percentage": "40.31%", "elapsed_time": "1h 15m 54s", "remaining_time": "1h 52m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786973} {"loss": 0.18267177, "grad_norm": 2.27515459, "learning_rate": 6.96e-05, "token_acc": 0.94641385, "epoch": 4.03262092, "global_step/max_steps": "3585/8890", "percentage": "40.33%", "elapsed_time": "1h 15m 55s", "remaining_time": "1h 52m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786959} {"loss": 0.13414897, "grad_norm": 2.20800757, "learning_rate": 6.958e-05, "token_acc": 0.95095694, "epoch": 4.03374578, "global_step/max_steps": "3586/8890", "percentage": "40.34%", "elapsed_time": "1h 15m 56s", "remaining_time": "1h 52m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786979} {"loss": 0.16864187, "grad_norm": 3.73164511, "learning_rate": 6.956e-05, "token_acc": 0.94166667, "epoch": 4.03487064, "global_step/max_steps": "3587/8890", "percentage": "40.35%", "elapsed_time": "1h 15m 57s", "remaining_time": "1h 52m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786979} {"loss": 0.16241077, "grad_norm": 2.6331563, "learning_rate": 6.954e-05, "token_acc": 0.94954128, "epoch": 4.0359955, "global_step/max_steps": "3588/8890", "percentage": "40.36%", "elapsed_time": "1h 15m 59s", "remaining_time": "1h 52m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787002} {"loss": 0.15368192, "grad_norm": 2.27436709, "learning_rate": 6.953e-05, "token_acc": 0.94533333, "epoch": 4.03712036, "global_step/max_steps": "3589/8890", "percentage": "40.37%", "elapsed_time": "1h 16m 0s", "remaining_time": "1h 52m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787027} {"loss": 0.20416296, "grad_norm": 3.18465066, "learning_rate": 6.951e-05, "token_acc": 0.94, "epoch": 4.03824522, "global_step/max_steps": "3590/8890", "percentage": "40.38%", "elapsed_time": "1h 16m 1s", "remaining_time": "1h 52m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787061} {"loss": 0.1481314, "grad_norm": 2.26368117, "learning_rate": 6.949e-05, "token_acc": 0.94662921, "epoch": 4.03937008, "global_step/max_steps": "3591/8890", "percentage": "40.39%", "elapsed_time": "1h 16m 2s", "remaining_time": "1h 52m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787052} {"loss": 0.21786584, "grad_norm": 2.93411136, "learning_rate": 6.948e-05, "token_acc": 0.94408322, "epoch": 4.04049494, "global_step/max_steps": "3592/8890", "percentage": "40.40%", "elapsed_time": "1h 16m 3s", "remaining_time": "1h 52m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787086} {"loss": 0.20312865, "grad_norm": 2.65181851, "learning_rate": 6.946e-05, "token_acc": 0.93058568, "epoch": 4.0416198, "global_step/max_steps": "3593/8890", "percentage": "40.42%", "elapsed_time": "1h 16m 4s", "remaining_time": "1h 52m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787105} {"loss": 0.16823079, "grad_norm": 2.43204236, "learning_rate": 6.944e-05, "token_acc": 0.94755245, "epoch": 4.04274466, "global_step/max_steps": "3594/8890", "percentage": "40.43%", "elapsed_time": "1h 16m 5s", "remaining_time": "1h 52m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787132} {"loss": 0.16789213, "grad_norm": 2.55771804, "learning_rate": 6.942e-05, "token_acc": 0.9400978, "epoch": 4.04386952, "global_step/max_steps": "3595/8890", "percentage": "40.44%", "elapsed_time": "1h 16m 7s", "remaining_time": "1h 52m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787159} {"loss": 0.20313114, "grad_norm": 2.57601023, "learning_rate": 6.941e-05, "token_acc": 0.93140794, "epoch": 4.04499438, "global_step/max_steps": "3596/8890", "percentage": "40.45%", "elapsed_time": "1h 16m 8s", "remaining_time": "1h 52m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78718} {"loss": 0.17301241, "grad_norm": 2.64113545, "learning_rate": 6.939e-05, "token_acc": 0.95100223, "epoch": 4.04611924, "global_step/max_steps": "3597/8890", "percentage": "40.46%", "elapsed_time": "1h 16m 9s", "remaining_time": "1h 52m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787201} {"loss": 0.19107407, "grad_norm": 3.20188546, "learning_rate": 6.937e-05, "token_acc": 0.94297636, "epoch": 4.04724409, "global_step/max_steps": "3598/8890", "percentage": "40.47%", "elapsed_time": "1h 16m 10s", "remaining_time": "1h 52m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787256} {"loss": 0.11060096, "grad_norm": 1.75029695, "learning_rate": 6.936e-05, "token_acc": 0.96411483, "epoch": 4.04836895, "global_step/max_steps": "3599/8890", "percentage": "40.48%", "elapsed_time": "1h 16m 11s", "remaining_time": "1h 52m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787281} {"loss": 0.15073559, "grad_norm": 2.16202497, "learning_rate": 6.934e-05, "token_acc": 0.95273632, "epoch": 4.04949381, "global_step/max_steps": "3600/8890", "percentage": "40.49%", "elapsed_time": "1h 16m 12s", "remaining_time": "1h 51m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.787298} {"eval_loss": 1.26682007, "eval_runtime": 31.8187, "eval_samples_per_second": 25.237, "eval_steps_per_second": 3.174, "eval_token_acc": 0.73109114, "epoch": 4.04949381, "global_step/max_steps": "3600/8890", "percentage": "40.49%", "elapsed_time": "1h 16m 44s", "remaining_time": "1h 52m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781856} {"loss": 0.20977713, "grad_norm": 2.25066495, "learning_rate": 6.932e-05, "token_acc": 0.92520568, "epoch": 4.05061867, "global_step/max_steps": "3601/8890", "percentage": "40.51%", "elapsed_time": "1h 16m 59s", "remaining_time": "1h 53m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779469} {"loss": 0.13998604, "grad_norm": 2.03784084, "learning_rate": 6.93e-05, "token_acc": 0.95956873, "epoch": 4.05174353, "global_step/max_steps": "3602/8890", "percentage": "40.52%", "elapsed_time": "1h 17m 0s", "remaining_time": "1h 53m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779503} {"loss": 0.13953468, "grad_norm": 2.14951301, "learning_rate": 6.929e-05, "token_acc": 0.96577947, "epoch": 4.05286839, "global_step/max_steps": "3603/8890", "percentage": "40.53%", "elapsed_time": "1h 17m 2s", "remaining_time": "1h 53m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779426} {"loss": 0.18229127, "grad_norm": 2.6287744, "learning_rate": 6.927e-05, "token_acc": 0.93715546, "epoch": 4.05399325, "global_step/max_steps": "3604/8890", "percentage": "40.54%", "elapsed_time": "1h 17m 3s", "remaining_time": "1h 53m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779445} {"loss": 0.15671727, "grad_norm": 2.47633791, "learning_rate": 6.925e-05, "token_acc": 0.95915986, "epoch": 4.05511811, "global_step/max_steps": "3605/8890", "percentage": "40.55%", "elapsed_time": "1h 17m 4s", "remaining_time": "1h 53m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779467} {"loss": 0.17836167, "grad_norm": 2.43795037, "learning_rate": 6.924e-05, "token_acc": 0.93956044, "epoch": 4.05624297, "global_step/max_steps": "3606/8890", "percentage": "40.56%", "elapsed_time": "1h 17m 5s", "remaining_time": "1h 52m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779557} {"loss": 0.2208086, "grad_norm": 2.77077675, "learning_rate": 6.922e-05, "token_acc": 0.93551587, "epoch": 4.05736783, "global_step/max_steps": "3607/8890", "percentage": "40.57%", "elapsed_time": "1h 17m 6s", "remaining_time": "1h 52m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779581} {"loss": 0.16719435, "grad_norm": 2.34831619, "learning_rate": 6.92e-05, "token_acc": 0.9494655, "epoch": 4.05849269, "global_step/max_steps": "3608/8890", "percentage": "40.58%", "elapsed_time": "1h 17m 7s", "remaining_time": "1h 52m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779606} {"loss": 0.1617364, "grad_norm": 2.27607274, "learning_rate": 6.918e-05, "token_acc": 0.94339623, "epoch": 4.05961755, "global_step/max_steps": "3609/8890", "percentage": "40.60%", "elapsed_time": "1h 17m 8s", "remaining_time": "1h 52m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.77966} {"loss": 0.18990549, "grad_norm": 2.77589321, "learning_rate": 6.917e-05, "token_acc": 0.94524496, "epoch": 4.06074241, "global_step/max_steps": "3610/8890", "percentage": "40.61%", "elapsed_time": "1h 17m 9s", "remaining_time": "1h 52m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779717} {"loss": 0.15395944, "grad_norm": 2.20981336, "learning_rate": 6.915e-05, "token_acc": 0.95652174, "epoch": 4.06186727, "global_step/max_steps": "3611/8890", "percentage": "40.62%", "elapsed_time": "1h 17m 11s", "remaining_time": "1h 52m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779735} {"loss": 0.20010994, "grad_norm": 2.60688853, "learning_rate": 6.913e-05, "token_acc": 0.9224572, "epoch": 4.06299213, "global_step/max_steps": "3612/8890", "percentage": "40.63%", "elapsed_time": "1h 17m 12s", "remaining_time": "1h 52m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779756} {"loss": 0.22031313, "grad_norm": 3.60736823, "learning_rate": 6.911e-05, "token_acc": 0.92821369, "epoch": 4.06411699, "global_step/max_steps": "3613/8890", "percentage": "40.64%", "elapsed_time": "1h 17m 13s", "remaining_time": "1h 52m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779789} {"loss": 0.15244809, "grad_norm": 2.39957833, "learning_rate": 6.91e-05, "token_acc": 0.94823232, "epoch": 4.06524184, "global_step/max_steps": "3614/8890", "percentage": "40.65%", "elapsed_time": "1h 17m 14s", "remaining_time": "1h 52m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779777} {"loss": 0.19818492, "grad_norm": 2.81021309, "learning_rate": 6.908e-05, "token_acc": 0.92356688, "epoch": 4.0663667, "global_step/max_steps": "3615/8890", "percentage": "40.66%", "elapsed_time": "1h 17m 15s", "remaining_time": "1h 52m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779824} {"loss": 0.14569989, "grad_norm": 2.09766364, "learning_rate": 6.906e-05, "token_acc": 0.94928826, "epoch": 4.06749156, "global_step/max_steps": "3616/8890", "percentage": "40.67%", "elapsed_time": "1h 17m 16s", "remaining_time": "1h 52m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779849} {"loss": 0.19605513, "grad_norm": 2.64649034, "learning_rate": 6.905e-05, "token_acc": 0.93548387, "epoch": 4.06861642, "global_step/max_steps": "3617/8890", "percentage": "40.69%", "elapsed_time": "1h 17m 17s", "remaining_time": "1h 52m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779866} {"loss": 0.17419684, "grad_norm": 2.66712832, "learning_rate": 6.903e-05, "token_acc": 0.94520548, "epoch": 4.06974128, "global_step/max_steps": "3618/8890", "percentage": "40.70%", "elapsed_time": "1h 17m 19s", "remaining_time": "1h 52m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779888} {"loss": 0.14911152, "grad_norm": 2.21871877, "learning_rate": 6.901e-05, "token_acc": 0.95286195, "epoch": 4.07086614, "global_step/max_steps": "3619/8890", "percentage": "40.71%", "elapsed_time": "1h 17m 20s", "remaining_time": "1h 52m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779951} {"loss": 0.22770731, "grad_norm": 3.26539111, "learning_rate": 6.899e-05, "token_acc": 0.91428571, "epoch": 4.071991, "global_step/max_steps": "3620/8890", "percentage": "40.72%", "elapsed_time": "1h 17m 21s", "remaining_time": "1h 52m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779975} {"loss": 0.16460703, "grad_norm": 1.99884665, "learning_rate": 6.898e-05, "token_acc": 0.94934498, "epoch": 4.07311586, "global_step/max_steps": "3621/8890", "percentage": "40.73%", "elapsed_time": "1h 17m 22s", "remaining_time": "1h 52m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779971} {"loss": 0.24641529, "grad_norm": 2.67361832, "learning_rate": 6.896e-05, "token_acc": 0.93220339, "epoch": 4.07424072, "global_step/max_steps": "3622/8890", "percentage": "40.74%", "elapsed_time": "1h 17m 23s", "remaining_time": "1h 52m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779989} {"loss": 0.15642333, "grad_norm": 2.47595382, "learning_rate": 6.894e-05, "token_acc": 0.94676259, "epoch": 4.07536558, "global_step/max_steps": "3623/8890", "percentage": "40.75%", "elapsed_time": "1h 17m 24s", "remaining_time": "1h 52m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780017} {"loss": 0.12929882, "grad_norm": 2.38777709, "learning_rate": 6.893e-05, "token_acc": 0.95646438, "epoch": 4.07649044, "global_step/max_steps": "3624/8890", "percentage": "40.76%", "elapsed_time": "1h 17m 25s", "remaining_time": "1h 52m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78004} {"loss": 0.16939913, "grad_norm": 2.52593136, "learning_rate": 6.891e-05, "token_acc": 0.94878706, "epoch": 4.0776153, "global_step/max_steps": "3625/8890", "percentage": "40.78%", "elapsed_time": "1h 17m 27s", "remaining_time": "1h 52m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780056} {"loss": 0.12612073, "grad_norm": 1.93220031, "learning_rate": 6.889e-05, "token_acc": 0.96171171, "epoch": 4.07874016, "global_step/max_steps": "3626/8890", "percentage": "40.79%", "elapsed_time": "1h 17m 28s", "remaining_time": "1h 52m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780083} {"loss": 0.21150067, "grad_norm": 2.71439719, "learning_rate": 6.887e-05, "token_acc": 0.93181818, "epoch": 4.07986502, "global_step/max_steps": "3627/8890", "percentage": "40.80%", "elapsed_time": "1h 17m 29s", "remaining_time": "1h 52m 26s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780104} {"loss": 0.14276819, "grad_norm": 2.1312995, "learning_rate": 6.886e-05, "token_acc": 0.95899054, "epoch": 4.08098988, "global_step/max_steps": "3628/8890", "percentage": "40.81%", "elapsed_time": "1h 17m 30s", "remaining_time": "1h 52m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780129} {"loss": 0.16128846, "grad_norm": 2.37000251, "learning_rate": 6.884e-05, "token_acc": 0.94789082, "epoch": 4.08211474, "global_step/max_steps": "3629/8890", "percentage": "40.82%", "elapsed_time": "1h 17m 31s", "remaining_time": "1h 52m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780156} {"loss": 0.14699069, "grad_norm": 2.54381347, "learning_rate": 6.882e-05, "token_acc": 0.94865526, "epoch": 4.0832396, "global_step/max_steps": "3630/8890", "percentage": "40.83%", "elapsed_time": "1h 17m 32s", "remaining_time": "1h 52m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780196} {"loss": 0.11763898, "grad_norm": 1.70158911, "learning_rate": 6.881e-05, "token_acc": 0.96529968, "epoch": 4.08436445, "global_step/max_steps": "3631/8890", "percentage": "40.84%", "elapsed_time": "1h 17m 33s", "remaining_time": "1h 52m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780204} {"loss": 0.18812519, "grad_norm": 2.43024158, "learning_rate": 6.879e-05, "token_acc": 0.93702497, "epoch": 4.08548931, "global_step/max_steps": "3632/8890", "percentage": "40.85%", "elapsed_time": "1h 17m 35s", "remaining_time": "1h 52m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780231} {"loss": 0.22459154, "grad_norm": 2.3008368, "learning_rate": 6.877e-05, "token_acc": 0.93458647, "epoch": 4.08661417, "global_step/max_steps": "3633/8890", "percentage": "40.87%", "elapsed_time": "1h 17m 36s", "remaining_time": "1h 52m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780174} {"loss": 0.20301586, "grad_norm": 2.90763545, "learning_rate": 6.875e-05, "token_acc": 0.94117647, "epoch": 4.08773903, "global_step/max_steps": "3634/8890", "percentage": "40.88%", "elapsed_time": "1h 17m 37s", "remaining_time": "1h 52m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780212} {"loss": 0.14061192, "grad_norm": 2.26689386, "learning_rate": 6.874e-05, "token_acc": 0.9527845, "epoch": 4.08886389, "global_step/max_steps": "3635/8890", "percentage": "40.89%", "elapsed_time": "1h 17m 38s", "remaining_time": "1h 52m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780253} {"loss": 0.21425006, "grad_norm": 3.00471997, "learning_rate": 6.872e-05, "token_acc": 0.9407314, "epoch": 4.08998875, "global_step/max_steps": "3636/8890", "percentage": "40.90%", "elapsed_time": "1h 17m 40s", "remaining_time": "1h 52m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78024} {"loss": 0.14808565, "grad_norm": 2.4252491, "learning_rate": 6.87e-05, "token_acc": 0.95207668, "epoch": 4.09111361, "global_step/max_steps": "3637/8890", "percentage": "40.91%", "elapsed_time": "1h 17m 41s", "remaining_time": "1h 52m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780265} {"loss": 0.16300857, "grad_norm": 2.00345969, "learning_rate": 6.868e-05, "token_acc": 0.95258621, "epoch": 4.09223847, "global_step/max_steps": "3638/8890", "percentage": "40.92%", "elapsed_time": "1h 17m 42s", "remaining_time": "1h 52m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780282} {"loss": 0.16242166, "grad_norm": 2.23268676, "learning_rate": 6.867e-05, "token_acc": 0.94881517, "epoch": 4.09336333, "global_step/max_steps": "3639/8890", "percentage": "40.93%", "elapsed_time": "1h 17m 43s", "remaining_time": "1h 52m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780305} {"loss": 0.16850817, "grad_norm": 2.71475482, "learning_rate": 6.865e-05, "token_acc": 0.94793926, "epoch": 4.09448819, "global_step/max_steps": "3640/8890", "percentage": "40.94%", "elapsed_time": "1h 17m 44s", "remaining_time": "1h 52m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780333} {"loss": 0.16119203, "grad_norm": 2.30114317, "learning_rate": 6.863e-05, "token_acc": 0.95138889, "epoch": 4.09561305, "global_step/max_steps": "3641/8890", "percentage": "40.96%", "elapsed_time": "1h 17m 45s", "remaining_time": "1h 52m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780356} {"loss": 0.1519959, "grad_norm": 2.39973855, "learning_rate": 6.862e-05, "token_acc": 0.94755662, "epoch": 4.09673791, "global_step/max_steps": "3642/8890", "percentage": "40.97%", "elapsed_time": "1h 17m 46s", "remaining_time": "1h 52m 4s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780394} {"loss": 0.15313321, "grad_norm": 2.3119266, "learning_rate": 6.86e-05, "token_acc": 0.94590846, "epoch": 4.09786277, "global_step/max_steps": "3643/8890", "percentage": "40.98%", "elapsed_time": "1h 17m 47s", "remaining_time": "1h 52m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780424} {"loss": 0.14244957, "grad_norm": 2.3761332, "learning_rate": 6.858e-05, "token_acc": 0.94093407, "epoch": 4.09898763, "global_step/max_steps": "3644/8890", "percentage": "40.99%", "elapsed_time": "1h 17m 49s", "remaining_time": "1h 52m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780449} {"loss": 0.18383545, "grad_norm": 2.3159852, "learning_rate": 6.856e-05, "token_acc": 0.94857143, "epoch": 4.10011249, "global_step/max_steps": "3645/8890", "percentage": "41.00%", "elapsed_time": "1h 17m 50s", "remaining_time": "1h 52m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780469} {"loss": 0.15827383, "grad_norm": 3.15158868, "learning_rate": 6.855e-05, "token_acc": 0.95238095, "epoch": 4.10123735, "global_step/max_steps": "3646/8890", "percentage": "41.01%", "elapsed_time": "1h 17m 51s", "remaining_time": "1h 51m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780504} {"loss": 0.13325104, "grad_norm": 2.11452341, "learning_rate": 6.853e-05, "token_acc": 0.95774648, "epoch": 4.1023622, "global_step/max_steps": "3647/8890", "percentage": "41.02%", "elapsed_time": "1h 17m 52s", "remaining_time": "1h 51m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780527} {"loss": 0.17803302, "grad_norm": 2.34392953, "learning_rate": 6.851e-05, "token_acc": 0.94313305, "epoch": 4.10348706, "global_step/max_steps": "3648/8890", "percentage": "41.03%", "elapsed_time": "1h 17m 53s", "remaining_time": "1h 51m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780551} {"loss": 0.1725353, "grad_norm": 2.32306361, "learning_rate": 6.849e-05, "token_acc": 0.95143488, "epoch": 4.10461192, "global_step/max_steps": "3649/8890", "percentage": "41.05%", "elapsed_time": "1h 17m 54s", "remaining_time": "1h 51m 54s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780576} {"loss": 0.12294613, "grad_norm": 2.97761774, "learning_rate": 6.848e-05, "token_acc": 0.95861148, "epoch": 4.10573678, "global_step/max_steps": "3650/8890", "percentage": "41.06%", "elapsed_time": "1h 17m 55s", "remaining_time": "1h 51m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780674} {"loss": 0.13320279, "grad_norm": 2.15408659, "learning_rate": 6.846e-05, "token_acc": 0.95800525, "epoch": 4.10686164, "global_step/max_steps": "3651/8890", "percentage": "41.07%", "elapsed_time": "1h 17m 56s", "remaining_time": "1h 51m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780702} {"loss": 0.14649095, "grad_norm": 2.43996, "learning_rate": 6.844e-05, "token_acc": 0.96045198, "epoch": 4.1079865, "global_step/max_steps": "3652/8890", "percentage": "41.08%", "elapsed_time": "1h 17m 57s", "remaining_time": "1h 51m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780757} {"loss": 0.1596494, "grad_norm": 3.06993175, "learning_rate": 6.843e-05, "token_acc": 0.95400593, "epoch": 4.10911136, "global_step/max_steps": "3653/8890", "percentage": "41.09%", "elapsed_time": "1h 17m 58s", "remaining_time": "1h 51m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780781} {"loss": 0.20600839, "grad_norm": 2.48065662, "learning_rate": 6.841e-05, "token_acc": 0.93403442, "epoch": 4.11023622, "global_step/max_steps": "3654/8890", "percentage": "41.10%", "elapsed_time": "1h 18m 0s", "remaining_time": "1h 51m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780693} {"loss": 0.19958264, "grad_norm": 2.60891914, "learning_rate": 6.839e-05, "token_acc": 0.93606557, "epoch": 4.11136108, "global_step/max_steps": "3655/8890", "percentage": "41.11%", "elapsed_time": "1h 18m 1s", "remaining_time": "1h 51m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78068} {"loss": 0.1902017, "grad_norm": 2.80378389, "learning_rate": 6.837e-05, "token_acc": 0.93742331, "epoch": 4.11248594, "global_step/max_steps": "3656/8890", "percentage": "41.12%", "elapsed_time": "1h 18m 2s", "remaining_time": "1h 51m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780698} {"loss": 0.15409835, "grad_norm": 2.4505353, "learning_rate": 6.836e-05, "token_acc": 0.95560254, "epoch": 4.1136108, "global_step/max_steps": "3657/8890", "percentage": "41.14%", "elapsed_time": "1h 18m 4s", "remaining_time": "1h 51m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780723} {"loss": 0.17699423, "grad_norm": 2.49281454, "learning_rate": 6.834e-05, "token_acc": 0.93977273, "epoch": 4.11473566, "global_step/max_steps": "3658/8890", "percentage": "41.15%", "elapsed_time": "1h 18m 5s", "remaining_time": "1h 51m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780748} {"loss": 0.15423043, "grad_norm": 2.26149631, "learning_rate": 6.832e-05, "token_acc": 0.95386703, "epoch": 4.11586052, "global_step/max_steps": "3659/8890", "percentage": "41.16%", "elapsed_time": "1h 18m 6s", "remaining_time": "1h 51m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780775} {"loss": 0.17248495, "grad_norm": 3.09544683, "learning_rate": 6.83e-05, "token_acc": 0.93442623, "epoch": 4.11698538, "global_step/max_steps": "3660/8890", "percentage": "41.17%", "elapsed_time": "1h 18m 7s", "remaining_time": "1h 51m 37s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780856} {"loss": 0.15364364, "grad_norm": 2.39721894, "learning_rate": 6.829e-05, "token_acc": 0.94967381, "epoch": 4.11811024, "global_step/max_steps": "3661/8890", "percentage": "41.18%", "elapsed_time": "1h 18m 8s", "remaining_time": "1h 51m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780879} {"loss": 0.15133549, "grad_norm": 2.55804181, "learning_rate": 6.827e-05, "token_acc": 0.95467033, "epoch": 4.1192351, "global_step/max_steps": "3662/8890", "percentage": "41.19%", "elapsed_time": "1h 18m 9s", "remaining_time": "1h 51m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780919} {"loss": 0.13670883, "grad_norm": 2.48952246, "learning_rate": 6.825e-05, "token_acc": 0.9550706, "epoch": 4.12035996, "global_step/max_steps": "3663/8890", "percentage": "41.20%", "elapsed_time": "1h 18m 10s", "remaining_time": "1h 51m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780955} {"loss": 0.12875026, "grad_norm": 2.30335617, "learning_rate": 6.823e-05, "token_acc": 0.95465686, "epoch": 4.12148481, "global_step/max_steps": "3664/8890", "percentage": "41.21%", "elapsed_time": "1h 18m 11s", "remaining_time": "1h 51m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780977} {"loss": 0.18204011, "grad_norm": 2.62598491, "learning_rate": 6.822e-05, "token_acc": 0.94154229, "epoch": 4.12260967, "global_step/max_steps": "3665/8890", "percentage": "41.23%", "elapsed_time": "1h 18m 12s", "remaining_time": "1h 51m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78101} {"loss": 0.1601423, "grad_norm": 2.71167874, "learning_rate": 6.82e-05, "token_acc": 0.94516595, "epoch": 4.12373453, "global_step/max_steps": "3666/8890", "percentage": "41.24%", "elapsed_time": "1h 18m 13s", "remaining_time": "1h 51m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781036} {"loss": 0.12445655, "grad_norm": 2.43972802, "learning_rate": 6.818e-05, "token_acc": 0.95646067, "epoch": 4.12485939, "global_step/max_steps": "3667/8890", "percentage": "41.25%", "elapsed_time": "1h 18m 14s", "remaining_time": "1h 51m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781059} {"loss": 0.21506241, "grad_norm": 3.34017491, "learning_rate": 6.817e-05, "token_acc": 0.93414634, "epoch": 4.12598425, "global_step/max_steps": "3668/8890", "percentage": "41.26%", "elapsed_time": "1h 18m 16s", "remaining_time": "1h 51m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78106} {"loss": 0.16853926, "grad_norm": 2.29059219, "learning_rate": 6.815e-05, "token_acc": 0.95348837, "epoch": 4.12710911, "global_step/max_steps": "3669/8890", "percentage": "41.27%", "elapsed_time": "1h 18m 17s", "remaining_time": "1h 51m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781055} {"loss": 0.12047716, "grad_norm": 2.15212798, "learning_rate": 6.813e-05, "token_acc": 0.9576659, "epoch": 4.12823397, "global_step/max_steps": "3670/8890", "percentage": "41.28%", "elapsed_time": "1h 18m 18s", "remaining_time": "1h 51m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781081} {"loss": 0.14077941, "grad_norm": 2.77014971, "learning_rate": 6.811e-05, "token_acc": 0.96020539, "epoch": 4.12935883, "global_step/max_steps": "3671/8890", "percentage": "41.29%", "elapsed_time": "1h 18m 19s", "remaining_time": "1h 51m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781102} {"loss": 0.16673227, "grad_norm": 3.07902384, "learning_rate": 6.81e-05, "token_acc": 0.9377916, "epoch": 4.13048369, "global_step/max_steps": "3672/8890", "percentage": "41.30%", "elapsed_time": "1h 18m 20s", "remaining_time": "1h 51m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781142} {"loss": 0.17980191, "grad_norm": 2.55375195, "learning_rate": 6.808e-05, "token_acc": 0.94481982, "epoch": 4.13160855, "global_step/max_steps": "3673/8890", "percentage": "41.32%", "elapsed_time": "1h 18m 21s", "remaining_time": "1h 51m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781166} {"loss": 0.18309473, "grad_norm": 2.90285802, "learning_rate": 6.806e-05, "token_acc": 0.93852459, "epoch": 4.13273341, "global_step/max_steps": "3674/8890", "percentage": "41.33%", "elapsed_time": "1h 18m 23s", "remaining_time": "1h 51m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781191} {"loss": 0.15631086, "grad_norm": 2.38712382, "learning_rate": 6.804e-05, "token_acc": 0.94950604, "epoch": 4.13385827, "global_step/max_steps": "3675/8890", "percentage": "41.34%", "elapsed_time": "1h 18m 24s", "remaining_time": "1h 51m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781208} {"loss": 0.12164969, "grad_norm": 1.85090017, "learning_rate": 6.803e-05, "token_acc": 0.95568401, "epoch": 4.13498313, "global_step/max_steps": "3676/8890", "percentage": "41.35%", "elapsed_time": "1h 18m 25s", "remaining_time": "1h 51m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781231} {"loss": 0.19664729, "grad_norm": 3.1200254, "learning_rate": 6.801e-05, "token_acc": 0.93687231, "epoch": 4.13610799, "global_step/max_steps": "3677/8890", "percentage": "41.36%", "elapsed_time": "1h 18m 26s", "remaining_time": "1h 51m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781283} {"loss": 0.2002582, "grad_norm": 2.80844831, "learning_rate": 6.799e-05, "token_acc": 0.93918919, "epoch": 4.13723285, "global_step/max_steps": "3678/8890", "percentage": "41.37%", "elapsed_time": "1h 18m 27s", "remaining_time": "1h 51m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781303} {"loss": 0.1338883, "grad_norm": 2.14704514, "learning_rate": 6.797e-05, "token_acc": 0.95788337, "epoch": 4.13835771, "global_step/max_steps": "3679/8890", "percentage": "41.38%", "elapsed_time": "1h 18m 28s", "remaining_time": "1h 51m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781329} {"loss": 0.16848031, "grad_norm": 2.50179744, "learning_rate": 6.796e-05, "token_acc": 0.94638404, "epoch": 4.13948256, "global_step/max_steps": "3680/8890", "percentage": "41.39%", "elapsed_time": "1h 18m 29s", "remaining_time": "1h 51m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781366} {"loss": 0.20056596, "grad_norm": 3.46243382, "learning_rate": 6.794e-05, "token_acc": 0.91878173, "epoch": 4.14060742, "global_step/max_steps": "3681/8890", "percentage": "41.41%", "elapsed_time": "1h 18m 30s", "remaining_time": "1h 51m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781405} {"loss": 0.15168515, "grad_norm": 2.68604684, "learning_rate": 6.792e-05, "token_acc": 0.94642857, "epoch": 4.14173228, "global_step/max_steps": "3682/8890", "percentage": "41.42%", "elapsed_time": "1h 18m 31s", "remaining_time": "1h 51m 4s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781464} {"loss": 0.18519339, "grad_norm": 2.3415339, "learning_rate": 6.791e-05, "token_acc": 0.94172932, "epoch": 4.14285714, "global_step/max_steps": "3683/8890", "percentage": "41.43%", "elapsed_time": "1h 18m 32s", "remaining_time": "1h 51m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781488} {"loss": 0.18854231, "grad_norm": 2.61040163, "learning_rate": 6.789e-05, "token_acc": 0.93962678, "epoch": 4.143982, "global_step/max_steps": "3684/8890", "percentage": "41.44%", "elapsed_time": "1h 18m 33s", "remaining_time": "1h 51m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781523} {"loss": 0.26591396, "grad_norm": 2.64593577, "learning_rate": 6.787e-05, "token_acc": 0.91745112, "epoch": 4.14510686, "global_step/max_steps": "3685/8890", "percentage": "41.45%", "elapsed_time": "1h 18m 35s", "remaining_time": "1h 50m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781547} {"loss": 0.17303964, "grad_norm": 2.50349927, "learning_rate": 6.785e-05, "token_acc": 0.94680851, "epoch": 4.14623172, "global_step/max_steps": "3686/8890", "percentage": "41.46%", "elapsed_time": "1h 18m 36s", "remaining_time": "1h 50m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781568} {"loss": 0.16284116, "grad_norm": 2.65385962, "learning_rate": 6.784e-05, "token_acc": 0.94977169, "epoch": 4.14735658, "global_step/max_steps": "3687/8890", "percentage": "41.47%", "elapsed_time": "1h 18m 37s", "remaining_time": "1h 50m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781605} {"loss": 0.15686339, "grad_norm": 2.35608697, "learning_rate": 6.782e-05, "token_acc": 0.94940797, "epoch": 4.14848144, "global_step/max_steps": "3688/8890", "percentage": "41.48%", "elapsed_time": "1h 18m 38s", "remaining_time": "1h 50m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781642} {"loss": 0.17063215, "grad_norm": 2.6580174, "learning_rate": 6.78e-05, "token_acc": 0.93965517, "epoch": 4.1496063, "global_step/max_steps": "3689/8890", "percentage": "41.50%", "elapsed_time": "1h 18m 39s", "remaining_time": "1h 50m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781665} {"loss": 0.19660775, "grad_norm": 2.76600862, "learning_rate": 6.778e-05, "token_acc": 0.94179894, "epoch": 4.15073116, "global_step/max_steps": "3690/8890", "percentage": "41.51%", "elapsed_time": "1h 18m 40s", "remaining_time": "1h 50m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781655} {"loss": 0.13198917, "grad_norm": 2.27825713, "learning_rate": 6.777e-05, "token_acc": 0.95690608, "epoch": 4.15185602, "global_step/max_steps": "3691/8890", "percentage": "41.52%", "elapsed_time": "1h 18m 41s", "remaining_time": "1h 50m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781674} {"loss": 0.20728302, "grad_norm": 2.78104186, "learning_rate": 6.775e-05, "token_acc": 0.93930636, "epoch": 4.15298088, "global_step/max_steps": "3692/8890", "percentage": "41.53%", "elapsed_time": "1h 18m 43s", "remaining_time": "1h 50m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781675} {"loss": 0.16618776, "grad_norm": 2.15725279, "learning_rate": 6.773e-05, "token_acc": 0.953125, "epoch": 4.15410574, "global_step/max_steps": "3693/8890", "percentage": "41.54%", "elapsed_time": "1h 18m 44s", "remaining_time": "1h 50m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781673} {"loss": 0.28171015, "grad_norm": 3.56014013, "learning_rate": 6.771e-05, "token_acc": 0.90835031, "epoch": 4.1552306, "global_step/max_steps": "3694/8890", "percentage": "41.55%", "elapsed_time": "1h 18m 45s", "remaining_time": "1h 50m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781696} {"loss": 0.12950012, "grad_norm": 1.9497093, "learning_rate": 6.77e-05, "token_acc": 0.95824176, "epoch": 4.15635546, "global_step/max_steps": "3695/8890", "percentage": "41.56%", "elapsed_time": "1h 18m 46s", "remaining_time": "1h 50m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781731} {"loss": 0.20100312, "grad_norm": 2.9572928, "learning_rate": 6.768e-05, "token_acc": 0.93617021, "epoch": 4.15748031, "global_step/max_steps": "3696/8890", "percentage": "41.57%", "elapsed_time": "1h 18m 47s", "remaining_time": "1h 50m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781769} {"loss": 0.21984133, "grad_norm": 2.4926424, "learning_rate": 6.766e-05, "token_acc": 0.92842536, "epoch": 4.15860517, "global_step/max_steps": "3697/8890", "percentage": "41.59%", "elapsed_time": "1h 18m 48s", "remaining_time": "1h 50m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781793} {"loss": 0.22015563, "grad_norm": 2.48544097, "learning_rate": 6.764e-05, "token_acc": 0.92196532, "epoch": 4.15973003, "global_step/max_steps": "3698/8890", "percentage": "41.60%", "elapsed_time": "1h 18m 50s", "remaining_time": "1h 50m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781813} {"loss": 0.2079874, "grad_norm": 3.18478227, "learning_rate": 6.763e-05, "token_acc": 0.9433657, "epoch": 4.16085489, "global_step/max_steps": "3699/8890", "percentage": "41.61%", "elapsed_time": "1h 18m 51s", "remaining_time": "1h 50m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781835} {"loss": 0.12253433, "grad_norm": 1.94657993, "learning_rate": 6.761e-05, "token_acc": 0.95660036, "epoch": 4.16197975, "global_step/max_steps": "3700/8890", "percentage": "41.62%", "elapsed_time": "1h 18m 52s", "remaining_time": "1h 50m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781853} {"loss": 0.17682917, "grad_norm": 2.38045263, "learning_rate": 6.759e-05, "token_acc": 0.94545455, "epoch": 4.16310461, "global_step/max_steps": "3701/8890", "percentage": "41.63%", "elapsed_time": "1h 18m 53s", "remaining_time": "1h 50m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781874} {"loss": 0.18901159, "grad_norm": 2.64771748, "learning_rate": 6.757e-05, "token_acc": 0.93889542, "epoch": 4.16422947, "global_step/max_steps": "3702/8890", "percentage": "41.64%", "elapsed_time": "1h 18m 54s", "remaining_time": "1h 50m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781893} {"loss": 0.21809131, "grad_norm": 2.80309749, "learning_rate": 6.756e-05, "token_acc": 0.93098782, "epoch": 4.16535433, "global_step/max_steps": "3703/8890", "percentage": "41.65%", "elapsed_time": "1h 18m 55s", "remaining_time": "1h 50m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781914} {"loss": 0.19254425, "grad_norm": 2.68481207, "learning_rate": 6.754e-05, "token_acc": 0.93855422, "epoch": 4.16647919, "global_step/max_steps": "3704/8890", "percentage": "41.66%", "elapsed_time": "1h 18m 56s", "remaining_time": "1h 50m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781938} {"loss": 0.14788973, "grad_norm": 2.50497127, "learning_rate": 6.752e-05, "token_acc": 0.94543298, "epoch": 4.16760405, "global_step/max_steps": "3705/8890", "percentage": "41.68%", "elapsed_time": "1h 18m 58s", "remaining_time": "1h 50m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781954} {"loss": 0.17580765, "grad_norm": 2.44413185, "learning_rate": 6.751e-05, "token_acc": 0.94273128, "epoch": 4.16872891, "global_step/max_steps": "3706/8890", "percentage": "41.69%", "elapsed_time": "1h 18m 59s", "remaining_time": "1h 50m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781975} {"loss": 0.16878667, "grad_norm": 2.38601494, "learning_rate": 6.749e-05, "token_acc": 0.94875, "epoch": 4.16985377, "global_step/max_steps": "3707/8890", "percentage": "41.70%", "elapsed_time": "1h 19m 0s", "remaining_time": "1h 50m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781994} {"loss": 0.17334509, "grad_norm": 2.75565886, "learning_rate": 6.747e-05, "token_acc": 0.94483734, "epoch": 4.17097863, "global_step/max_steps": "3708/8890", "percentage": "41.71%", "elapsed_time": "1h 19m 1s", "remaining_time": "1h 50m 26s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782017} {"loss": 0.16557278, "grad_norm": 2.1654222, "learning_rate": 6.745e-05, "token_acc": 0.95099819, "epoch": 4.17210349, "global_step/max_steps": "3709/8890", "percentage": "41.72%", "elapsed_time": "1h 19m 2s", "remaining_time": "1h 50m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782056} {"loss": 0.17222291, "grad_norm": 2.61260056, "learning_rate": 6.744e-05, "token_acc": 0.94191344, "epoch": 4.17322835, "global_step/max_steps": "3710/8890", "percentage": "41.73%", "elapsed_time": "1h 19m 3s", "remaining_time": "1h 50m 23s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782053} {"loss": 0.18808872, "grad_norm": 2.93939543, "learning_rate": 6.742e-05, "token_acc": 0.93348891, "epoch": 4.17435321, "global_step/max_steps": "3711/8890", "percentage": "41.74%", "elapsed_time": "1h 19m 4s", "remaining_time": "1h 50m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782109} {"loss": 0.13831332, "grad_norm": 2.35413098, "learning_rate": 6.74e-05, "token_acc": 0.95211268, "epoch": 4.17547807, "global_step/max_steps": "3712/8890", "percentage": "41.75%", "elapsed_time": "1h 19m 5s", "remaining_time": "1h 50m 20s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782134} {"loss": 0.16855451, "grad_norm": 2.29033661, "learning_rate": 6.738e-05, "token_acc": 0.94579008, "epoch": 4.17660292, "global_step/max_steps": "3713/8890", "percentage": "41.77%", "elapsed_time": "1h 19m 7s", "remaining_time": "1h 50m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782135} {"loss": 0.15993442, "grad_norm": 2.46211457, "learning_rate": 6.737e-05, "token_acc": 0.95065458, "epoch": 4.17772778, "global_step/max_steps": "3714/8890", "percentage": "41.78%", "elapsed_time": "1h 19m 8s", "remaining_time": "1h 50m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782174} {"loss": 0.14280066, "grad_norm": 2.60276246, "learning_rate": 6.735e-05, "token_acc": 0.95883134, "epoch": 4.17885264, "global_step/max_steps": "3715/8890", "percentage": "41.79%", "elapsed_time": "1h 19m 9s", "remaining_time": "1h 50m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782197} {"loss": 0.16071868, "grad_norm": 2.46352005, "learning_rate": 6.733e-05, "token_acc": 0.95006935, "epoch": 4.1799775, "global_step/max_steps": "3716/8890", "percentage": "41.80%", "elapsed_time": "1h 19m 10s", "remaining_time": "1h 50m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782218} {"loss": 0.12359494, "grad_norm": 2.03466749, "learning_rate": 6.731e-05, "token_acc": 0.95833333, "epoch": 4.18110236, "global_step/max_steps": "3717/8890", "percentage": "41.81%", "elapsed_time": "1h 19m 11s", "remaining_time": "1h 50m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782243} {"loss": 0.17052175, "grad_norm": 2.45300198, "learning_rate": 6.73e-05, "token_acc": 0.94117647, "epoch": 4.18222722, "global_step/max_steps": "3718/8890", "percentage": "41.82%", "elapsed_time": "1h 19m 12s", "remaining_time": "1h 50m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782268} {"loss": 0.23867969, "grad_norm": 2.35182738, "learning_rate": 6.728e-05, "token_acc": 0.92635315, "epoch": 4.18335208, "global_step/max_steps": "3719/8890", "percentage": "41.83%", "elapsed_time": "1h 19m 14s", "remaining_time": "1h 50m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782267} {"loss": 0.16580877, "grad_norm": 2.50327921, "learning_rate": 6.726e-05, "token_acc": 0.94948335, "epoch": 4.18447694, "global_step/max_steps": "3720/8890", "percentage": "41.84%", "elapsed_time": "1h 19m 15s", "remaining_time": "1h 50m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782288} {"loss": 0.15542237, "grad_norm": 2.44743991, "learning_rate": 6.724e-05, "token_acc": 0.95113636, "epoch": 4.1856018, "global_step/max_steps": "3721/8890", "percentage": "41.86%", "elapsed_time": "1h 19m 16s", "remaining_time": "1h 50m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782308} {"loss": 0.24296761, "grad_norm": 3.21764684, "learning_rate": 6.723e-05, "token_acc": 0.93059126, "epoch": 4.18672666, "global_step/max_steps": "3722/8890", "percentage": "41.87%", "elapsed_time": "1h 19m 17s", "remaining_time": "1h 50m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782334} {"loss": 0.18400043, "grad_norm": 2.4128592, "learning_rate": 6.721e-05, "token_acc": 0.93756294, "epoch": 4.18785152, "global_step/max_steps": "3723/8890", "percentage": "41.88%", "elapsed_time": "1h 19m 18s", "remaining_time": "1h 50m 4s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782352} {"loss": 0.14706273, "grad_norm": 2.48363423, "learning_rate": 6.719e-05, "token_acc": 0.95619524, "epoch": 4.18897638, "global_step/max_steps": "3724/8890", "percentage": "41.89%", "elapsed_time": "1h 19m 19s", "remaining_time": "1h 50m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782375} {"loss": 0.17187148, "grad_norm": 2.82108831, "learning_rate": 6.717e-05, "token_acc": 0.9448183, "epoch": 4.19010124, "global_step/max_steps": "3725/8890", "percentage": "41.90%", "elapsed_time": "1h 19m 20s", "remaining_time": "1h 50m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782413} {"loss": 0.21229944, "grad_norm": 2.74097943, "learning_rate": 6.716e-05, "token_acc": 0.92941176, "epoch": 4.1912261, "global_step/max_steps": "3726/8890", "percentage": "41.91%", "elapsed_time": "1h 19m 22s", "remaining_time": "1h 49m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782437} {"loss": 0.15024847, "grad_norm": 2.5989747, "learning_rate": 6.714e-05, "token_acc": 0.95081967, "epoch": 4.19235096, "global_step/max_steps": "3727/8890", "percentage": "41.92%", "elapsed_time": "1h 19m 23s", "remaining_time": "1h 49m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782461} {"loss": 0.15172575, "grad_norm": 2.34310436, "learning_rate": 6.712e-05, "token_acc": 0.95189873, "epoch": 4.19347582, "global_step/max_steps": "3728/8890", "percentage": "41.93%", "elapsed_time": "1h 19m 24s", "remaining_time": "1h 49m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782501} {"loss": 0.18805431, "grad_norm": 2.30646563, "learning_rate": 6.71e-05, "token_acc": 0.9452954, "epoch": 4.19460067, "global_step/max_steps": "3729/8890", "percentage": "41.95%", "elapsed_time": "1h 19m 25s", "remaining_time": "1h 49m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782539} {"loss": 0.1327385, "grad_norm": 1.99407196, "learning_rate": 6.709e-05, "token_acc": 0.96108108, "epoch": 4.19572553, "global_step/max_steps": "3730/8890", "percentage": "41.96%", "elapsed_time": "1h 19m 26s", "remaining_time": "1h 49m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782565} {"loss": 0.16862309, "grad_norm": 2.5725956, "learning_rate": 6.707e-05, "token_acc": 0.94522293, "epoch": 4.19685039, "global_step/max_steps": "3731/8890", "percentage": "41.97%", "elapsed_time": "1h 19m 27s", "remaining_time": "1h 49m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782586} {"loss": 0.18170001, "grad_norm": 2.67902493, "learning_rate": 6.705e-05, "token_acc": 0.94554974, "epoch": 4.19797525, "global_step/max_steps": "3732/8890", "percentage": "41.98%", "elapsed_time": "1h 19m 28s", "remaining_time": "1h 49m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782623} {"loss": 0.22162412, "grad_norm": 2.68135428, "learning_rate": 6.703e-05, "token_acc": 0.93774704, "epoch": 4.19910011, "global_step/max_steps": "3733/8890", "percentage": "41.99%", "elapsed_time": "1h 19m 29s", "remaining_time": "1h 49m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782658} {"loss": 0.16059327, "grad_norm": 2.39812112, "learning_rate": 6.702e-05, "token_acc": 0.94736842, "epoch": 4.20022497, "global_step/max_steps": "3734/8890", "percentage": "42.00%", "elapsed_time": "1h 19m 30s", "remaining_time": "1h 49m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782703} {"loss": 0.15969895, "grad_norm": 3.15551758, "learning_rate": 6.7e-05, "token_acc": 0.94731296, "epoch": 4.20134983, "global_step/max_steps": "3735/8890", "percentage": "42.01%", "elapsed_time": "1h 19m 31s", "remaining_time": "1h 49m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782723} {"loss": 0.19451624, "grad_norm": 2.96975851, "learning_rate": 6.698e-05, "token_acc": 0.93560146, "epoch": 4.20247469, "global_step/max_steps": "3736/8890", "percentage": "42.02%", "elapsed_time": "1h 19m 32s", "remaining_time": "1h 49m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782744} {"loss": 0.13333821, "grad_norm": 2.28760648, "learning_rate": 6.696e-05, "token_acc": 0.95916115, "epoch": 4.20359955, "global_step/max_steps": "3737/8890", "percentage": "42.04%", "elapsed_time": "1h 19m 34s", "remaining_time": "1h 49m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782763} {"loss": 0.2278659, "grad_norm": 3.14980984, "learning_rate": 6.695e-05, "token_acc": 0.9212766, "epoch": 4.20472441, "global_step/max_steps": "3738/8890", "percentage": "42.05%", "elapsed_time": "1h 19m 35s", "remaining_time": "1h 49m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782764} {"loss": 0.17103411, "grad_norm": 2.54192281, "learning_rate": 6.693e-05, "token_acc": 0.94880952, "epoch": 4.20584927, "global_step/max_steps": "3739/8890", "percentage": "42.06%", "elapsed_time": "1h 19m 36s", "remaining_time": "1h 49m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782762} {"loss": 0.14323956, "grad_norm": 2.41011357, "learning_rate": 6.691e-05, "token_acc": 0.96107784, "epoch": 4.20697413, "global_step/max_steps": "3740/8890", "percentage": "42.07%", "elapsed_time": "1h 19m 37s", "remaining_time": "1h 49m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782812} {"loss": 0.12983781, "grad_norm": 2.10408235, "learning_rate": 6.689e-05, "token_acc": 0.95622896, "epoch": 4.20809899, "global_step/max_steps": "3741/8890", "percentage": "42.08%", "elapsed_time": "1h 19m 38s", "remaining_time": "1h 49m 37s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782829} {"loss": 0.17234233, "grad_norm": 2.36947513, "learning_rate": 6.688e-05, "token_acc": 0.94594595, "epoch": 4.20922385, "global_step/max_steps": "3742/8890", "percentage": "42.09%", "elapsed_time": "1h 19m 39s", "remaining_time": "1h 49m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782862} {"loss": 0.15386972, "grad_norm": 2.03983092, "learning_rate": 6.686e-05, "token_acc": 0.94834308, "epoch": 4.21034871, "global_step/max_steps": "3743/8890", "percentage": "42.10%", "elapsed_time": "1h 19m 41s", "remaining_time": "1h 49m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782878} {"loss": 0.24172209, "grad_norm": 2.85966301, "learning_rate": 6.684e-05, "token_acc": 0.9248269, "epoch": 4.21147357, "global_step/max_steps": "3744/8890", "percentage": "42.11%", "elapsed_time": "1h 19m 42s", "remaining_time": "1h 49m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782913} {"loss": 0.1937533, "grad_norm": 3.02880144, "learning_rate": 6.682e-05, "token_acc": 0.93742758, "epoch": 4.21259843, "global_step/max_steps": "3745/8890", "percentage": "42.13%", "elapsed_time": "1h 19m 43s", "remaining_time": "1h 49m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78293} {"loss": 0.18643068, "grad_norm": 2.63975358, "learning_rate": 6.681e-05, "token_acc": 0.93765586, "epoch": 4.21372328, "global_step/max_steps": "3746/8890", "percentage": "42.14%", "elapsed_time": "1h 19m 44s", "remaining_time": "1h 49m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782955} {"loss": 0.08912258, "grad_norm": 1.49300766, "learning_rate": 6.679e-05, "token_acc": 0.97222222, "epoch": 4.21484814, "global_step/max_steps": "3747/8890", "percentage": "42.15%", "elapsed_time": "1h 19m 45s", "remaining_time": "1h 49m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.782949} {"loss": 0.13489485, "grad_norm": 2.47653413, "learning_rate": 6.677e-05, "token_acc": 0.96102151, "epoch": 4.215973, "global_step/max_steps": "3748/8890", "percentage": "42.16%", "elapsed_time": "1h 19m 46s", "remaining_time": "1h 49m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783003} {"loss": 0.15680593, "grad_norm": 2.53172016, "learning_rate": 6.675e-05, "token_acc": 0.95013477, "epoch": 4.21709786, "global_step/max_steps": "3749/8890", "percentage": "42.17%", "elapsed_time": "1h 19m 47s", "remaining_time": "1h 49m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783024} {"loss": 0.16700506, "grad_norm": 2.46473169, "learning_rate": 6.674e-05, "token_acc": 0.9501845, "epoch": 4.21822272, "global_step/max_steps": "3750/8890", "percentage": "42.18%", "elapsed_time": "1h 19m 49s", "remaining_time": "1h 49m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78304} {"loss": 0.18908943, "grad_norm": 2.34315991, "learning_rate": 6.672e-05, "token_acc": 0.93210394, "epoch": 4.21934758, "global_step/max_steps": "3751/8890", "percentage": "42.19%", "elapsed_time": "1h 19m 50s", "remaining_time": "1h 49m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783058} {"loss": 0.18904531, "grad_norm": 2.72816801, "learning_rate": 6.67e-05, "token_acc": 0.94607268, "epoch": 4.22047244, "global_step/max_steps": "3752/8890", "percentage": "42.20%", "elapsed_time": "1h 19m 51s", "remaining_time": "1h 49m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783084} {"loss": 0.17757154, "grad_norm": 2.77492213, "learning_rate": 6.668e-05, "token_acc": 0.94070696, "epoch": 4.2215973, "global_step/max_steps": "3753/8890", "percentage": "42.22%", "elapsed_time": "1h 19m 52s", "remaining_time": "1h 49m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783104} {"loss": 0.22690096, "grad_norm": 2.70132399, "learning_rate": 6.667e-05, "token_acc": 0.93462717, "epoch": 4.22272216, "global_step/max_steps": "3754/8890", "percentage": "42.23%", "elapsed_time": "1h 19m 53s", "remaining_time": "1h 49m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783129} {"loss": 0.21296766, "grad_norm": 3.32957196, "learning_rate": 6.665e-05, "token_acc": 0.94926569, "epoch": 4.22384702, "global_step/max_steps": "3755/8890", "percentage": "42.24%", "elapsed_time": "1h 19m 54s", "remaining_time": "1h 49m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783166} {"loss": 0.09954396, "grad_norm": 1.60343695, "learning_rate": 6.663e-05, "token_acc": 0.96363636, "epoch": 4.22497188, "global_step/max_steps": "3756/8890", "percentage": "42.25%", "elapsed_time": "1h 19m 55s", "remaining_time": "1h 49m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78319} {"loss": 0.14970267, "grad_norm": 2.17742205, "learning_rate": 6.661e-05, "token_acc": 0.95698925, "epoch": 4.22609674, "global_step/max_steps": "3757/8890", "percentage": "42.26%", "elapsed_time": "1h 19m 56s", "remaining_time": "1h 49m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783229} {"loss": 0.13007259, "grad_norm": 2.00414705, "learning_rate": 6.66e-05, "token_acc": 0.95780177, "epoch": 4.2272216, "global_step/max_steps": "3758/8890", "percentage": "42.27%", "elapsed_time": "1h 19m 57s", "remaining_time": "1h 49m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783273} {"loss": 0.17279182, "grad_norm": 2.51959825, "learning_rate": 6.658e-05, "token_acc": 0.93743372, "epoch": 4.22834646, "global_step/max_steps": "3759/8890", "percentage": "42.28%", "elapsed_time": "1h 19m 58s", "remaining_time": "1h 49m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783293} {"loss": 0.18496409, "grad_norm": 2.7664547, "learning_rate": 6.656e-05, "token_acc": 0.93198992, "epoch": 4.22947132, "global_step/max_steps": "3760/8890", "percentage": "42.29%", "elapsed_time": "1h 20m 0s", "remaining_time": "1h 49m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783331} {"loss": 0.12712443, "grad_norm": 2.10039592, "learning_rate": 6.654e-05, "token_acc": 0.96149068, "epoch": 4.23059618, "global_step/max_steps": "3761/8890", "percentage": "42.31%", "elapsed_time": "1h 20m 0s", "remaining_time": "1h 49m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783416} {"loss": 0.15072179, "grad_norm": 2.36058807, "learning_rate": 6.653e-05, "token_acc": 0.94701349, "epoch": 4.23172103, "global_step/max_steps": "3762/8890", "percentage": "42.32%", "elapsed_time": "1h 20m 2s", "remaining_time": "1h 49m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783414} {"loss": 0.18180583, "grad_norm": 2.8128159, "learning_rate": 6.651e-05, "token_acc": 0.93154362, "epoch": 4.23284589, "global_step/max_steps": "3763/8890", "percentage": "42.33%", "elapsed_time": "1h 20m 3s", "remaining_time": "1h 49m 4s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783434} {"loss": 0.1888531, "grad_norm": 2.65746212, "learning_rate": 6.649e-05, "token_acc": 0.94028103, "epoch": 4.23397075, "global_step/max_steps": "3764/8890", "percentage": "42.34%", "elapsed_time": "1h 20m 4s", "remaining_time": "1h 49m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783449} {"loss": 0.1790995, "grad_norm": 2.49893117, "learning_rate": 6.647e-05, "token_acc": 0.94258873, "epoch": 4.23509561, "global_step/max_steps": "3765/8890", "percentage": "42.35%", "elapsed_time": "1h 20m 5s", "remaining_time": "1h 49m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78347} {"loss": 0.16198593, "grad_norm": 3.09894753, "learning_rate": 6.646e-05, "token_acc": 0.94850949, "epoch": 4.23622047, "global_step/max_steps": "3766/8890", "percentage": "42.36%", "elapsed_time": "1h 20m 6s", "remaining_time": "1h 48m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783491} {"loss": 0.16176143, "grad_norm": 2.59035325, "learning_rate": 6.644e-05, "token_acc": 0.95316804, "epoch": 4.23734533, "global_step/max_steps": "3767/8890", "percentage": "42.37%", "elapsed_time": "1h 20m 7s", "remaining_time": "1h 48m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783514} {"loss": 0.13091004, "grad_norm": 2.58771944, "learning_rate": 6.642e-05, "token_acc": 0.95345345, "epoch": 4.23847019, "global_step/max_steps": "3768/8890", "percentage": "42.38%", "elapsed_time": "1h 20m 8s", "remaining_time": "1h 48m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783558} {"loss": 0.22249991, "grad_norm": 2.96422005, "learning_rate": 6.64e-05, "token_acc": 0.93209877, "epoch": 4.23959505, "global_step/max_steps": "3769/8890", "percentage": "42.40%", "elapsed_time": "1h 20m 9s", "remaining_time": "1h 48m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783585} {"loss": 0.1956546, "grad_norm": 2.35288835, "learning_rate": 6.639e-05, "token_acc": 0.94343891, "epoch": 4.24071991, "global_step/max_steps": "3770/8890", "percentage": "42.41%", "elapsed_time": "1h 20m 11s", "remaining_time": "1h 48m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783599} {"loss": 0.19062898, "grad_norm": 2.87348843, "learning_rate": 6.637e-05, "token_acc": 0.9437751, "epoch": 4.24184477, "global_step/max_steps": "3771/8890", "percentage": "42.42%", "elapsed_time": "1h 20m 12s", "remaining_time": "1h 48m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783622} {"loss": 0.17482975, "grad_norm": 2.20958138, "learning_rate": 6.635e-05, "token_acc": 0.94186047, "epoch": 4.24296963, "global_step/max_steps": "3772/8890", "percentage": "42.43%", "elapsed_time": "1h 20m 13s", "remaining_time": "1h 48m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783643} {"loss": 0.17792937, "grad_norm": 2.47122383, "learning_rate": 6.633e-05, "token_acc": 0.94789357, "epoch": 4.24409449, "global_step/max_steps": "3773/8890", "percentage": "42.44%", "elapsed_time": "1h 20m 14s", "remaining_time": "1h 48m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783667} {"loss": 0.14914764, "grad_norm": 2.21159983, "learning_rate": 6.632e-05, "token_acc": 0.94844358, "epoch": 4.24521935, "global_step/max_steps": "3774/8890", "percentage": "42.45%", "elapsed_time": "1h 20m 15s", "remaining_time": "1h 48m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783704} {"loss": 0.16301784, "grad_norm": 2.57697082, "learning_rate": 6.63e-05, "token_acc": 0.95128552, "epoch": 4.24634421, "global_step/max_steps": "3775/8890", "percentage": "42.46%", "elapsed_time": "1h 20m 16s", "remaining_time": "1h 48m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783741} {"loss": 0.14661026, "grad_norm": 2.44839883, "learning_rate": 6.628e-05, "token_acc": 0.95646766, "epoch": 4.24746907, "global_step/max_steps": "3776/8890", "percentage": "42.47%", "elapsed_time": "1h 20m 17s", "remaining_time": "1h 48m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783767} {"loss": 0.21187174, "grad_norm": 2.67039704, "learning_rate": 6.626e-05, "token_acc": 0.93, "epoch": 4.24859393, "global_step/max_steps": "3777/8890", "percentage": "42.49%", "elapsed_time": "1h 20m 18s", "remaining_time": "1h 48m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783789} {"loss": 0.15919682, "grad_norm": 3.14940095, "learning_rate": 6.624e-05, "token_acc": 0.93904209, "epoch": 4.24971879, "global_step/max_steps": "3778/8890", "percentage": "42.50%", "elapsed_time": "1h 20m 19s", "remaining_time": "1h 48m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78385} {"loss": 0.1461941, "grad_norm": 2.16218448, "learning_rate": 6.623e-05, "token_acc": 0.95770677, "epoch": 4.25084364, "global_step/max_steps": "3779/8890", "percentage": "42.51%", "elapsed_time": "1h 20m 20s", "remaining_time": "1h 48m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783873} {"loss": 0.21277456, "grad_norm": 2.99582839, "learning_rate": 6.621e-05, "token_acc": 0.92505593, "epoch": 4.2519685, "global_step/max_steps": "3780/8890", "percentage": "42.52%", "elapsed_time": "1h 20m 22s", "remaining_time": "1h 48m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783893} {"loss": 0.13099602, "grad_norm": 2.41288328, "learning_rate": 6.619e-05, "token_acc": 0.95933735, "epoch": 4.25309336, "global_step/max_steps": "3781/8890", "percentage": "42.53%", "elapsed_time": "1h 20m 23s", "remaining_time": "1h 48m 37s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783929} {"loss": 0.19804566, "grad_norm": 3.10412788, "learning_rate": 6.617e-05, "token_acc": 0.92406542, "epoch": 4.25421822, "global_step/max_steps": "3782/8890", "percentage": "42.54%", "elapsed_time": "1h 20m 24s", "remaining_time": "1h 48m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783949} {"loss": 0.19165295, "grad_norm": 2.23398399, "learning_rate": 6.616e-05, "token_acc": 0.93930906, "epoch": 4.25534308, "global_step/max_steps": "3783/8890", "percentage": "42.55%", "elapsed_time": "1h 20m 25s", "remaining_time": "1h 48m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783965} {"loss": 0.18799713, "grad_norm": 2.70426917, "learning_rate": 6.614e-05, "token_acc": 0.93812155, "epoch": 4.25646794, "global_step/max_steps": "3784/8890", "percentage": "42.56%", "elapsed_time": "1h 20m 26s", "remaining_time": "1h 48m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783985} {"loss": 0.17609914, "grad_norm": 3.11627078, "learning_rate": 6.612e-05, "token_acc": 0.94453507, "epoch": 4.2575928, "global_step/max_steps": "3785/8890", "percentage": "42.58%", "elapsed_time": "1h 20m 27s", "remaining_time": "1h 48m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.783978} {"loss": 0.15905532, "grad_norm": 2.39323187, "learning_rate": 6.61e-05, "token_acc": 0.94505495, "epoch": 4.25871766, "global_step/max_steps": "3786/8890", "percentage": "42.59%", "elapsed_time": "1h 20m 28s", "remaining_time": "1h 48m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784015} {"loss": 0.15581217, "grad_norm": 2.32726574, "learning_rate": 6.609e-05, "token_acc": 0.95483193, "epoch": 4.25984252, "global_step/max_steps": "3787/8890", "percentage": "42.60%", "elapsed_time": "1h 20m 30s", "remaining_time": "1h 48m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784034} {"loss": 0.11764111, "grad_norm": 1.98147964, "learning_rate": 6.607e-05, "token_acc": 0.96270854, "epoch": 4.26096738, "global_step/max_steps": "3788/8890", "percentage": "42.61%", "elapsed_time": "1h 20m 31s", "remaining_time": "1h 48m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78405} {"loss": 0.18660519, "grad_norm": 2.68074679, "learning_rate": 6.605e-05, "token_acc": 0.95604396, "epoch": 4.26209224, "global_step/max_steps": "3789/8890", "percentage": "42.62%", "elapsed_time": "1h 20m 32s", "remaining_time": "1h 48m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784073} {"loss": 0.12984806, "grad_norm": 2.32569623, "learning_rate": 6.603e-05, "token_acc": 0.96502385, "epoch": 4.2632171, "global_step/max_steps": "3790/8890", "percentage": "42.63%", "elapsed_time": "1h 20m 33s", "remaining_time": "1h 48m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784114} {"loss": 0.16436914, "grad_norm": 2.40488386, "learning_rate": 6.602e-05, "token_acc": 0.94606742, "epoch": 4.26434196, "global_step/max_steps": "3791/8890", "percentage": "42.64%", "elapsed_time": "1h 20m 34s", "remaining_time": "1h 48m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784137} {"loss": 0.188278, "grad_norm": 2.35870337, "learning_rate": 6.6e-05, "token_acc": 0.94438615, "epoch": 4.26546682, "global_step/max_steps": "3792/8890", "percentage": "42.65%", "elapsed_time": "1h 20m 35s", "remaining_time": "1h 48m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784153} {"loss": 0.13120241, "grad_norm": 2.13528585, "learning_rate": 6.598e-05, "token_acc": 0.9609375, "epoch": 4.26659168, "global_step/max_steps": "3793/8890", "percentage": "42.67%", "elapsed_time": "1h 20m 36s", "remaining_time": "1h 48m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784232} {"loss": 0.18227297, "grad_norm": 2.92764854, "learning_rate": 6.596e-05, "token_acc": 0.94563843, "epoch": 4.26771654, "global_step/max_steps": "3794/8890", "percentage": "42.68%", "elapsed_time": "1h 20m 37s", "remaining_time": "1h 48m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784258} {"loss": 0.16016515, "grad_norm": 2.27923608, "learning_rate": 6.595e-05, "token_acc": 0.94711538, "epoch": 4.26884139, "global_step/max_steps": "3795/8890", "percentage": "42.69%", "elapsed_time": "1h 20m 38s", "remaining_time": "1h 48m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784279} {"loss": 0.19379288, "grad_norm": 3.22098351, "learning_rate": 6.593e-05, "token_acc": 0.93854167, "epoch": 4.26996625, "global_step/max_steps": "3796/8890", "percentage": "42.70%", "elapsed_time": "1h 20m 39s", "remaining_time": "1h 48m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784302} {"loss": 0.15635987, "grad_norm": 2.62961221, "learning_rate": 6.591e-05, "token_acc": 0.94778481, "epoch": 4.27109111, "global_step/max_steps": "3797/8890", "percentage": "42.71%", "elapsed_time": "1h 20m 41s", "remaining_time": "1h 48m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784322} {"loss": 0.19253403, "grad_norm": 2.84180069, "learning_rate": 6.589e-05, "token_acc": 0.93399015, "epoch": 4.27221597, "global_step/max_steps": "3798/8890", "percentage": "42.72%", "elapsed_time": "1h 20m 42s", "remaining_time": "1h 48m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784341} {"loss": 0.21907192, "grad_norm": 3.01542425, "learning_rate": 6.587e-05, "token_acc": 0.93299621, "epoch": 4.27334083, "global_step/max_steps": "3799/8890", "percentage": "42.73%", "elapsed_time": "1h 20m 43s", "remaining_time": "1h 48m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784367} {"loss": 0.15507913, "grad_norm": 2.03937888, "learning_rate": 6.586e-05, "token_acc": 0.95100402, "epoch": 4.27446569, "global_step/max_steps": "3800/8890", "percentage": "42.74%", "elapsed_time": "1h 20m 44s", "remaining_time": "1h 48m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784381} {"loss": 0.13058093, "grad_norm": 2.32067037, "learning_rate": 6.584e-05, "token_acc": 0.9600432, "epoch": 4.27559055, "global_step/max_steps": "3801/8890", "percentage": "42.76%", "elapsed_time": "1h 20m 45s", "remaining_time": "1h 48m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784398} {"loss": 0.22212881, "grad_norm": 2.67743301, "learning_rate": 6.582e-05, "token_acc": 0.94222689, "epoch": 4.27671541, "global_step/max_steps": "3802/8890", "percentage": "42.77%", "elapsed_time": "1h 20m 46s", "remaining_time": "1h 48m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784417} {"loss": 0.18040678, "grad_norm": 2.26020622, "learning_rate": 6.58e-05, "token_acc": 0.93890675, "epoch": 4.27784027, "global_step/max_steps": "3803/8890", "percentage": "42.78%", "elapsed_time": "1h 20m 48s", "remaining_time": "1h 48m 4s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784439} {"loss": 0.17671229, "grad_norm": 2.51738667, "learning_rate": 6.579e-05, "token_acc": 0.94736842, "epoch": 4.27896513, "global_step/max_steps": "3804/8890", "percentage": "42.79%", "elapsed_time": "1h 20m 49s", "remaining_time": "1h 48m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784462} {"loss": 0.16029578, "grad_norm": 2.06205869, "learning_rate": 6.577e-05, "token_acc": 0.94459339, "epoch": 4.28008999, "global_step/max_steps": "3805/8890", "percentage": "42.80%", "elapsed_time": "1h 20m 50s", "remaining_time": "1h 48m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784481} {"loss": 0.16204292, "grad_norm": 2.75296545, "learning_rate": 6.575e-05, "token_acc": 0.94472876, "epoch": 4.28121485, "global_step/max_steps": "3806/8890", "percentage": "42.81%", "elapsed_time": "1h 20m 51s", "remaining_time": "1h 48m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784503} {"loss": 0.18876925, "grad_norm": 2.5979619, "learning_rate": 6.573e-05, "token_acc": 0.93389991, "epoch": 4.28233971, "global_step/max_steps": "3807/8890", "percentage": "42.82%", "elapsed_time": "1h 20m 52s", "remaining_time": "1h 47m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784527} {"loss": 0.16365108, "grad_norm": 2.68691468, "learning_rate": 6.572e-05, "token_acc": 0.95393759, "epoch": 4.28346457, "global_step/max_steps": "3808/8890", "percentage": "42.83%", "elapsed_time": "1h 20m 53s", "remaining_time": "1h 47m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784566} {"loss": 0.15854678, "grad_norm": 2.61681724, "learning_rate": 6.57e-05, "token_acc": 0.94271482, "epoch": 4.28458943, "global_step/max_steps": "3809/8890", "percentage": "42.85%", "elapsed_time": "1h 20m 54s", "remaining_time": "1h 47m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784588} {"loss": 0.15585278, "grad_norm": 3.08291364, "learning_rate": 6.568e-05, "token_acc": 0.94973545, "epoch": 4.28571429, "global_step/max_steps": "3810/8890", "percentage": "42.86%", "elapsed_time": "1h 20m 55s", "remaining_time": "1h 47m 54s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784623} {"loss": 0.20356306, "grad_norm": 3.13182735, "learning_rate": 6.566e-05, "token_acc": 0.93932322, "epoch": 4.28683915, "global_step/max_steps": "3811/8890", "percentage": "42.87%", "elapsed_time": "1h 20m 56s", "remaining_time": "1h 47m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784648} {"loss": 0.22870979, "grad_norm": 2.77021265, "learning_rate": 6.565e-05, "token_acc": 0.9245648, "epoch": 4.287964, "global_step/max_steps": "3812/8890", "percentage": "42.88%", "elapsed_time": "1h 20m 58s", "remaining_time": "1h 47m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784669} {"loss": 0.12851916, "grad_norm": 2.3713758, "learning_rate": 6.563e-05, "token_acc": 0.95865237, "epoch": 4.28908886, "global_step/max_steps": "3813/8890", "percentage": "42.89%", "elapsed_time": "1h 20m 59s", "remaining_time": "1h 47m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784693} {"loss": 0.17731604, "grad_norm": 2.35060263, "learning_rate": 6.561e-05, "token_acc": 0.93901036, "epoch": 4.29021372, "global_step/max_steps": "3814/8890", "percentage": "42.90%", "elapsed_time": "1h 21m 0s", "remaining_time": "1h 47m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784719} {"loss": 0.16323, "grad_norm": 2.54615855, "learning_rate": 6.559e-05, "token_acc": 0.94240196, "epoch": 4.29133858, "global_step/max_steps": "3815/8890", "percentage": "42.91%", "elapsed_time": "1h 21m 1s", "remaining_time": "1h 47m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784738} {"loss": 0.15531872, "grad_norm": 3.06977272, "learning_rate": 6.557e-05, "token_acc": 0.95245399, "epoch": 4.29246344, "global_step/max_steps": "3816/8890", "percentage": "42.92%", "elapsed_time": "1h 21m 2s", "remaining_time": "1h 47m 45s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784776} {"loss": 0.17083098, "grad_norm": 2.34168458, "learning_rate": 6.556e-05, "token_acc": 0.94917012, "epoch": 4.2935883, "global_step/max_steps": "3817/8890", "percentage": "42.94%", "elapsed_time": "1h 21m 3s", "remaining_time": "1h 47m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784811} {"loss": 0.16759342, "grad_norm": 2.63289785, "learning_rate": 6.554e-05, "token_acc": 0.95071194, "epoch": 4.29471316, "global_step/max_steps": "3818/8890", "percentage": "42.95%", "elapsed_time": "1h 21m 4s", "remaining_time": "1h 47m 42s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784833} {"loss": 0.12455223, "grad_norm": 2.46383572, "learning_rate": 6.552e-05, "token_acc": 0.9640045, "epoch": 4.29583802, "global_step/max_steps": "3819/8890", "percentage": "42.96%", "elapsed_time": "1h 21m 5s", "remaining_time": "1h 47m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784849} {"loss": 0.13324693, "grad_norm": 2.17766857, "learning_rate": 6.55e-05, "token_acc": 0.96436526, "epoch": 4.29696288, "global_step/max_steps": "3820/8890", "percentage": "42.97%", "elapsed_time": "1h 21m 6s", "remaining_time": "1h 47m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784883} {"loss": 0.11534175, "grad_norm": 2.23651266, "learning_rate": 6.549e-05, "token_acc": 0.96239554, "epoch": 4.29808774, "global_step/max_steps": "3821/8890", "percentage": "42.98%", "elapsed_time": "1h 21m 8s", "remaining_time": "1h 47m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784908} {"loss": 0.13815117, "grad_norm": 2.11274958, "learning_rate": 6.547e-05, "token_acc": 0.95884316, "epoch": 4.2992126, "global_step/max_steps": "3822/8890", "percentage": "42.99%", "elapsed_time": "1h 21m 9s", "remaining_time": "1h 47m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784931} {"loss": 0.17231014, "grad_norm": 2.21766949, "learning_rate": 6.545e-05, "token_acc": 0.93797791, "epoch": 4.30033746, "global_step/max_steps": "3823/8890", "percentage": "43.00%", "elapsed_time": "1h 21m 10s", "remaining_time": "1h 47m 35s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784921} {"loss": 0.18668267, "grad_norm": 2.27034688, "learning_rate": 6.543e-05, "token_acc": 0.94686907, "epoch": 4.30146232, "global_step/max_steps": "3824/8890", "percentage": "43.01%", "elapsed_time": "1h 21m 11s", "remaining_time": "1h 47m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784938} {"loss": 0.20725088, "grad_norm": 3.01785994, "learning_rate": 6.542e-05, "token_acc": 0.94647202, "epoch": 4.30258718, "global_step/max_steps": "3825/8890", "percentage": "43.03%", "elapsed_time": "1h 21m 12s", "remaining_time": "1h 47m 32s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784963} {"loss": 0.19018003, "grad_norm": 2.7153759, "learning_rate": 6.54e-05, "token_acc": 0.9377551, "epoch": 4.30371204, "global_step/max_steps": "3826/8890", "percentage": "43.04%", "elapsed_time": "1h 21m 13s", "remaining_time": "1h 47m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784982} {"loss": 0.27119285, "grad_norm": 3.29054332, "learning_rate": 6.538e-05, "token_acc": 0.92159228, "epoch": 4.3048369, "global_step/max_steps": "3827/8890", "percentage": "43.05%", "elapsed_time": "1h 21m 15s", "remaining_time": "1h 47m 29s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.784999} {"loss": 0.16022579, "grad_norm": 2.31590962, "learning_rate": 6.536e-05, "token_acc": 0.94994786, "epoch": 4.30596175, "global_step/max_steps": "3828/8890", "percentage": "43.06%", "elapsed_time": "1h 21m 16s", "remaining_time": "1h 47m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785015} {"loss": 0.14814012, "grad_norm": 2.71977568, "learning_rate": 6.534e-05, "token_acc": 0.95203837, "epoch": 4.30708661, "global_step/max_steps": "3829/8890", "percentage": "43.07%", "elapsed_time": "1h 21m 17s", "remaining_time": "1h 47m 26s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78503} {"loss": 0.13231419, "grad_norm": 2.62882876, "learning_rate": 6.533e-05, "token_acc": 0.96328671, "epoch": 4.30821147, "global_step/max_steps": "3830/8890", "percentage": "43.08%", "elapsed_time": "1h 21m 18s", "remaining_time": "1h 47m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785045} {"loss": 0.15709998, "grad_norm": 2.42589545, "learning_rate": 6.531e-05, "token_acc": 0.95512083, "epoch": 4.30933633, "global_step/max_steps": "3831/8890", "percentage": "43.09%", "elapsed_time": "1h 21m 19s", "remaining_time": "1h 47m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785045} {"loss": 0.16182742, "grad_norm": 3.07081079, "learning_rate": 6.529e-05, "token_acc": 0.9469697, "epoch": 4.31046119, "global_step/max_steps": "3832/8890", "percentage": "43.10%", "elapsed_time": "1h 21m 21s", "remaining_time": "1h 47m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785067} {"loss": 0.16653432, "grad_norm": 2.67117238, "learning_rate": 6.527e-05, "token_acc": 0.95228216, "epoch": 4.31158605, "global_step/max_steps": "3833/8890", "percentage": "43.12%", "elapsed_time": "1h 21m 22s", "remaining_time": "1h 47m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785085} {"loss": 0.15041131, "grad_norm": 2.40141606, "learning_rate": 6.526e-05, "token_acc": 0.94960806, "epoch": 4.31271091, "global_step/max_steps": "3834/8890", "percentage": "43.13%", "elapsed_time": "1h 21m 23s", "remaining_time": "1h 47m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785107} {"loss": 0.23698142, "grad_norm": 3.08076096, "learning_rate": 6.524e-05, "token_acc": 0.9301701, "epoch": 4.31383577, "global_step/max_steps": "3835/8890", "percentage": "43.14%", "elapsed_time": "1h 21m 24s", "remaining_time": "1h 47m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785097} {"loss": 0.17641319, "grad_norm": 2.84788966, "learning_rate": 6.522e-05, "token_acc": 0.94444444, "epoch": 4.31496063, "global_step/max_steps": "3836/8890", "percentage": "43.15%", "elapsed_time": "1h 21m 25s", "remaining_time": "1h 47m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785135} {"loss": 0.19962165, "grad_norm": 2.85938835, "learning_rate": 6.52e-05, "token_acc": 0.92764579, "epoch": 4.31608549, "global_step/max_steps": "3837/8890", "percentage": "43.16%", "elapsed_time": "1h 21m 26s", "remaining_time": "1h 47m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785197} {"loss": 0.20705657, "grad_norm": 2.65152669, "learning_rate": 6.519e-05, "token_acc": 0.93708609, "epoch": 4.31721035, "global_step/max_steps": "3838/8890", "percentage": "43.17%", "elapsed_time": "1h 21m 27s", "remaining_time": "1h 47m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78525} {"loss": 0.16633715, "grad_norm": 2.38638163, "learning_rate": 6.517e-05, "token_acc": 0.94534413, "epoch": 4.31833521, "global_step/max_steps": "3839/8890", "percentage": "43.18%", "elapsed_time": "1h 21m 28s", "remaining_time": "1h 47m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785272} {"loss": 0.19309467, "grad_norm": 2.72057366, "learning_rate": 6.515e-05, "token_acc": 0.93485714, "epoch": 4.31946007, "global_step/max_steps": "3840/8890", "percentage": "43.19%", "elapsed_time": "1h 21m 29s", "remaining_time": "1h 47m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785312} {"loss": 0.19865012, "grad_norm": 2.39007378, "learning_rate": 6.513e-05, "token_acc": 0.94485684, "epoch": 4.32058493, "global_step/max_steps": "3841/8890", "percentage": "43.21%", "elapsed_time": "1h 21m 30s", "remaining_time": "1h 47m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785334} {"loss": 0.13508952, "grad_norm": 1.86426616, "learning_rate": 6.511e-05, "token_acc": 0.95786517, "epoch": 4.32170979, "global_step/max_steps": "3842/8890", "percentage": "43.22%", "elapsed_time": "1h 21m 31s", "remaining_time": "1h 47m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785391} {"loss": 0.17330125, "grad_norm": 2.55394816, "learning_rate": 6.51e-05, "token_acc": 0.94234801, "epoch": 4.32283465, "global_step/max_steps": "3843/8890", "percentage": "43.23%", "elapsed_time": "1h 21m 32s", "remaining_time": "1h 47m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785415} {"loss": 0.1820707, "grad_norm": 2.44065285, "learning_rate": 6.508e-05, "token_acc": 0.93583416, "epoch": 4.32395951, "global_step/max_steps": "3844/8890", "percentage": "43.24%", "elapsed_time": "1h 21m 34s", "remaining_time": "1h 47m 4s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785435} {"loss": 0.22415206, "grad_norm": 2.46555018, "learning_rate": 6.506e-05, "token_acc": 0.93569432, "epoch": 4.32508436, "global_step/max_steps": "3845/8890", "percentage": "43.25%", "elapsed_time": "1h 21m 35s", "remaining_time": "1h 47m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785457} {"loss": 0.18477339, "grad_norm": 2.64894509, "learning_rate": 6.504e-05, "token_acc": 0.94257179, "epoch": 4.32620922, "global_step/max_steps": "3846/8890", "percentage": "43.26%", "elapsed_time": "1h 21m 36s", "remaining_time": "1h 47m 1s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785476} {"loss": 0.14028382, "grad_norm": 2.23571038, "learning_rate": 6.503e-05, "token_acc": 0.96202532, "epoch": 4.32733408, "global_step/max_steps": "3847/8890", "percentage": "43.27%", "elapsed_time": "1h 21m 37s", "remaining_time": "1h 47m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785501} {"loss": 0.11133818, "grad_norm": 1.91541839, "learning_rate": 6.501e-05, "token_acc": 0.96206897, "epoch": 4.32845894, "global_step/max_steps": "3848/8890", "percentage": "43.28%", "elapsed_time": "1h 21m 38s", "remaining_time": "1h 46m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785525} {"loss": 0.20186867, "grad_norm": 2.86584902, "learning_rate": 6.499e-05, "token_acc": 0.93411265, "epoch": 4.3295838, "global_step/max_steps": "3849/8890", "percentage": "43.30%", "elapsed_time": "1h 21m 39s", "remaining_time": "1h 46m 57s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785557} {"loss": 0.18602598, "grad_norm": 2.44098735, "learning_rate": 6.497e-05, "token_acc": 0.94651163, "epoch": 4.33070866, "global_step/max_steps": "3850/8890", "percentage": "43.31%", "elapsed_time": "1h 21m 40s", "remaining_time": "1h 46m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785579} {"loss": 0.16577689, "grad_norm": 2.39786863, "learning_rate": 6.495e-05, "token_acc": 0.94899044, "epoch": 4.33183352, "global_step/max_steps": "3851/8890", "percentage": "43.32%", "elapsed_time": "1h 21m 41s", "remaining_time": "1h 46m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785628} {"loss": 0.1529358, "grad_norm": 2.33649278, "learning_rate": 6.494e-05, "token_acc": 0.94492441, "epoch": 4.33295838, "global_step/max_steps": "3852/8890", "percentage": "43.33%", "elapsed_time": "1h 21m 42s", "remaining_time": "1h 46m 52s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78565} {"loss": 0.15446985, "grad_norm": 3.39270306, "learning_rate": 6.492e-05, "token_acc": 0.9399684, "epoch": 4.33408324, "global_step/max_steps": "3853/8890", "percentage": "43.34%", "elapsed_time": "1h 21m 44s", "remaining_time": "1h 46m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785677} {"loss": 0.161623, "grad_norm": 2.61494493, "learning_rate": 6.49e-05, "token_acc": 0.9462486, "epoch": 4.3352081, "global_step/max_steps": "3854/8890", "percentage": "43.35%", "elapsed_time": "1h 21m 45s", "remaining_time": "1h 46m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785675} {"loss": 0.25905621, "grad_norm": 3.29611397, "learning_rate": 6.488e-05, "token_acc": 0.92390012, "epoch": 4.33633296, "global_step/max_steps": "3855/8890", "percentage": "43.36%", "elapsed_time": "1h 21m 46s", "remaining_time": "1h 46m 48s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785734} {"loss": 0.19821562, "grad_norm": 2.83998513, "learning_rate": 6.487e-05, "token_acc": 0.94223363, "epoch": 4.33745782, "global_step/max_steps": "3856/8890", "percentage": "43.37%", "elapsed_time": "1h 21m 47s", "remaining_time": "1h 46m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785776} {"loss": 0.23703006, "grad_norm": 3.48612309, "learning_rate": 6.485e-05, "token_acc": 0.92729306, "epoch": 4.33858268, "global_step/max_steps": "3857/8890", "percentage": "43.39%", "elapsed_time": "1h 21m 48s", "remaining_time": "1h 46m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785801} {"loss": 0.15230459, "grad_norm": 2.40041804, "learning_rate": 6.483e-05, "token_acc": 0.94512878, "epoch": 4.33970754, "global_step/max_steps": "3858/8890", "percentage": "43.40%", "elapsed_time": "1h 21m 49s", "remaining_time": "1h 46m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785839} {"loss": 0.17554176, "grad_norm": 2.61082959, "learning_rate": 6.481e-05, "token_acc": 0.94404145, "epoch": 4.3408324, "global_step/max_steps": "3859/8890", "percentage": "43.41%", "elapsed_time": "1h 21m 50s", "remaining_time": "1h 46m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785861} {"loss": 0.1239787, "grad_norm": 2.18497419, "learning_rate": 6.479e-05, "token_acc": 0.96575342, "epoch": 4.34195726, "global_step/max_steps": "3860/8890", "percentage": "43.42%", "elapsed_time": "1h 21m 51s", "remaining_time": "1h 46m 40s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785881} {"loss": 0.1775946, "grad_norm": 2.56697154, "learning_rate": 6.478e-05, "token_acc": 0.946, "epoch": 4.34308211, "global_step/max_steps": "3861/8890", "percentage": "43.43%", "elapsed_time": "1h 21m 52s", "remaining_time": "1h 46m 39s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785903} {"loss": 0.1931437, "grad_norm": 2.4768815, "learning_rate": 6.476e-05, "token_acc": 0.94809322, "epoch": 4.34420697, "global_step/max_steps": "3862/8890", "percentage": "43.44%", "elapsed_time": "1h 21m 54s", "remaining_time": "1h 46m 37s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785879} {"loss": 0.18494165, "grad_norm": 3.3243773, "learning_rate": 6.474e-05, "token_acc": 0.93262411, "epoch": 4.34533183, "global_step/max_steps": "3863/8890", "percentage": "43.45%", "elapsed_time": "1h 21m 55s", "remaining_time": "1h 46m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.7859} {"loss": 0.24111228, "grad_norm": 3.27001071, "learning_rate": 6.472e-05, "token_acc": 0.92479436, "epoch": 4.34645669, "global_step/max_steps": "3864/8890", "percentage": "43.46%", "elapsed_time": "1h 21m 56s", "remaining_time": "1h 46m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785942} {"loss": 0.25509247, "grad_norm": 3.015481, "learning_rate": 6.471e-05, "token_acc": 0.92833147, "epoch": 4.34758155, "global_step/max_steps": "3865/8890", "percentage": "43.48%", "elapsed_time": "1h 21m 57s", "remaining_time": "1h 46m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.785966} {"loss": 0.17958069, "grad_norm": 2.40106797, "learning_rate": 6.469e-05, "token_acc": 0.93552169, "epoch": 4.34870641, "global_step/max_steps": "3866/8890", "percentage": "43.49%", "elapsed_time": "1h 21m 58s", "remaining_time": "1h 46m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786002} {"loss": 0.18791002, "grad_norm": 2.31112933, "learning_rate": 6.467e-05, "token_acc": 0.9402823, "epoch": 4.34983127, "global_step/max_steps": "3867/8890", "percentage": "43.50%", "elapsed_time": "1h 21m 59s", "remaining_time": "1h 46m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786019} {"loss": 0.18226908, "grad_norm": 2.2549057, "learning_rate": 6.465e-05, "token_acc": 0.93944791, "epoch": 4.35095613, "global_step/max_steps": "3868/8890", "percentage": "43.51%", "elapsed_time": "1h 22m 0s", "remaining_time": "1h 46m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786051} {"loss": 0.29418999, "grad_norm": 2.92907357, "learning_rate": 6.463e-05, "token_acc": 0.9103512, "epoch": 4.35208099, "global_step/max_steps": "3869/8890", "percentage": "43.52%", "elapsed_time": "1h 22m 1s", "remaining_time": "1h 46m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786095} {"loss": 0.15127511, "grad_norm": 3.38175297, "learning_rate": 6.462e-05, "token_acc": 0.94354839, "epoch": 4.35320585, "global_step/max_steps": "3870/8890", "percentage": "43.53%", "elapsed_time": "1h 22m 2s", "remaining_time": "1h 46m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786128} {"loss": 0.24954432, "grad_norm": 2.66535234, "learning_rate": 6.46e-05, "token_acc": 0.92255566, "epoch": 4.35433071, "global_step/max_steps": "3871/8890", "percentage": "43.54%", "elapsed_time": "1h 22m 3s", "remaining_time": "1h 46m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786159} {"loss": 0.20642117, "grad_norm": 2.28744125, "learning_rate": 6.458e-05, "token_acc": 0.93769152, "epoch": 4.35545557, "global_step/max_steps": "3872/8890", "percentage": "43.55%", "elapsed_time": "1h 22m 5s", "remaining_time": "1h 46m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786179} {"loss": 0.19572857, "grad_norm": 2.75373244, "learning_rate": 6.456e-05, "token_acc": 0.94038929, "epoch": 4.35658043, "global_step/max_steps": "3873/8890", "percentage": "43.57%", "elapsed_time": "1h 22m 6s", "remaining_time": "1h 46m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78623} {"loss": 0.18024796, "grad_norm": 2.98681211, "learning_rate": 6.455e-05, "token_acc": 0.94180704, "epoch": 4.35770529, "global_step/max_steps": "3874/8890", "percentage": "43.58%", "elapsed_time": "1h 22m 7s", "remaining_time": "1h 46m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786265} {"loss": 0.16139363, "grad_norm": 2.18918467, "learning_rate": 6.453e-05, "token_acc": 0.95008606, "epoch": 4.35883015, "global_step/max_steps": "3875/8890", "percentage": "43.59%", "elapsed_time": "1h 22m 8s", "remaining_time": "1h 46m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786282} {"loss": 0.0925973, "grad_norm": 1.64754331, "learning_rate": 6.451e-05, "token_acc": 0.96749729, "epoch": 4.35995501, "global_step/max_steps": "3876/8890", "percentage": "43.60%", "elapsed_time": "1h 22m 9s", "remaining_time": "1h 46m 16s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786279} {"loss": 0.15567935, "grad_norm": 2.66103625, "learning_rate": 6.449e-05, "token_acc": 0.94827586, "epoch": 4.36107987, "global_step/max_steps": "3877/8890", "percentage": "43.61%", "elapsed_time": "1h 22m 10s", "remaining_time": "1h 46m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786296} {"loss": 0.14552036, "grad_norm": 2.28318357, "learning_rate": 6.447e-05, "token_acc": 0.95317726, "epoch": 4.36220472, "global_step/max_steps": "3878/8890", "percentage": "43.62%", "elapsed_time": "1h 22m 11s", "remaining_time": "1h 46m 14s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786319} {"loss": 0.12041246, "grad_norm": 1.96567297, "learning_rate": 6.446e-05, "token_acc": 0.95828636, "epoch": 4.36332958, "global_step/max_steps": "3879/8890", "percentage": "43.63%", "elapsed_time": "1h 22m 12s", "remaining_time": "1h 46m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786342} {"loss": 0.23866095, "grad_norm": 2.69341516, "learning_rate": 6.444e-05, "token_acc": 0.92465753, "epoch": 4.36445444, "global_step/max_steps": "3880/8890", "percentage": "43.64%", "elapsed_time": "1h 22m 14s", "remaining_time": "1h 46m 11s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786328} {"loss": 0.18291384, "grad_norm": 2.73186064, "learning_rate": 6.442e-05, "token_acc": 0.94715447, "epoch": 4.3655793, "global_step/max_steps": "3881/8890", "percentage": "43.66%", "elapsed_time": "1h 22m 15s", "remaining_time": "1h 46m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78638} {"loss": 0.23513146, "grad_norm": 3.42270398, "learning_rate": 6.44e-05, "token_acc": 0.91282051, "epoch": 4.36670416, "global_step/max_steps": "3882/8890", "percentage": "43.67%", "elapsed_time": "1h 22m 16s", "remaining_time": "1h 46m 8s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786401} {"loss": 0.18233673, "grad_norm": 2.89941907, "learning_rate": 6.439e-05, "token_acc": 0.93705293, "epoch": 4.36782902, "global_step/max_steps": "3883/8890", "percentage": "43.68%", "elapsed_time": "1h 22m 17s", "remaining_time": "1h 46m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786435} {"loss": 0.23381762, "grad_norm": 2.95323038, "learning_rate": 6.437e-05, "token_acc": 0.92925659, "epoch": 4.36895388, "global_step/max_steps": "3884/8890", "percentage": "43.69%", "elapsed_time": "1h 22m 18s", "remaining_time": "1h 46m 5s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786457} {"loss": 0.12332456, "grad_norm": 2.37065101, "learning_rate": 6.435e-05, "token_acc": 0.96223317, "epoch": 4.37007874, "global_step/max_steps": "3885/8890", "percentage": "43.70%", "elapsed_time": "1h 22m 19s", "remaining_time": "1h 46m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786495} {"loss": 0.10530294, "grad_norm": 1.9957906, "learning_rate": 6.433e-05, "token_acc": 0.97292994, "epoch": 4.3712036, "global_step/max_steps": "3886/8890", "percentage": "43.71%", "elapsed_time": "1h 22m 20s", "remaining_time": "1h 46m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786533} {"loss": 0.17466134, "grad_norm": 2.221174, "learning_rate": 6.431e-05, "token_acc": 0.94749795, "epoch": 4.37232846, "global_step/max_steps": "3887/8890", "percentage": "43.72%", "elapsed_time": "1h 22m 21s", "remaining_time": "1h 46m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786525} {"loss": 0.16015306, "grad_norm": 2.3391006, "learning_rate": 6.43e-05, "token_acc": 0.9520897, "epoch": 4.37345332, "global_step/max_steps": "3888/8890", "percentage": "43.73%", "elapsed_time": "1h 22m 23s", "remaining_time": "1h 45m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786542} {"loss": 0.19475642, "grad_norm": 2.75808072, "learning_rate": 6.428e-05, "token_acc": 0.92919255, "epoch": 4.37457818, "global_step/max_steps": "3889/8890", "percentage": "43.75%", "elapsed_time": "1h 22m 24s", "remaining_time": "1h 45m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786561} {"loss": 0.15256672, "grad_norm": 2.36616087, "learning_rate": 6.426e-05, "token_acc": 0.94646681, "epoch": 4.37570304, "global_step/max_steps": "3890/8890", "percentage": "43.76%", "elapsed_time": "1h 22m 25s", "remaining_time": "1h 45m 56s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786596} {"loss": 0.17991798, "grad_norm": 2.3419838, "learning_rate": 6.424e-05, "token_acc": 0.94329897, "epoch": 4.3768279, "global_step/max_steps": "3891/8890", "percentage": "43.77%", "elapsed_time": "1h 22m 26s", "remaining_time": "1h 45m 55s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786597} {"loss": 0.15760596, "grad_norm": 2.47845435, "learning_rate": 6.423e-05, "token_acc": 0.94275701, "epoch": 4.37795276, "global_step/max_steps": "3892/8890", "percentage": "43.78%", "elapsed_time": "1h 22m 27s", "remaining_time": "1h 45m 53s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786632} {"loss": 0.17719755, "grad_norm": 3.0852344, "learning_rate": 6.421e-05, "token_acc": 0.94405594, "epoch": 4.37907762, "global_step/max_steps": "3893/8890", "percentage": "43.79%", "elapsed_time": "1h 22m 28s", "remaining_time": "1h 45m 51s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786682} {"loss": 0.22081862, "grad_norm": 3.49530315, "learning_rate": 6.419e-05, "token_acc": 0.92134831, "epoch": 4.38020247, "global_step/max_steps": "3894/8890", "percentage": "43.80%", "elapsed_time": "1h 22m 29s", "remaining_time": "1h 45m 50s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786707} {"loss": 0.13393615, "grad_norm": 2.00028777, "learning_rate": 6.417e-05, "token_acc": 0.95440415, "epoch": 4.38132733, "global_step/max_steps": "3895/8890", "percentage": "43.81%", "elapsed_time": "1h 22m 30s", "remaining_time": "1h 45m 49s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786732} {"loss": 0.23036566, "grad_norm": 2.41726494, "learning_rate": 6.415e-05, "token_acc": 0.92951128, "epoch": 4.38245219, "global_step/max_steps": "3896/8890", "percentage": "43.82%", "elapsed_time": "1h 22m 32s", "remaining_time": "1h 45m 47s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786747} {"loss": 0.17510152, "grad_norm": 2.66208744, "learning_rate": 6.414e-05, "token_acc": 0.94371257, "epoch": 4.38357705, "global_step/max_steps": "3897/8890", "percentage": "43.84%", "elapsed_time": "1h 22m 33s", "remaining_time": "1h 45m 46s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786764} {"loss": 0.1896131, "grad_norm": 3.0056088, "learning_rate": 6.412e-05, "token_acc": 0.93791281, "epoch": 4.38470191, "global_step/max_steps": "3898/8890", "percentage": "43.85%", "elapsed_time": "1h 22m 34s", "remaining_time": "1h 45m 44s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786788} {"loss": 0.15832782, "grad_norm": 2.55949497, "learning_rate": 6.41e-05, "token_acc": 0.9437751, "epoch": 4.38582677, "global_step/max_steps": "3899/8890", "percentage": "43.86%", "elapsed_time": "1h 22m 35s", "remaining_time": "1h 45m 43s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786823} {"loss": 0.14649136, "grad_norm": 2.46616411, "learning_rate": 6.408e-05, "token_acc": 0.95744681, "epoch": 4.38695163, "global_step/max_steps": "3900/8890", "percentage": "43.87%", "elapsed_time": "1h 22m 36s", "remaining_time": "1h 45m 41s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.786861} {"eval_loss": 1.29416406, "eval_runtime": 31.6586, "eval_samples_per_second": 25.364, "eval_steps_per_second": 3.19, "eval_token_acc": 0.73075989, "epoch": 4.38695163, "global_step/max_steps": "3900/8890", "percentage": "43.87%", "elapsed_time": "1h 23m 8s", "remaining_time": "1h 46m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.781866} {"loss": 0.19394794, "grad_norm": 2.68548632, "learning_rate": 6.406e-05, "token_acc": 0.93711968, "epoch": 4.38807649, "global_step/max_steps": "3901/8890", "percentage": "43.88%", "elapsed_time": "1h 23m 22s", "remaining_time": "1h 46m 38s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779744} {"loss": 0.17563194, "grad_norm": 2.40176392, "learning_rate": 6.405e-05, "token_acc": 0.9375, "epoch": 4.38920135, "global_step/max_steps": "3902/8890", "percentage": "43.89%", "elapsed_time": "1h 23m 23s", "remaining_time": "1h 46m 36s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779781} {"loss": 0.16521817, "grad_norm": 2.46724916, "learning_rate": 6.403e-05, "token_acc": 0.94419134, "epoch": 4.39032621, "global_step/max_steps": "3903/8890", "percentage": "43.90%", "elapsed_time": "1h 23m 24s", "remaining_time": "1h 46m 34s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779841} {"loss": 0.11506025, "grad_norm": 2.30787706, "learning_rate": 6.401e-05, "token_acc": 0.96995708, "epoch": 4.39145107, "global_step/max_steps": "3904/8890", "percentage": "43.91%", "elapsed_time": "1h 23m 25s", "remaining_time": "1h 46m 33s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779867} {"loss": 0.19502945, "grad_norm": 2.62524462, "learning_rate": 6.399e-05, "token_acc": 0.94300518, "epoch": 4.39257593, "global_step/max_steps": "3905/8890", "percentage": "43.93%", "elapsed_time": "1h 23m 27s", "remaining_time": "1h 46m 31s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.77989} {"loss": 0.18861078, "grad_norm": 3.32840824, "learning_rate": 6.398e-05, "token_acc": 0.93121019, "epoch": 4.39370079, "global_step/max_steps": "3906/8890", "percentage": "43.94%", "elapsed_time": "1h 23m 28s", "remaining_time": "1h 46m 30s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779917} {"loss": 0.2254585, "grad_norm": 2.77012801, "learning_rate": 6.396e-05, "token_acc": 0.9406037, "epoch": 4.39482565, "global_step/max_steps": "3907/8890", "percentage": "43.95%", "elapsed_time": "1h 23m 29s", "remaining_time": "1h 46m 28s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779939} {"loss": 0.22164635, "grad_norm": 2.49636936, "learning_rate": 6.394e-05, "token_acc": 0.92803347, "epoch": 4.39595051, "global_step/max_steps": "3908/8890", "percentage": "43.96%", "elapsed_time": "1h 23m 30s", "remaining_time": "1h 46m 27s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779954} {"loss": 0.18328682, "grad_norm": 3.0847652, "learning_rate": 6.392e-05, "token_acc": 0.92831962, "epoch": 4.39707537, "global_step/max_steps": "3909/8890", "percentage": "43.97%", "elapsed_time": "1h 23m 31s", "remaining_time": "1h 46m 25s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.779991} {"loss": 0.20508072, "grad_norm": 2.95277739, "learning_rate": 6.39e-05, "token_acc": 0.93453725, "epoch": 4.39820022, "global_step/max_steps": "3910/8890", "percentage": "43.98%", "elapsed_time": "1h 23m 32s", "remaining_time": "1h 46m 24s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780029} {"loss": 0.17717628, "grad_norm": 2.83672976, "learning_rate": 6.389e-05, "token_acc": 0.94803371, "epoch": 4.39932508, "global_step/max_steps": "3911/8890", "percentage": "43.99%", "elapsed_time": "1h 23m 33s", "remaining_time": "1h 46m 22s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780054} {"loss": 0.19745672, "grad_norm": 2.84150696, "learning_rate": 6.387e-05, "token_acc": 0.93619792, "epoch": 4.40044994, "global_step/max_steps": "3912/8890", "percentage": "44.00%", "elapsed_time": "1h 23m 34s", "remaining_time": "1h 46m 21s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780089} {"loss": 0.1888265, "grad_norm": 2.6562047, "learning_rate": 6.385e-05, "token_acc": 0.93415638, "epoch": 4.4015748, "global_step/max_steps": "3913/8890", "percentage": "44.02%", "elapsed_time": "1h 23m 35s", "remaining_time": "1h 46m 19s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780108} {"loss": 0.19319499, "grad_norm": 2.75411916, "learning_rate": 6.383e-05, "token_acc": 0.93589744, "epoch": 4.40269966, "global_step/max_steps": "3914/8890", "percentage": "44.03%", "elapsed_time": "1h 23m 37s", "remaining_time": "1h 46m 18s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.78012} {"loss": 0.19043915, "grad_norm": 2.67777205, "learning_rate": 6.381e-05, "token_acc": 0.94040524, "epoch": 4.40382452, "global_step/max_steps": "3915/8890", "percentage": "44.04%", "elapsed_time": "1h 23m 38s", "remaining_time": "1h 46m 17s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780141} {"loss": 0.15197399, "grad_norm": 2.4343853, "learning_rate": 6.38e-05, "token_acc": 0.95016611, "epoch": 4.40494938, "global_step/max_steps": "3916/8890", "percentage": "44.05%", "elapsed_time": "1h 23m 39s", "remaining_time": "1h 46m 15s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780179} {"loss": 0.15606797, "grad_norm": 2.68151283, "learning_rate": 6.378e-05, "token_acc": 0.95328467, "epoch": 4.40607424, "global_step/max_steps": "3917/8890", "percentage": "44.06%", "elapsed_time": "1h 23m 40s", "remaining_time": "1h 46m 13s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780269} {"loss": 0.22847237, "grad_norm": 2.90820336, "learning_rate": 6.376e-05, "token_acc": 0.92657005, "epoch": 4.4071991, "global_step/max_steps": "3918/8890", "percentage": "44.07%", "elapsed_time": "1h 23m 41s", "remaining_time": "1h 46m 12s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780278} {"loss": 0.1639618, "grad_norm": 2.16195822, "learning_rate": 6.374e-05, "token_acc": 0.94891945, "epoch": 4.40832396, "global_step/max_steps": "3919/8890", "percentage": "44.08%", "elapsed_time": "1h 23m 42s", "remaining_time": "1h 46m 10s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780274} {"loss": 0.20395875, "grad_norm": 2.65860009, "learning_rate": 6.373e-05, "token_acc": 0.93706294, "epoch": 4.40944882, "global_step/max_steps": "3920/8890", "percentage": "44.09%", "elapsed_time": "1h 23m 43s", "remaining_time": "1h 46m 9s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780302} {"loss": 0.20800731, "grad_norm": 2.70592284, "learning_rate": 6.371e-05, "token_acc": 0.94467497, "epoch": 4.41057368, "global_step/max_steps": "3921/8890", "percentage": "44.11%", "elapsed_time": "1h 23m 44s", "remaining_time": "1h 46m 7s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780322} {"loss": 0.14530233, "grad_norm": 2.46632576, "learning_rate": 6.369e-05, "token_acc": 0.95130641, "epoch": 4.41169854, "global_step/max_steps": "3922/8890", "percentage": "44.12%", "elapsed_time": "1h 23m 45s", "remaining_time": "1h 46m 6s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780347} {"loss": 0.15462807, "grad_norm": 2.17653203, "learning_rate": 6.367e-05, "token_acc": 0.95716034, "epoch": 4.4128234, "global_step/max_steps": "3923/8890", "percentage": "44.13%", "elapsed_time": "1h 23m 47s", "remaining_time": "1h 46m 4s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780367} {"loss": 0.1380251, "grad_norm": 2.59221148, "learning_rate": 6.365e-05, "token_acc": 0.95342466, "epoch": 4.41394826, "global_step/max_steps": "3924/8890", "percentage": "44.14%", "elapsed_time": "1h 23m 48s", "remaining_time": "1h 46m 3s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780387} {"loss": 0.18936506, "grad_norm": 2.31895232, "learning_rate": 6.364e-05, "token_acc": 0.93462247, "epoch": 4.41507312, "global_step/max_steps": "3925/8890", "percentage": "44.15%", "elapsed_time": "1h 23m 49s", "remaining_time": "1h 46m 2s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780369} {"loss": 0.17825297, "grad_norm": 2.84231305, "learning_rate": 6.362e-05, "token_acc": 0.94075829, "epoch": 4.41619798, "global_step/max_steps": "3926/8890", "percentage": "44.16%", "elapsed_time": "1h 23m 50s", "remaining_time": "1h 46m 0s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780386} {"loss": 0.26855272, "grad_norm": 3.02859068, "learning_rate": 6.36e-05, "token_acc": 0.91692308, "epoch": 4.41732283, "global_step/max_steps": "3927/8890", "percentage": "44.17%", "elapsed_time": "1h 23m 52s", "remaining_time": "1h 45m 59s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780385} {"loss": 0.16696124, "grad_norm": 2.86049843, "learning_rate": 6.358e-05, "token_acc": 0.93532338, "epoch": 4.41844769, "global_step/max_steps": "3928/8890", "percentage": "44.18%", "elapsed_time": "1h 23m 53s", "remaining_time": "1h 45m 58s", "memory(GiB)": 23.1, "train_speed(iter/s)": 0.780424}