{ "best_global_step": 1500, "best_metric": 0.88732374, "best_model_checkpoint": "/workspace/Audio-Agent/train/checkpoints/audio_verify_lr1e-4_ep10_20251231_064922/v0-20251231-064941/checkpoint-1500", "epoch": 3.712035995500562, "eval_steps": 300, "global_step": 3300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0011248593925759281, "grad_norm": 1.3470062017440796, "learning_rate": 2.2471910112359554e-07, "loss": 1.4515883922576904, "step": 1, "token_acc": 0.660377358490566 }, { "epoch": 0.0022497187851518562, "grad_norm": 1.0744874477386475, "learning_rate": 4.494382022471911e-07, "loss": 1.7230136394500732, "step": 2, "token_acc": 0.5858050847457628 }, { "epoch": 0.003374578177727784, "grad_norm": 1.2829084396362305, "learning_rate": 6.741573033707866e-07, "loss": 1.585991621017456, "step": 3, "token_acc": 0.6180257510729614 }, { "epoch": 0.0044994375703037125, "grad_norm": 1.0637013912200928, "learning_rate": 8.988764044943822e-07, "loss": 1.4914261102676392, "step": 4, "token_acc": 0.6329113924050633 }, { "epoch": 0.00562429696287964, "grad_norm": 1.0405091047286987, "learning_rate": 1.1235955056179775e-06, "loss": 1.2105140686035156, "step": 5, "token_acc": 0.6929411764705883 }, { "epoch": 0.006749156355455568, "grad_norm": 1.1803241968154907, "learning_rate": 1.3483146067415732e-06, "loss": 1.3100998401641846, "step": 6, "token_acc": 0.6727037516170763 }, { "epoch": 0.007874015748031496, "grad_norm": 1.1456297636032104, "learning_rate": 1.5730337078651688e-06, "loss": 1.4409693479537964, "step": 7, "token_acc": 0.6466591166477916 }, { "epoch": 0.008998875140607425, "grad_norm": 1.3265265226364136, "learning_rate": 1.7977528089887643e-06, "loss": 1.4683387279510498, "step": 8, "token_acc": 0.6706408345752608 }, { "epoch": 0.010123734533183352, "grad_norm": 1.1536260843276978, "learning_rate": 2.0224719101123594e-06, "loss": 1.3861145973205566, "step": 9, "token_acc": 0.6600496277915633 }, { "epoch": 0.01124859392575928, "grad_norm": 1.0403997898101807, "learning_rate": 2.247191011235955e-06, "loss": 1.4533417224884033, "step": 10, "token_acc": 0.6336166194523135 }, { "epoch": 0.012373453318335208, "grad_norm": 1.214721918106079, "learning_rate": 2.4719101123595505e-06, "loss": 1.8591206073760986, "step": 11, "token_acc": 0.5719257540603249 }, { "epoch": 0.013498312710911136, "grad_norm": 1.3084474802017212, "learning_rate": 2.6966292134831465e-06, "loss": 1.4078933000564575, "step": 12, "token_acc": 0.6354466858789626 }, { "epoch": 0.014623172103487065, "grad_norm": 1.013925552368164, "learning_rate": 2.921348314606742e-06, "loss": 1.7323365211486816, "step": 13, "token_acc": 0.5812310797174571 }, { "epoch": 0.015748031496062992, "grad_norm": 1.0962743759155273, "learning_rate": 3.1460674157303375e-06, "loss": 1.8560429811477661, "step": 14, "token_acc": 0.5661538461538461 }, { "epoch": 0.01687289088863892, "grad_norm": 1.2141329050064087, "learning_rate": 3.3707865168539327e-06, "loss": 1.8151319026947021, "step": 15, "token_acc": 0.5662042875157629 }, { "epoch": 0.01799775028121485, "grad_norm": 1.034948706626892, "learning_rate": 3.5955056179775286e-06, "loss": 1.7918034791946411, "step": 16, "token_acc": 0.5778175313059034 }, { "epoch": 0.019122609673790775, "grad_norm": 1.205761194229126, "learning_rate": 3.820224719101124e-06, "loss": 1.5968117713928223, "step": 17, "token_acc": 0.6270543615676359 }, { "epoch": 0.020247469066366704, "grad_norm": 1.4006555080413818, "learning_rate": 4.044943820224719e-06, "loss": 1.4657845497131348, "step": 18, "token_acc": 0.6362275449101796 }, { "epoch": 0.021372328458942633, "grad_norm": 0.9612377882003784, "learning_rate": 4.269662921348315e-06, "loss": 1.4240102767944336, "step": 19, "token_acc": 0.6454033771106942 }, { "epoch": 0.02249718785151856, "grad_norm": 1.3567054271697998, "learning_rate": 4.49438202247191e-06, "loss": 1.6458466053009033, "step": 20, "token_acc": 0.5810473815461347 }, { "epoch": 0.023622047244094488, "grad_norm": 1.2446835041046143, "learning_rate": 4.719101123595506e-06, "loss": 1.630324363708496, "step": 21, "token_acc": 0.5908045977011495 }, { "epoch": 0.024746906636670417, "grad_norm": 1.08684241771698, "learning_rate": 4.943820224719101e-06, "loss": 1.5199756622314453, "step": 22, "token_acc": 0.6175609756097561 }, { "epoch": 0.025871766029246346, "grad_norm": 1.20151686668396, "learning_rate": 5.168539325842697e-06, "loss": 1.5218403339385986, "step": 23, "token_acc": 0.6300813008130082 }, { "epoch": 0.02699662542182227, "grad_norm": 1.2422466278076172, "learning_rate": 5.393258426966293e-06, "loss": 1.7252498865127563, "step": 24, "token_acc": 0.5941176470588235 }, { "epoch": 0.0281214848143982, "grad_norm": 1.0478876829147339, "learning_rate": 5.617977528089888e-06, "loss": 1.3684557676315308, "step": 25, "token_acc": 0.6493055555555556 }, { "epoch": 0.02924634420697413, "grad_norm": 1.2565279006958008, "learning_rate": 5.842696629213484e-06, "loss": 1.7030420303344727, "step": 26, "token_acc": 0.5712643678160919 }, { "epoch": 0.030371203599550055, "grad_norm": 1.2267240285873413, "learning_rate": 6.067415730337079e-06, "loss": 1.5101313591003418, "step": 27, "token_acc": 0.6076833527357393 }, { "epoch": 0.031496062992125984, "grad_norm": 1.557116985321045, "learning_rate": 6.292134831460675e-06, "loss": 1.573581576347351, "step": 28, "token_acc": 0.5878084179970973 }, { "epoch": 0.03262092238470191, "grad_norm": 1.1201764345169067, "learning_rate": 6.51685393258427e-06, "loss": 1.607724905014038, "step": 29, "token_acc": 0.6097814776274714 }, { "epoch": 0.03374578177727784, "grad_norm": 1.314414620399475, "learning_rate": 6.741573033707865e-06, "loss": 1.5429556369781494, "step": 30, "token_acc": 0.5941845764854614 }, { "epoch": 0.03487064116985377, "grad_norm": 1.0729260444641113, "learning_rate": 6.96629213483146e-06, "loss": 1.53144371509552, "step": 31, "token_acc": 0.6041257367387033 }, { "epoch": 0.0359955005624297, "grad_norm": 1.2891017198562622, "learning_rate": 7.191011235955057e-06, "loss": 1.5921709537506104, "step": 32, "token_acc": 0.5945945945945946 }, { "epoch": 0.03712035995500562, "grad_norm": 1.6150786876678467, "learning_rate": 7.415730337078652e-06, "loss": 1.2785022258758545, "step": 33, "token_acc": 0.6637781629116117 }, { "epoch": 0.03824521934758155, "grad_norm": 1.2646970748901367, "learning_rate": 7.640449438202247e-06, "loss": 1.4817782640457153, "step": 34, "token_acc": 0.6160714285714286 }, { "epoch": 0.03937007874015748, "grad_norm": 1.3901053667068481, "learning_rate": 7.865168539325843e-06, "loss": 1.3237351179122925, "step": 35, "token_acc": 0.6415620641562064 }, { "epoch": 0.04049493813273341, "grad_norm": 1.3028812408447266, "learning_rate": 8.089887640449438e-06, "loss": 1.7080365419387817, "step": 36, "token_acc": 0.5869324473975637 }, { "epoch": 0.04161979752530934, "grad_norm": 1.353768229484558, "learning_rate": 8.314606741573035e-06, "loss": 1.3069013357162476, "step": 37, "token_acc": 0.6426174496644296 }, { "epoch": 0.04274465691788527, "grad_norm": 1.2163562774658203, "learning_rate": 8.53932584269663e-06, "loss": 1.5081114768981934, "step": 38, "token_acc": 0.6177558569667078 }, { "epoch": 0.043869516310461196, "grad_norm": 1.528554916381836, "learning_rate": 8.764044943820226e-06, "loss": 1.1947182416915894, "step": 39, "token_acc": 0.6569678407350689 }, { "epoch": 0.04499437570303712, "grad_norm": 1.1245859861373901, "learning_rate": 8.98876404494382e-06, "loss": 1.506086826324463, "step": 40, "token_acc": 0.6189473684210526 }, { "epoch": 0.04611923509561305, "grad_norm": 1.236761212348938, "learning_rate": 9.213483146067416e-06, "loss": 1.4641669988632202, "step": 41, "token_acc": 0.6288156288156288 }, { "epoch": 0.047244094488188976, "grad_norm": 1.0923482179641724, "learning_rate": 9.438202247191012e-06, "loss": 1.1512141227722168, "step": 42, "token_acc": 0.6916780354706685 }, { "epoch": 0.048368953880764905, "grad_norm": 1.0135164260864258, "learning_rate": 9.662921348314608e-06, "loss": 1.4230196475982666, "step": 43, "token_acc": 0.6210790464240903 }, { "epoch": 0.049493813273340834, "grad_norm": 1.01791512966156, "learning_rate": 9.887640449438202e-06, "loss": 1.3022429943084717, "step": 44, "token_acc": 0.6258064516129033 }, { "epoch": 0.05061867266591676, "grad_norm": 1.0668046474456787, "learning_rate": 1.0112359550561798e-05, "loss": 1.3496047258377075, "step": 45, "token_acc": 0.609642301710731 }, { "epoch": 0.05174353205849269, "grad_norm": 0.9278742074966431, "learning_rate": 1.0337078651685394e-05, "loss": 1.345524549484253, "step": 46, "token_acc": 0.6465005931198102 }, { "epoch": 0.052868391451068614, "grad_norm": 0.873041570186615, "learning_rate": 1.056179775280899e-05, "loss": 1.1755421161651611, "step": 47, "token_acc": 0.6793349168646081 }, { "epoch": 0.05399325084364454, "grad_norm": 0.8777185678482056, "learning_rate": 1.0786516853932586e-05, "loss": 1.4298007488250732, "step": 48, "token_acc": 0.6218097447795824 }, { "epoch": 0.05511811023622047, "grad_norm": 0.7685665488243103, "learning_rate": 1.101123595505618e-05, "loss": 1.4883549213409424, "step": 49, "token_acc": 0.6263577118030412 }, { "epoch": 0.0562429696287964, "grad_norm": 0.8328217267990112, "learning_rate": 1.1235955056179776e-05, "loss": 1.2712420225143433, "step": 50, "token_acc": 0.6729810568295115 }, { "epoch": 0.05736782902137233, "grad_norm": 0.8639029860496521, "learning_rate": 1.146067415730337e-05, "loss": 1.3554656505584717, "step": 51, "token_acc": 0.6535947712418301 }, { "epoch": 0.05849268841394826, "grad_norm": 0.9579947590827942, "learning_rate": 1.1685393258426968e-05, "loss": 1.584376573562622, "step": 52, "token_acc": 0.6009174311926605 }, { "epoch": 0.05961754780652419, "grad_norm": 0.8351174592971802, "learning_rate": 1.1910112359550562e-05, "loss": 1.0528104305267334, "step": 53, "token_acc": 0.717546362339515 }, { "epoch": 0.06074240719910011, "grad_norm": 0.9072550535202026, "learning_rate": 1.2134831460674158e-05, "loss": 1.4246162176132202, "step": 54, "token_acc": 0.6538461538461539 }, { "epoch": 0.06186726659167604, "grad_norm": 0.9756302833557129, "learning_rate": 1.2359550561797752e-05, "loss": 1.3396657705307007, "step": 55, "token_acc": 0.6724700761697497 }, { "epoch": 0.06299212598425197, "grad_norm": 1.0073281526565552, "learning_rate": 1.258426966292135e-05, "loss": 1.3235889673233032, "step": 56, "token_acc": 0.6330935251798561 }, { "epoch": 0.06411698537682789, "grad_norm": 1.0675331354141235, "learning_rate": 1.2808988764044943e-05, "loss": 1.3443609476089478, "step": 57, "token_acc": 0.6590604026845638 }, { "epoch": 0.06524184476940383, "grad_norm": 0.8426030278205872, "learning_rate": 1.303370786516854e-05, "loss": 1.3574962615966797, "step": 58, "token_acc": 0.6383881230116649 }, { "epoch": 0.06636670416197975, "grad_norm": 0.9359647631645203, "learning_rate": 1.3258426966292136e-05, "loss": 1.42192542552948, "step": 59, "token_acc": 0.6515986769570011 }, { "epoch": 0.06749156355455568, "grad_norm": 0.9200222492218018, "learning_rate": 1.348314606741573e-05, "loss": 1.0539551973342896, "step": 60, "token_acc": 0.7100840336134454 }, { "epoch": 0.0686164229471316, "grad_norm": 0.9137537479400635, "learning_rate": 1.3707865168539327e-05, "loss": 1.3222023248672485, "step": 61, "token_acc": 0.655421686746988 }, { "epoch": 0.06974128233970754, "grad_norm": 0.8464084267616272, "learning_rate": 1.393258426966292e-05, "loss": 1.366803765296936, "step": 62, "token_acc": 0.6375727348295926 }, { "epoch": 0.07086614173228346, "grad_norm": 0.7894864082336426, "learning_rate": 1.4157303370786518e-05, "loss": 1.2143940925598145, "step": 63, "token_acc": 0.6943396226415094 }, { "epoch": 0.0719910011248594, "grad_norm": 0.807745099067688, "learning_rate": 1.4382022471910114e-05, "loss": 1.2509465217590332, "step": 64, "token_acc": 0.6897506925207756 }, { "epoch": 0.07311586051743532, "grad_norm": 0.8646552562713623, "learning_rate": 1.4606741573033709e-05, "loss": 1.1909130811691284, "step": 65, "token_acc": 0.6817102137767221 }, { "epoch": 0.07424071991001124, "grad_norm": 0.9171661734580994, "learning_rate": 1.4831460674157305e-05, "loss": 1.2457125186920166, "step": 66, "token_acc": 0.6857142857142857 }, { "epoch": 0.07536557930258718, "grad_norm": 0.9627224802970886, "learning_rate": 1.5056179775280899e-05, "loss": 1.2644917964935303, "step": 67, "token_acc": 0.6632390745501285 }, { "epoch": 0.0764904386951631, "grad_norm": 0.8993395566940308, "learning_rate": 1.5280898876404495e-05, "loss": 1.173771619796753, "step": 68, "token_acc": 0.6857463524130191 }, { "epoch": 0.07761529808773904, "grad_norm": 0.8648054599761963, "learning_rate": 1.5505617977528093e-05, "loss": 1.0487421751022339, "step": 69, "token_acc": 0.7262210796915167 }, { "epoch": 0.07874015748031496, "grad_norm": 0.8609387874603271, "learning_rate": 1.5730337078651687e-05, "loss": 1.1142604351043701, "step": 70, "token_acc": 0.7005649717514124 }, { "epoch": 0.0798650168728909, "grad_norm": 0.7601842880249023, "learning_rate": 1.595505617977528e-05, "loss": 1.2593642473220825, "step": 71, "token_acc": 0.6753574432296047 }, { "epoch": 0.08098987626546682, "grad_norm": 0.9326526522636414, "learning_rate": 1.6179775280898875e-05, "loss": 1.3107202053070068, "step": 72, "token_acc": 0.6585648148148148 }, { "epoch": 0.08211473565804274, "grad_norm": 0.9307423830032349, "learning_rate": 1.6404494382022473e-05, "loss": 1.458076000213623, "step": 73, "token_acc": 0.6427718040621266 }, { "epoch": 0.08323959505061868, "grad_norm": 0.8788859248161316, "learning_rate": 1.662921348314607e-05, "loss": 1.1724023818969727, "step": 74, "token_acc": 0.6904761904761905 }, { "epoch": 0.0843644544431946, "grad_norm": 0.9591565728187561, "learning_rate": 1.6853932584269665e-05, "loss": 1.2950127124786377, "step": 75, "token_acc": 0.658351409978308 }, { "epoch": 0.08548931383577053, "grad_norm": 1.0698617696762085, "learning_rate": 1.707865168539326e-05, "loss": 1.2595783472061157, "step": 76, "token_acc": 0.655076495132128 }, { "epoch": 0.08661417322834646, "grad_norm": 1.0414600372314453, "learning_rate": 1.7303370786516853e-05, "loss": 1.3235305547714233, "step": 77, "token_acc": 0.6440944881889764 }, { "epoch": 0.08773903262092239, "grad_norm": 0.8756704926490784, "learning_rate": 1.752808988764045e-05, "loss": 1.1890376806259155, "step": 78, "token_acc": 0.6693227091633466 }, { "epoch": 0.08886389201349831, "grad_norm": 0.9239323139190674, "learning_rate": 1.7752808988764045e-05, "loss": 0.9975174069404602, "step": 79, "token_acc": 0.6989409984871406 }, { "epoch": 0.08998875140607424, "grad_norm": 0.8732433915138245, "learning_rate": 1.797752808988764e-05, "loss": 1.3525408506393433, "step": 80, "token_acc": 0.6482142857142857 }, { "epoch": 0.09111361079865017, "grad_norm": 0.7412431240081787, "learning_rate": 1.8202247191011237e-05, "loss": 0.9761897921562195, "step": 81, "token_acc": 0.7436974789915967 }, { "epoch": 0.0922384701912261, "grad_norm": 0.8692667484283447, "learning_rate": 1.842696629213483e-05, "loss": 1.3313086032867432, "step": 82, "token_acc": 0.6524008350730689 }, { "epoch": 0.09336332958380203, "grad_norm": 0.866227388381958, "learning_rate": 1.865168539325843e-05, "loss": 1.1848812103271484, "step": 83, "token_acc": 0.682233502538071 }, { "epoch": 0.09448818897637795, "grad_norm": 0.873860239982605, "learning_rate": 1.8876404494382024e-05, "loss": 1.3195668458938599, "step": 84, "token_acc": 0.648506151142355 }, { "epoch": 0.09561304836895389, "grad_norm": 0.9415932297706604, "learning_rate": 1.9101123595505618e-05, "loss": 1.0525541305541992, "step": 85, "token_acc": 0.7005988023952096 }, { "epoch": 0.09673790776152981, "grad_norm": 0.8630489706993103, "learning_rate": 1.9325842696629215e-05, "loss": 1.1755139827728271, "step": 86, "token_acc": 0.6885572139303483 }, { "epoch": 0.09786276715410573, "grad_norm": 0.9275174140930176, "learning_rate": 1.955056179775281e-05, "loss": 1.2663335800170898, "step": 87, "token_acc": 0.6625971143174251 }, { "epoch": 0.09898762654668167, "grad_norm": 0.8576812148094177, "learning_rate": 1.9775280898876404e-05, "loss": 0.9503990411758423, "step": 88, "token_acc": 0.7406060606060606 }, { "epoch": 0.10011248593925759, "grad_norm": 0.9007286429405212, "learning_rate": 2e-05, "loss": 1.341196060180664, "step": 89, "token_acc": 0.6458699472759226 }, { "epoch": 0.10123734533183353, "grad_norm": 0.8723718523979187, "learning_rate": 2.0224719101123596e-05, "loss": 1.1962220668792725, "step": 90, "token_acc": 0.6802083333333333 }, { "epoch": 0.10236220472440945, "grad_norm": 0.8859781622886658, "learning_rate": 2.0449438202247194e-05, "loss": 1.10768461227417, "step": 91, "token_acc": 0.6877113866967306 }, { "epoch": 0.10348706411698538, "grad_norm": 0.8425499796867371, "learning_rate": 2.0674157303370788e-05, "loss": 1.2387433052062988, "step": 92, "token_acc": 0.6778523489932886 }, { "epoch": 0.1046119235095613, "grad_norm": 1.2268575429916382, "learning_rate": 2.0898876404494382e-05, "loss": 1.2949299812316895, "step": 93, "token_acc": 0.6575144508670521 }, { "epoch": 0.10573678290213723, "grad_norm": 1.0701266527175903, "learning_rate": 2.112359550561798e-05, "loss": 1.0500776767730713, "step": 94, "token_acc": 0.7061469265367316 }, { "epoch": 0.10686164229471316, "grad_norm": 1.0066039562225342, "learning_rate": 2.1348314606741574e-05, "loss": 1.3312232494354248, "step": 95, "token_acc": 0.6386651323360184 }, { "epoch": 0.10798650168728909, "grad_norm": 1.0397088527679443, "learning_rate": 2.157303370786517e-05, "loss": 1.1703020334243774, "step": 96, "token_acc": 0.6789473684210526 }, { "epoch": 0.10911136107986502, "grad_norm": 0.9855154752731323, "learning_rate": 2.1797752808988763e-05, "loss": 1.2575390338897705, "step": 97, "token_acc": 0.6602357984994641 }, { "epoch": 0.11023622047244094, "grad_norm": 1.0435550212860107, "learning_rate": 2.202247191011236e-05, "loss": 1.2061662673950195, "step": 98, "token_acc": 0.6706875753920386 }, { "epoch": 0.11136107986501688, "grad_norm": 1.095645546913147, "learning_rate": 2.2247191011235958e-05, "loss": 1.1129781007766724, "step": 99, "token_acc": 0.6853055916775033 }, { "epoch": 0.1124859392575928, "grad_norm": 0.9641037583351135, "learning_rate": 2.2471910112359552e-05, "loss": 1.1178693771362305, "step": 100, "token_acc": 0.6949317738791423 }, { "epoch": 0.11361079865016872, "grad_norm": 1.1352159976959229, "learning_rate": 2.2696629213483146e-05, "loss": 1.1637626886367798, "step": 101, "token_acc": 0.6752767527675276 }, { "epoch": 0.11473565804274466, "grad_norm": 0.9879117012023926, "learning_rate": 2.292134831460674e-05, "loss": 1.3453770875930786, "step": 102, "token_acc": 0.6631878557874763 }, { "epoch": 0.11586051743532058, "grad_norm": 1.0432580709457397, "learning_rate": 2.314606741573034e-05, "loss": 1.141789197921753, "step": 103, "token_acc": 0.6924778761061947 }, { "epoch": 0.11698537682789652, "grad_norm": 1.1465791463851929, "learning_rate": 2.3370786516853936e-05, "loss": 1.2137408256530762, "step": 104, "token_acc": 0.689703808180536 }, { "epoch": 0.11811023622047244, "grad_norm": 1.1648608446121216, "learning_rate": 2.359550561797753e-05, "loss": 1.185311198234558, "step": 105, "token_acc": 0.6949458483754513 }, { "epoch": 0.11923509561304838, "grad_norm": 1.1294857263565063, "learning_rate": 2.3820224719101125e-05, "loss": 1.3137633800506592, "step": 106, "token_acc": 0.6497524752475248 }, { "epoch": 0.1203599550056243, "grad_norm": 1.152543544769287, "learning_rate": 2.404494382022472e-05, "loss": 1.15995192527771, "step": 107, "token_acc": 0.6845714285714286 }, { "epoch": 0.12148481439820022, "grad_norm": 1.0809537172317505, "learning_rate": 2.4269662921348316e-05, "loss": 1.1918110847473145, "step": 108, "token_acc": 0.6875699888017918 }, { "epoch": 0.12260967379077616, "grad_norm": 1.2295557260513306, "learning_rate": 2.449438202247191e-05, "loss": 1.0209273099899292, "step": 109, "token_acc": 0.7163958641063516 }, { "epoch": 0.12373453318335208, "grad_norm": 1.1573926210403442, "learning_rate": 2.4719101123595505e-05, "loss": 0.9357096552848816, "step": 110, "token_acc": 0.743046357615894 }, { "epoch": 0.12485939257592801, "grad_norm": 1.0812256336212158, "learning_rate": 2.4943820224719103e-05, "loss": 1.1519246101379395, "step": 111, "token_acc": 0.6920322291853178 }, { "epoch": 0.12598425196850394, "grad_norm": 1.1762783527374268, "learning_rate": 2.51685393258427e-05, "loss": 1.0361957550048828, "step": 112, "token_acc": 0.7275204359673024 }, { "epoch": 0.12710911136107986, "grad_norm": 0.9341350197792053, "learning_rate": 2.5393258426966295e-05, "loss": 0.973387598991394, "step": 113, "token_acc": 0.7045235803657363 }, { "epoch": 0.12823397075365578, "grad_norm": 1.1407618522644043, "learning_rate": 2.5617977528089885e-05, "loss": 0.9553889036178589, "step": 114, "token_acc": 0.7139107611548556 }, { "epoch": 0.12935883014623173, "grad_norm": 1.1805106401443481, "learning_rate": 2.5842696629213486e-05, "loss": 1.1164418458938599, "step": 115, "token_acc": 0.699228791773779 }, { "epoch": 0.13048368953880765, "grad_norm": 1.0841456651687622, "learning_rate": 2.606741573033708e-05, "loss": 1.3218328952789307, "step": 116, "token_acc": 0.6614535418583257 }, { "epoch": 0.13160854893138357, "grad_norm": 1.2005935907363892, "learning_rate": 2.6292134831460675e-05, "loss": 1.1711252927780151, "step": 117, "token_acc": 0.6847172081829122 }, { "epoch": 0.1327334083239595, "grad_norm": 1.2443832159042358, "learning_rate": 2.6516853932584273e-05, "loss": 1.0283989906311035, "step": 118, "token_acc": 0.6900726392251816 }, { "epoch": 0.13385826771653545, "grad_norm": 1.1818649768829346, "learning_rate": 2.6741573033707867e-05, "loss": 1.0905885696411133, "step": 119, "token_acc": 0.6885026737967914 }, { "epoch": 0.13498312710911137, "grad_norm": 1.0101176500320435, "learning_rate": 2.696629213483146e-05, "loss": 0.9934240579605103, "step": 120, "token_acc": 0.7314629258517034 }, { "epoch": 0.1361079865016873, "grad_norm": 1.0105328559875488, "learning_rate": 2.7191011235955055e-05, "loss": 1.0867502689361572, "step": 121, "token_acc": 0.7080689029918404 }, { "epoch": 0.1372328458942632, "grad_norm": 1.234389066696167, "learning_rate": 2.7415730337078653e-05, "loss": 1.3222153186798096, "step": 122, "token_acc": 0.6514423076923077 }, { "epoch": 0.13835770528683913, "grad_norm": 0.9625383615493774, "learning_rate": 2.7640449438202247e-05, "loss": 0.9026889801025391, "step": 123, "token_acc": 0.7589545014520813 }, { "epoch": 0.13948256467941508, "grad_norm": 1.3555859327316284, "learning_rate": 2.786516853932584e-05, "loss": 1.2767902612686157, "step": 124, "token_acc": 0.654292343387471 }, { "epoch": 0.140607424071991, "grad_norm": 1.0983104705810547, "learning_rate": 2.8089887640449443e-05, "loss": 1.0246994495391846, "step": 125, "token_acc": 0.7241379310344828 }, { "epoch": 0.14173228346456693, "grad_norm": 1.4023245573043823, "learning_rate": 2.8314606741573037e-05, "loss": 1.1399857997894287, "step": 126, "token_acc": 0.6701434159061278 }, { "epoch": 0.14285714285714285, "grad_norm": 1.1901196241378784, "learning_rate": 2.8539325842696628e-05, "loss": 1.194018840789795, "step": 127, "token_acc": 0.6815703380588877 }, { "epoch": 0.1439820022497188, "grad_norm": 1.333336353302002, "learning_rate": 2.876404494382023e-05, "loss": 1.0130341053009033, "step": 128, "token_acc": 0.681877444589309 }, { "epoch": 0.14510686164229472, "grad_norm": 1.2532967329025269, "learning_rate": 2.8988764044943823e-05, "loss": 1.0948811769485474, "step": 129, "token_acc": 0.6923076923076923 }, { "epoch": 0.14623172103487064, "grad_norm": 1.1796437501907349, "learning_rate": 2.9213483146067417e-05, "loss": 0.9313086271286011, "step": 130, "token_acc": 0.732484076433121 }, { "epoch": 0.14735658042744657, "grad_norm": 1.1279302835464478, "learning_rate": 2.9438202247191012e-05, "loss": 0.994928777217865, "step": 131, "token_acc": 0.7309417040358744 }, { "epoch": 0.1484814398200225, "grad_norm": 1.2470624446868896, "learning_rate": 2.966292134831461e-05, "loss": 0.9514794945716858, "step": 132, "token_acc": 0.7225063938618926 }, { "epoch": 0.14960629921259844, "grad_norm": 1.2036556005477905, "learning_rate": 2.9887640449438204e-05, "loss": 1.0783672332763672, "step": 133, "token_acc": 0.7064220183486238 }, { "epoch": 0.15073115860517436, "grad_norm": 1.1745200157165527, "learning_rate": 3.0112359550561798e-05, "loss": 1.0109422206878662, "step": 134, "token_acc": 0.7108571428571429 }, { "epoch": 0.15185601799775028, "grad_norm": 1.3174582719802856, "learning_rate": 3.0337078651685396e-05, "loss": 0.9573544263839722, "step": 135, "token_acc": 0.7203007518796992 }, { "epoch": 0.1529808773903262, "grad_norm": 1.4643034934997559, "learning_rate": 3.056179775280899e-05, "loss": 1.0923986434936523, "step": 136, "token_acc": 0.7106325706594886 }, { "epoch": 0.15410573678290213, "grad_norm": 1.0450596809387207, "learning_rate": 3.0786516853932584e-05, "loss": 1.083590030670166, "step": 137, "token_acc": 0.6999168744804655 }, { "epoch": 0.15523059617547807, "grad_norm": 1.2416679859161377, "learning_rate": 3.1011235955056185e-05, "loss": 1.036879301071167, "step": 138, "token_acc": 0.7113062568605928 }, { "epoch": 0.156355455568054, "grad_norm": 1.2384499311447144, "learning_rate": 3.123595505617978e-05, "loss": 1.2078543901443481, "step": 139, "token_acc": 0.6785714285714286 }, { "epoch": 0.15748031496062992, "grad_norm": 1.2196382284164429, "learning_rate": 3.1460674157303374e-05, "loss": 1.2164185047149658, "step": 140, "token_acc": 0.6873126873126874 }, { "epoch": 0.15860517435320584, "grad_norm": 1.4095600843429565, "learning_rate": 3.168539325842697e-05, "loss": 1.1642543077468872, "step": 141, "token_acc": 0.6783691959229898 }, { "epoch": 0.1597300337457818, "grad_norm": 1.517809271812439, "learning_rate": 3.191011235955056e-05, "loss": 0.968543291091919, "step": 142, "token_acc": 0.719562243502052 }, { "epoch": 0.1608548931383577, "grad_norm": 1.383565068244934, "learning_rate": 3.2134831460674156e-05, "loss": 1.1296303272247314, "step": 143, "token_acc": 0.6738305941845765 }, { "epoch": 0.16197975253093364, "grad_norm": 1.1711764335632324, "learning_rate": 3.235955056179775e-05, "loss": 1.1616754531860352, "step": 144, "token_acc": 0.6960072595281307 }, { "epoch": 0.16310461192350956, "grad_norm": 1.796638011932373, "learning_rate": 3.258426966292135e-05, "loss": 1.177704930305481, "step": 145, "token_acc": 0.6975308641975309 }, { "epoch": 0.16422947131608548, "grad_norm": 1.3379077911376953, "learning_rate": 3.2808988764044946e-05, "loss": 1.1560606956481934, "step": 146, "token_acc": 0.6720183486238532 }, { "epoch": 0.16535433070866143, "grad_norm": 1.4044950008392334, "learning_rate": 3.303370786516854e-05, "loss": 1.1479580402374268, "step": 147, "token_acc": 0.6802800466744457 }, { "epoch": 0.16647919010123735, "grad_norm": 1.0599268674850464, "learning_rate": 3.325842696629214e-05, "loss": 1.042324185371399, "step": 148, "token_acc": 0.7101328903654485 }, { "epoch": 0.16760404949381327, "grad_norm": 1.2861645221710205, "learning_rate": 3.3483146067415736e-05, "loss": 1.202694296836853, "step": 149, "token_acc": 0.6694599627560521 }, { "epoch": 0.1687289088863892, "grad_norm": 1.7750128507614136, "learning_rate": 3.370786516853933e-05, "loss": 0.9196437001228333, "step": 150, "token_acc": 0.7142857142857143 }, { "epoch": 0.16985376827896512, "grad_norm": 1.460810899734497, "learning_rate": 3.393258426966292e-05, "loss": 1.1052911281585693, "step": 151, "token_acc": 0.6988265971316818 }, { "epoch": 0.17097862767154107, "grad_norm": 1.6421442031860352, "learning_rate": 3.415730337078652e-05, "loss": 1.196292519569397, "step": 152, "token_acc": 0.6590584878744651 }, { "epoch": 0.172103487064117, "grad_norm": 1.2288559675216675, "learning_rate": 3.438202247191011e-05, "loss": 1.0444562435150146, "step": 153, "token_acc": 0.7091988130563798 }, { "epoch": 0.1732283464566929, "grad_norm": 1.326352596282959, "learning_rate": 3.460674157303371e-05, "loss": 1.2127468585968018, "step": 154, "token_acc": 0.6641366223908919 }, { "epoch": 0.17435320584926883, "grad_norm": 1.2999740839004517, "learning_rate": 3.483146067415731e-05, "loss": 1.10992431640625, "step": 155, "token_acc": 0.6965226554267651 }, { "epoch": 0.17547806524184478, "grad_norm": 1.5829814672470093, "learning_rate": 3.50561797752809e-05, "loss": 1.0603529214859009, "step": 156, "token_acc": 0.6796116504854369 }, { "epoch": 0.1766029246344207, "grad_norm": 1.200981616973877, "learning_rate": 3.5280898876404497e-05, "loss": 0.7394691705703735, "step": 157, "token_acc": 0.7876312718786465 }, { "epoch": 0.17772778402699663, "grad_norm": 1.4524919986724854, "learning_rate": 3.550561797752809e-05, "loss": 1.0136172771453857, "step": 158, "token_acc": 0.7043478260869566 }, { "epoch": 0.17885264341957255, "grad_norm": 1.2919158935546875, "learning_rate": 3.5730337078651685e-05, "loss": 0.9664339423179626, "step": 159, "token_acc": 0.7358695652173913 }, { "epoch": 0.17997750281214847, "grad_norm": 1.561129093170166, "learning_rate": 3.595505617977528e-05, "loss": 1.0433273315429688, "step": 160, "token_acc": 0.6956521739130435 }, { "epoch": 0.18110236220472442, "grad_norm": 1.2990273237228394, "learning_rate": 3.6179775280898874e-05, "loss": 1.0112664699554443, "step": 161, "token_acc": 0.7276381909547739 }, { "epoch": 0.18222722159730034, "grad_norm": 1.222436547279358, "learning_rate": 3.6404494382022475e-05, "loss": 0.9978511333465576, "step": 162, "token_acc": 0.7262872628726287 }, { "epoch": 0.18335208098987626, "grad_norm": 1.4752572774887085, "learning_rate": 3.662921348314607e-05, "loss": 0.9457265138626099, "step": 163, "token_acc": 0.7262411347517731 }, { "epoch": 0.1844769403824522, "grad_norm": 1.5078872442245483, "learning_rate": 3.685393258426966e-05, "loss": 0.9952787160873413, "step": 164, "token_acc": 0.6931407942238267 }, { "epoch": 0.1856017997750281, "grad_norm": 1.4856443405151367, "learning_rate": 3.7078651685393264e-05, "loss": 0.9975625276565552, "step": 165, "token_acc": 0.7159235668789808 }, { "epoch": 0.18672665916760406, "grad_norm": 1.4090070724487305, "learning_rate": 3.730337078651686e-05, "loss": 1.0132325887680054, "step": 166, "token_acc": 0.7055616139585605 }, { "epoch": 0.18785151856017998, "grad_norm": 1.5479835271835327, "learning_rate": 3.752808988764045e-05, "loss": 1.0504162311553955, "step": 167, "token_acc": 0.7179818887451488 }, { "epoch": 0.1889763779527559, "grad_norm": 1.5861842632293701, "learning_rate": 3.775280898876405e-05, "loss": 0.8709264993667603, "step": 168, "token_acc": 0.7297297297297297 }, { "epoch": 0.19010123734533182, "grad_norm": 1.5409457683563232, "learning_rate": 3.797752808988764e-05, "loss": 0.9984395503997803, "step": 169, "token_acc": 0.6970128022759602 }, { "epoch": 0.19122609673790777, "grad_norm": 1.535334587097168, "learning_rate": 3.8202247191011236e-05, "loss": 1.0092041492462158, "step": 170, "token_acc": 0.7275031685678074 }, { "epoch": 0.1923509561304837, "grad_norm": 1.49639892578125, "learning_rate": 3.842696629213483e-05, "loss": 1.222269058227539, "step": 171, "token_acc": 0.6683621566632757 }, { "epoch": 0.19347581552305962, "grad_norm": 1.4776557683944702, "learning_rate": 3.865168539325843e-05, "loss": 0.9650889039039612, "step": 172, "token_acc": 0.7281553398058253 }, { "epoch": 0.19460067491563554, "grad_norm": 1.4936684370040894, "learning_rate": 3.8876404494382025e-05, "loss": 0.9272334575653076, "step": 173, "token_acc": 0.7233766233766233 }, { "epoch": 0.19572553430821146, "grad_norm": 1.3645389080047607, "learning_rate": 3.910112359550562e-05, "loss": 0.9794991612434387, "step": 174, "token_acc": 0.7165991902834008 }, { "epoch": 0.1968503937007874, "grad_norm": 1.4073173999786377, "learning_rate": 3.9325842696629214e-05, "loss": 1.0961869955062866, "step": 175, "token_acc": 0.7091078066914498 }, { "epoch": 0.19797525309336333, "grad_norm": 1.5859163999557495, "learning_rate": 3.955056179775281e-05, "loss": 1.0338952541351318, "step": 176, "token_acc": 0.7139272271016311 }, { "epoch": 0.19910011248593926, "grad_norm": 1.6331827640533447, "learning_rate": 3.97752808988764e-05, "loss": 1.0368452072143555, "step": 177, "token_acc": 0.6807980049875312 }, { "epoch": 0.20022497187851518, "grad_norm": 1.1969928741455078, "learning_rate": 4e-05, "loss": 0.9349050521850586, "step": 178, "token_acc": 0.7432306255835668 }, { "epoch": 0.2013498312710911, "grad_norm": 1.5990660190582275, "learning_rate": 4.02247191011236e-05, "loss": 0.9452978372573853, "step": 179, "token_acc": 0.7187910643889619 }, { "epoch": 0.20247469066366705, "grad_norm": 1.465158224105835, "learning_rate": 4.044943820224719e-05, "loss": 0.9628027677536011, "step": 180, "token_acc": 0.7404761904761905 }, { "epoch": 0.20359955005624297, "grad_norm": 1.4929207563400269, "learning_rate": 4.067415730337079e-05, "loss": 0.996096134185791, "step": 181, "token_acc": 0.7208791208791209 }, { "epoch": 0.2047244094488189, "grad_norm": 1.6284878253936768, "learning_rate": 4.089887640449439e-05, "loss": 1.1094181537628174, "step": 182, "token_acc": 0.7040816326530612 }, { "epoch": 0.20584926884139482, "grad_norm": 1.6403576135635376, "learning_rate": 4.112359550561798e-05, "loss": 0.9799474477767944, "step": 183, "token_acc": 0.7105263157894737 }, { "epoch": 0.20697412823397077, "grad_norm": 1.54055655002594, "learning_rate": 4.1348314606741576e-05, "loss": 1.1097341775894165, "step": 184, "token_acc": 0.6930022573363431 }, { "epoch": 0.2080989876265467, "grad_norm": 1.4876576662063599, "learning_rate": 4.157303370786517e-05, "loss": 1.1267961263656616, "step": 185, "token_acc": 0.7024185068349106 }, { "epoch": 0.2092238470191226, "grad_norm": 1.4596658945083618, "learning_rate": 4.1797752808988764e-05, "loss": 1.067931890487671, "step": 186, "token_acc": 0.70199370409234 }, { "epoch": 0.21034870641169853, "grad_norm": 1.3585259914398193, "learning_rate": 4.202247191011236e-05, "loss": 0.8209952116012573, "step": 187, "token_acc": 0.7585421412300684 }, { "epoch": 0.21147356580427445, "grad_norm": 1.8261454105377197, "learning_rate": 4.224719101123596e-05, "loss": 1.3683371543884277, "step": 188, "token_acc": 0.6275 }, { "epoch": 0.2125984251968504, "grad_norm": 1.5620837211608887, "learning_rate": 4.2471910112359554e-05, "loss": 0.8445907235145569, "step": 189, "token_acc": 0.7658402203856749 }, { "epoch": 0.21372328458942633, "grad_norm": 1.6832116842269897, "learning_rate": 4.269662921348315e-05, "loss": 0.8920148611068726, "step": 190, "token_acc": 0.7372159090909091 }, { "epoch": 0.21484814398200225, "grad_norm": 1.6320570707321167, "learning_rate": 4.292134831460675e-05, "loss": 0.8787733912467957, "step": 191, "token_acc": 0.7416201117318436 }, { "epoch": 0.21597300337457817, "grad_norm": 1.4882495403289795, "learning_rate": 4.314606741573034e-05, "loss": 1.1128981113433838, "step": 192, "token_acc": 0.7134778510838832 }, { "epoch": 0.2170978627671541, "grad_norm": 1.2778202295303345, "learning_rate": 4.337078651685393e-05, "loss": 0.8565435409545898, "step": 193, "token_acc": 0.7615571776155717 }, { "epoch": 0.21822272215973004, "grad_norm": 1.313296914100647, "learning_rate": 4.3595505617977525e-05, "loss": 0.8638637065887451, "step": 194, "token_acc": 0.7392156862745098 }, { "epoch": 0.21934758155230596, "grad_norm": 1.5724983215332031, "learning_rate": 4.3820224719101126e-05, "loss": 0.9607068300247192, "step": 195, "token_acc": 0.7100238663484487 }, { "epoch": 0.2204724409448819, "grad_norm": 1.5530091524124146, "learning_rate": 4.404494382022472e-05, "loss": 1.1073720455169678, "step": 196, "token_acc": 0.7025862068965517 }, { "epoch": 0.2215973003374578, "grad_norm": 1.4681856632232666, "learning_rate": 4.4269662921348315e-05, "loss": 0.9957531690597534, "step": 197, "token_acc": 0.7148080438756855 }, { "epoch": 0.22272215973003376, "grad_norm": 1.7927360534667969, "learning_rate": 4.4494382022471916e-05, "loss": 1.0535670518875122, "step": 198, "token_acc": 0.684863523573201 }, { "epoch": 0.22384701912260968, "grad_norm": 1.6400867700576782, "learning_rate": 4.471910112359551e-05, "loss": 0.9668567180633545, "step": 199, "token_acc": 0.7231404958677686 }, { "epoch": 0.2249718785151856, "grad_norm": 1.5202151536941528, "learning_rate": 4.4943820224719104e-05, "loss": 1.0982716083526611, "step": 200, "token_acc": 0.7091273821464393 }, { "epoch": 0.22609673790776152, "grad_norm": 2.0355868339538574, "learning_rate": 4.51685393258427e-05, "loss": 1.1191651821136475, "step": 201, "token_acc": 0.6730158730158731 }, { "epoch": 0.22722159730033745, "grad_norm": 1.6517218351364136, "learning_rate": 4.539325842696629e-05, "loss": 0.9790687561035156, "step": 202, "token_acc": 0.7148648648648649 }, { "epoch": 0.2283464566929134, "grad_norm": 1.5549157857894897, "learning_rate": 4.561797752808989e-05, "loss": 1.0393073558807373, "step": 203, "token_acc": 0.6968463886063072 }, { "epoch": 0.22947131608548932, "grad_norm": 1.7443811893463135, "learning_rate": 4.584269662921348e-05, "loss": 0.896649956703186, "step": 204, "token_acc": 0.7334152334152334 }, { "epoch": 0.23059617547806524, "grad_norm": 1.7286280393600464, "learning_rate": 4.606741573033708e-05, "loss": 1.0137832164764404, "step": 205, "token_acc": 0.7217165149544863 }, { "epoch": 0.23172103487064116, "grad_norm": 1.519325613975525, "learning_rate": 4.629213483146068e-05, "loss": 0.8844738006591797, "step": 206, "token_acc": 0.7472256473489519 }, { "epoch": 0.23284589426321708, "grad_norm": 1.8163905143737793, "learning_rate": 4.651685393258427e-05, "loss": 1.1151843070983887, "step": 207, "token_acc": 0.6818181818181818 }, { "epoch": 0.23397075365579303, "grad_norm": 1.4301449060440063, "learning_rate": 4.674157303370787e-05, "loss": 0.8535497188568115, "step": 208, "token_acc": 0.7511792452830188 }, { "epoch": 0.23509561304836896, "grad_norm": 1.641652226448059, "learning_rate": 4.6966292134831466e-05, "loss": 1.0581762790679932, "step": 209, "token_acc": 0.6932270916334662 }, { "epoch": 0.23622047244094488, "grad_norm": 1.5515364408493042, "learning_rate": 4.719101123595506e-05, "loss": 1.1171159744262695, "step": 210, "token_acc": 0.6984615384615385 }, { "epoch": 0.2373453318335208, "grad_norm": 1.3472788333892822, "learning_rate": 4.7415730337078655e-05, "loss": 0.9588379859924316, "step": 211, "token_acc": 0.7540842648323302 }, { "epoch": 0.23847019122609675, "grad_norm": 1.3716918230056763, "learning_rate": 4.764044943820225e-05, "loss": 0.8613770604133606, "step": 212, "token_acc": 0.744621141253508 }, { "epoch": 0.23959505061867267, "grad_norm": 1.3678066730499268, "learning_rate": 4.786516853932584e-05, "loss": 0.7591139078140259, "step": 213, "token_acc": 0.767580452920143 }, { "epoch": 0.2407199100112486, "grad_norm": 1.7037216424942017, "learning_rate": 4.808988764044944e-05, "loss": 1.1322269439697266, "step": 214, "token_acc": 0.6867469879518072 }, { "epoch": 0.24184476940382452, "grad_norm": 1.7985085248947144, "learning_rate": 4.831460674157304e-05, "loss": 0.9342286586761475, "step": 215, "token_acc": 0.7201946472019465 }, { "epoch": 0.24296962879640044, "grad_norm": 1.5648308992385864, "learning_rate": 4.853932584269663e-05, "loss": 1.059760570526123, "step": 216, "token_acc": 0.6858638743455497 }, { "epoch": 0.2440944881889764, "grad_norm": 1.6522356271743774, "learning_rate": 4.876404494382023e-05, "loss": 0.8647735118865967, "step": 217, "token_acc": 0.755688622754491 }, { "epoch": 0.2452193475815523, "grad_norm": 1.6507322788238525, "learning_rate": 4.898876404494382e-05, "loss": 0.9860743880271912, "step": 218, "token_acc": 0.7375145180023229 }, { "epoch": 0.24634420697412823, "grad_norm": 1.507715106010437, "learning_rate": 4.9213483146067416e-05, "loss": 1.020752191543579, "step": 219, "token_acc": 0.7306910569105691 }, { "epoch": 0.24746906636670415, "grad_norm": 1.5559320449829102, "learning_rate": 4.943820224719101e-05, "loss": 1.0118601322174072, "step": 220, "token_acc": 0.695303550973654 }, { "epoch": 0.24859392575928008, "grad_norm": 1.5786669254302979, "learning_rate": 4.966292134831461e-05, "loss": 0.8872907161712646, "step": 221, "token_acc": 0.75 }, { "epoch": 0.24971878515185603, "grad_norm": 1.7283838987350464, "learning_rate": 4.9887640449438205e-05, "loss": 1.0629130601882935, "step": 222, "token_acc": 0.6876379690949227 }, { "epoch": 0.2508436445444319, "grad_norm": 1.9204140901565552, "learning_rate": 5.0112359550561806e-05, "loss": 1.0288273096084595, "step": 223, "token_acc": 0.7167630057803468 }, { "epoch": 0.25196850393700787, "grad_norm": 1.7573752403259277, "learning_rate": 5.03370786516854e-05, "loss": 0.8554309606552124, "step": 224, "token_acc": 0.7343065693430657 }, { "epoch": 0.2530933633295838, "grad_norm": 2.0009403228759766, "learning_rate": 5.0561797752808995e-05, "loss": 1.0911678075790405, "step": 225, "token_acc": 0.6853932584269663 }, { "epoch": 0.2542182227221597, "grad_norm": 1.8616971969604492, "learning_rate": 5.078651685393259e-05, "loss": 1.1401443481445312, "step": 226, "token_acc": 0.7134767836919592 }, { "epoch": 0.25534308211473566, "grad_norm": 1.8860188722610474, "learning_rate": 5.101123595505618e-05, "loss": 0.9511814117431641, "step": 227, "token_acc": 0.7357910906298003 }, { "epoch": 0.25646794150731156, "grad_norm": 1.841572880744934, "learning_rate": 5.123595505617977e-05, "loss": 1.0661301612854004, "step": 228, "token_acc": 0.7149700598802395 }, { "epoch": 0.2575928008998875, "grad_norm": 1.5527392625808716, "learning_rate": 5.1460674157303365e-05, "loss": 1.2313668727874756, "step": 229, "token_acc": 0.6793002915451894 }, { "epoch": 0.25871766029246346, "grad_norm": 2.0452404022216797, "learning_rate": 5.168539325842697e-05, "loss": 0.9756563901901245, "step": 230, "token_acc": 0.7208271787296898 }, { "epoch": 0.25984251968503935, "grad_norm": 1.6925112009048462, "learning_rate": 5.191011235955057e-05, "loss": 1.1303825378417969, "step": 231, "token_acc": 0.6800825593395253 }, { "epoch": 0.2609673790776153, "grad_norm": 1.8258121013641357, "learning_rate": 5.213483146067416e-05, "loss": 1.0046674013137817, "step": 232, "token_acc": 0.7106842737094838 }, { "epoch": 0.26209223847019125, "grad_norm": 1.808469295501709, "learning_rate": 5.2359550561797756e-05, "loss": 0.9644273519515991, "step": 233, "token_acc": 0.7215686274509804 }, { "epoch": 0.26321709786276715, "grad_norm": 1.916642665863037, "learning_rate": 5.258426966292135e-05, "loss": 1.0017303228378296, "step": 234, "token_acc": 0.7277556440903055 }, { "epoch": 0.2643419572553431, "grad_norm": 1.6663633584976196, "learning_rate": 5.2808988764044944e-05, "loss": 1.059980869293213, "step": 235, "token_acc": 0.7119675456389453 }, { "epoch": 0.265466816647919, "grad_norm": 1.6864794492721558, "learning_rate": 5.3033707865168545e-05, "loss": 0.9881404638290405, "step": 236, "token_acc": 0.710412147505423 }, { "epoch": 0.26659167604049494, "grad_norm": 1.7018659114837646, "learning_rate": 5.325842696629214e-05, "loss": 0.8766814470291138, "step": 237, "token_acc": 0.7363344051446945 }, { "epoch": 0.2677165354330709, "grad_norm": 1.794450044631958, "learning_rate": 5.3483146067415734e-05, "loss": 1.0207915306091309, "step": 238, "token_acc": 0.7174193548387097 }, { "epoch": 0.2688413948256468, "grad_norm": 1.7520678043365479, "learning_rate": 5.370786516853933e-05, "loss": 1.032090187072754, "step": 239, "token_acc": 0.7212918660287081 }, { "epoch": 0.26996625421822273, "grad_norm": 1.4683079719543457, "learning_rate": 5.393258426966292e-05, "loss": 1.1560401916503906, "step": 240, "token_acc": 0.6895652173913044 }, { "epoch": 0.27109111361079863, "grad_norm": 1.622818946838379, "learning_rate": 5.415730337078652e-05, "loss": 0.8906047344207764, "step": 241, "token_acc": 0.7425373134328358 }, { "epoch": 0.2722159730033746, "grad_norm": 1.5881969928741455, "learning_rate": 5.438202247191011e-05, "loss": 0.8468738794326782, "step": 242, "token_acc": 0.7654320987654321 }, { "epoch": 0.27334083239595053, "grad_norm": 1.537872314453125, "learning_rate": 5.460674157303371e-05, "loss": 0.7266538143157959, "step": 243, "token_acc": 0.7805164319248826 }, { "epoch": 0.2744656917885264, "grad_norm": 1.63555908203125, "learning_rate": 5.4831460674157306e-05, "loss": 0.795522153377533, "step": 244, "token_acc": 0.7616707616707616 }, { "epoch": 0.2755905511811024, "grad_norm": 1.7763481140136719, "learning_rate": 5.50561797752809e-05, "loss": 0.9175059795379639, "step": 245, "token_acc": 0.7369696969696969 }, { "epoch": 0.27671541057367827, "grad_norm": 1.761465072631836, "learning_rate": 5.5280898876404495e-05, "loss": 0.9194624423980713, "step": 246, "token_acc": 0.762051282051282 }, { "epoch": 0.2778402699662542, "grad_norm": 1.7746719121932983, "learning_rate": 5.550561797752809e-05, "loss": 1.001786708831787, "step": 247, "token_acc": 0.7490247074122237 }, { "epoch": 0.27896512935883017, "grad_norm": 1.8339561223983765, "learning_rate": 5.573033707865168e-05, "loss": 1.036380410194397, "step": 248, "token_acc": 0.6951807228915663 }, { "epoch": 0.28008998875140606, "grad_norm": 1.681343674659729, "learning_rate": 5.595505617977528e-05, "loss": 1.0626839399337769, "step": 249, "token_acc": 0.7024704618689581 }, { "epoch": 0.281214848143982, "grad_norm": 1.6359878778457642, "learning_rate": 5.6179775280898885e-05, "loss": 0.9372226595878601, "step": 250, "token_acc": 0.7089371980676329 }, { "epoch": 0.2823397075365579, "grad_norm": 1.7748621702194214, "learning_rate": 5.640449438202248e-05, "loss": 1.0176749229431152, "step": 251, "token_acc": 0.7149643705463183 }, { "epoch": 0.28346456692913385, "grad_norm": 1.9758368730545044, "learning_rate": 5.6629213483146074e-05, "loss": 0.8915600776672363, "step": 252, "token_acc": 0.7470414201183432 }, { "epoch": 0.2845894263217098, "grad_norm": 1.4802197217941284, "learning_rate": 5.685393258426966e-05, "loss": 1.042349100112915, "step": 253, "token_acc": 0.7091412742382271 }, { "epoch": 0.2857142857142857, "grad_norm": 1.710125207901001, "learning_rate": 5.7078651685393256e-05, "loss": 1.0485045909881592, "step": 254, "token_acc": 0.6938144329896907 }, { "epoch": 0.28683914510686165, "grad_norm": 1.8449538946151733, "learning_rate": 5.730337078651685e-05, "loss": 1.0510667562484741, "step": 255, "token_acc": 0.7074340527577938 }, { "epoch": 0.2879640044994376, "grad_norm": 1.6770274639129639, "learning_rate": 5.752808988764046e-05, "loss": 1.0163381099700928, "step": 256, "token_acc": 0.7171717171717171 }, { "epoch": 0.2890888638920135, "grad_norm": 1.5033226013183594, "learning_rate": 5.775280898876405e-05, "loss": 1.120741605758667, "step": 257, "token_acc": 0.7128060263653484 }, { "epoch": 0.29021372328458944, "grad_norm": 1.6510515213012695, "learning_rate": 5.7977528089887646e-05, "loss": 0.8928357362747192, "step": 258, "token_acc": 0.7439759036144579 }, { "epoch": 0.29133858267716534, "grad_norm": 1.8823243379592896, "learning_rate": 5.820224719101124e-05, "loss": 1.0084930658340454, "step": 259, "token_acc": 0.7076326002587322 }, { "epoch": 0.2924634420697413, "grad_norm": 1.592637300491333, "learning_rate": 5.8426966292134835e-05, "loss": 0.937751829624176, "step": 260, "token_acc": 0.7403314917127072 }, { "epoch": 0.29358830146231724, "grad_norm": 1.9713650941848755, "learning_rate": 5.865168539325843e-05, "loss": 1.1369144916534424, "step": 261, "token_acc": 0.6893523600439078 }, { "epoch": 0.29471316085489313, "grad_norm": 1.780164122581482, "learning_rate": 5.8876404494382023e-05, "loss": 1.0930724143981934, "step": 262, "token_acc": 0.7052631578947368 }, { "epoch": 0.2958380202474691, "grad_norm": 2.009315252304077, "learning_rate": 5.9101123595505624e-05, "loss": 1.047447919845581, "step": 263, "token_acc": 0.7038123167155426 }, { "epoch": 0.296962879640045, "grad_norm": 1.770580768585205, "learning_rate": 5.932584269662922e-05, "loss": 1.1637544631958008, "step": 264, "token_acc": 0.6759259259259259 }, { "epoch": 0.2980877390326209, "grad_norm": 1.7873228788375854, "learning_rate": 5.955056179775281e-05, "loss": 0.9982095956802368, "step": 265, "token_acc": 0.7155172413793104 }, { "epoch": 0.2992125984251969, "grad_norm": 2.202678918838501, "learning_rate": 5.977528089887641e-05, "loss": 1.0060601234436035, "step": 266, "token_acc": 0.7179856115107913 }, { "epoch": 0.30033745781777277, "grad_norm": 1.9659627676010132, "learning_rate": 6e-05, "loss": 1.0722196102142334, "step": 267, "token_acc": 0.7055771725032426 }, { "epoch": 0.3014623172103487, "grad_norm": 2.012859344482422, "learning_rate": 6.0224719101123596e-05, "loss": 1.0905499458312988, "step": 268, "token_acc": 0.6884236453201971 }, { "epoch": 0.3025871766029246, "grad_norm": 1.8015167713165283, "learning_rate": 6.04494382022472e-05, "loss": 1.0018097162246704, "step": 269, "token_acc": 0.7231121281464531 }, { "epoch": 0.30371203599550056, "grad_norm": 1.6542035341262817, "learning_rate": 6.067415730337079e-05, "loss": 0.8605320453643799, "step": 270, "token_acc": 0.7417218543046358 }, { "epoch": 0.3048368953880765, "grad_norm": 1.8816765546798706, "learning_rate": 6.0898876404494385e-05, "loss": 1.0386288166046143, "step": 271, "token_acc": 0.6997742663656885 }, { "epoch": 0.3059617547806524, "grad_norm": 1.5924571752548218, "learning_rate": 6.112359550561798e-05, "loss": 1.0360441207885742, "step": 272, "token_acc": 0.7199297629499561 }, { "epoch": 0.30708661417322836, "grad_norm": 1.6721726655960083, "learning_rate": 6.134831460674157e-05, "loss": 0.951246976852417, "step": 273, "token_acc": 0.7331838565022422 }, { "epoch": 0.30821147356580425, "grad_norm": 1.808548092842102, "learning_rate": 6.157303370786517e-05, "loss": 0.9140626192092896, "step": 274, "token_acc": 0.73375 }, { "epoch": 0.3093363329583802, "grad_norm": 2.127815008163452, "learning_rate": 6.179775280898876e-05, "loss": 0.9632056355476379, "step": 275, "token_acc": 0.7345132743362832 }, { "epoch": 0.31046119235095615, "grad_norm": 2.049995183944702, "learning_rate": 6.202247191011237e-05, "loss": 0.9856493473052979, "step": 276, "token_acc": 0.7092511013215859 }, { "epoch": 0.31158605174353204, "grad_norm": 1.6235735416412354, "learning_rate": 6.224719101123596e-05, "loss": 1.1092243194580078, "step": 277, "token_acc": 0.6776937618147448 }, { "epoch": 0.312710911136108, "grad_norm": 1.5486937761306763, "learning_rate": 6.247191011235956e-05, "loss": 0.9321156740188599, "step": 278, "token_acc": 0.7408793264733395 }, { "epoch": 0.3138357705286839, "grad_norm": 2.319556951522827, "learning_rate": 6.269662921348315e-05, "loss": 0.9952074885368347, "step": 279, "token_acc": 0.6968174204355109 }, { "epoch": 0.31496062992125984, "grad_norm": 1.9556807279586792, "learning_rate": 6.292134831460675e-05, "loss": 1.0810304880142212, "step": 280, "token_acc": 0.6894803548795945 }, { "epoch": 0.3160854893138358, "grad_norm": 1.8779759407043457, "learning_rate": 6.314606741573034e-05, "loss": 1.013759970664978, "step": 281, "token_acc": 0.7102908277404921 }, { "epoch": 0.3172103487064117, "grad_norm": 1.6576989889144897, "learning_rate": 6.337078651685394e-05, "loss": 0.954249382019043, "step": 282, "token_acc": 0.7224975222993062 }, { "epoch": 0.31833520809898763, "grad_norm": 1.6991381645202637, "learning_rate": 6.359550561797753e-05, "loss": 1.022630214691162, "step": 283, "token_acc": 0.7176339285714286 }, { "epoch": 0.3194600674915636, "grad_norm": 2.097963809967041, "learning_rate": 6.382022471910112e-05, "loss": 1.0457804203033447, "step": 284, "token_acc": 0.6935483870967742 }, { "epoch": 0.3205849268841395, "grad_norm": 1.7630845308303833, "learning_rate": 6.404494382022472e-05, "loss": 1.0383262634277344, "step": 285, "token_acc": 0.7066666666666667 }, { "epoch": 0.3217097862767154, "grad_norm": 1.4628931283950806, "learning_rate": 6.426966292134831e-05, "loss": 1.145449161529541, "step": 286, "token_acc": 0.6795212765957447 }, { "epoch": 0.3228346456692913, "grad_norm": 1.8212125301361084, "learning_rate": 6.449438202247191e-05, "loss": 1.0778967142105103, "step": 287, "token_acc": 0.7066246056782335 }, { "epoch": 0.32395950506186727, "grad_norm": 1.551231026649475, "learning_rate": 6.47191011235955e-05, "loss": 0.824447751045227, "step": 288, "token_acc": 0.7572614107883817 }, { "epoch": 0.3250843644544432, "grad_norm": 1.8474342823028564, "learning_rate": 6.494382022471911e-05, "loss": 1.1201366186141968, "step": 289, "token_acc": 0.6835443037974683 }, { "epoch": 0.3262092238470191, "grad_norm": 1.7069413661956787, "learning_rate": 6.51685393258427e-05, "loss": 0.9277028441429138, "step": 290, "token_acc": 0.7473182359952324 }, { "epoch": 0.32733408323959506, "grad_norm": 1.885452389717102, "learning_rate": 6.53932584269663e-05, "loss": 1.1327459812164307, "step": 291, "token_acc": 0.7015590200445434 }, { "epoch": 0.32845894263217096, "grad_norm": 2.0194458961486816, "learning_rate": 6.561797752808989e-05, "loss": 0.9612037539482117, "step": 292, "token_acc": 0.710334788937409 }, { "epoch": 0.3295838020247469, "grad_norm": 1.5913290977478027, "learning_rate": 6.584269662921349e-05, "loss": 1.0670043230056763, "step": 293, "token_acc": 0.7152133580705009 }, { "epoch": 0.33070866141732286, "grad_norm": 1.9731364250183105, "learning_rate": 6.606741573033708e-05, "loss": 1.144604206085205, "step": 294, "token_acc": 0.6872682323856613 }, { "epoch": 0.33183352080989875, "grad_norm": 1.727447748184204, "learning_rate": 6.629213483146067e-05, "loss": 0.7939117550849915, "step": 295, "token_acc": 0.759235668789809 }, { "epoch": 0.3329583802024747, "grad_norm": 1.2714818716049194, "learning_rate": 6.651685393258428e-05, "loss": 0.946199893951416, "step": 296, "token_acc": 0.7303135888501742 }, { "epoch": 0.3340832395950506, "grad_norm": 2.0224123001098633, "learning_rate": 6.674157303370788e-05, "loss": 1.0812444686889648, "step": 297, "token_acc": 0.7071129707112971 }, { "epoch": 0.33520809898762655, "grad_norm": 1.717506766319275, "learning_rate": 6.696629213483147e-05, "loss": 0.9123365879058838, "step": 298, "token_acc": 0.7222222222222222 }, { "epoch": 0.3363329583802025, "grad_norm": 1.8696712255477905, "learning_rate": 6.719101123595507e-05, "loss": 1.019747018814087, "step": 299, "token_acc": 0.7309184993531694 }, { "epoch": 0.3374578177727784, "grad_norm": 1.8168563842773438, "learning_rate": 6.741573033707866e-05, "loss": 1.1613306999206543, "step": 300, "token_acc": 0.6941431670281996 }, { "epoch": 0.3374578177727784, "eval_loss": 0.9690737724304199, "eval_runtime": 32.013, "eval_samples_per_second": 25.084, "eval_steps_per_second": 3.155, "eval_token_acc": 0.7199280083033368, "step": 300 }, { "epoch": 0.33858267716535434, "grad_norm": 1.699651837348938, "learning_rate": 6.764044943820224e-05, "loss": 0.9115865230560303, "step": 301, "token_acc": 0.7372685185185185 }, { "epoch": 0.33970753655793023, "grad_norm": 1.656445026397705, "learning_rate": 6.786516853932583e-05, "loss": 0.8629164695739746, "step": 302, "token_acc": 0.7462311557788944 }, { "epoch": 0.3408323959505062, "grad_norm": 1.5174970626831055, "learning_rate": 6.808988764044944e-05, "loss": 1.112040400505066, "step": 303, "token_acc": 0.6707692307692308 }, { "epoch": 0.34195725534308213, "grad_norm": 1.8591654300689697, "learning_rate": 6.831460674157304e-05, "loss": 1.1517301797866821, "step": 304, "token_acc": 0.6832358674463938 }, { "epoch": 0.34308211473565803, "grad_norm": 1.8383805751800537, "learning_rate": 6.853932584269663e-05, "loss": 1.00364351272583, "step": 305, "token_acc": 0.7106854838709677 }, { "epoch": 0.344206974128234, "grad_norm": 1.8278290033340454, "learning_rate": 6.876404494382023e-05, "loss": 0.9950717091560364, "step": 306, "token_acc": 0.7236403995560489 }, { "epoch": 0.34533183352080987, "grad_norm": 1.6931618452072144, "learning_rate": 6.898876404494382e-05, "loss": 1.0169693231582642, "step": 307, "token_acc": 0.7147239263803681 }, { "epoch": 0.3464566929133858, "grad_norm": 1.7178689241409302, "learning_rate": 6.921348314606741e-05, "loss": 1.0618685483932495, "step": 308, "token_acc": 0.6957964601769911 }, { "epoch": 0.34758155230596177, "grad_norm": 1.796687126159668, "learning_rate": 6.943820224719102e-05, "loss": 1.0777299404144287, "step": 309, "token_acc": 0.6878147029204431 }, { "epoch": 0.34870641169853767, "grad_norm": 1.9142061471939087, "learning_rate": 6.966292134831462e-05, "loss": 0.9578858017921448, "step": 310, "token_acc": 0.7296678121420389 }, { "epoch": 0.3498312710911136, "grad_norm": 2.1318767070770264, "learning_rate": 6.988764044943821e-05, "loss": 0.9280487298965454, "step": 311, "token_acc": 0.718887262079063 }, { "epoch": 0.35095613048368957, "grad_norm": 1.9099323749542236, "learning_rate": 7.01123595505618e-05, "loss": 1.1335875988006592, "step": 312, "token_acc": 0.6960486322188449 }, { "epoch": 0.35208098987626546, "grad_norm": 1.5989108085632324, "learning_rate": 7.03370786516854e-05, "loss": 1.0432809591293335, "step": 313, "token_acc": 0.7213930348258707 }, { "epoch": 0.3532058492688414, "grad_norm": 1.878498911857605, "learning_rate": 7.056179775280899e-05, "loss": 1.206588625907898, "step": 314, "token_acc": 0.688622754491018 }, { "epoch": 0.3543307086614173, "grad_norm": 1.6848019361495972, "learning_rate": 7.078651685393259e-05, "loss": 0.9105646014213562, "step": 315, "token_acc": 0.7540816326530613 }, { "epoch": 0.35545556805399325, "grad_norm": 1.6208447217941284, "learning_rate": 7.101123595505618e-05, "loss": 1.1419382095336914, "step": 316, "token_acc": 0.7121771217712177 }, { "epoch": 0.3565804274465692, "grad_norm": 1.7517368793487549, "learning_rate": 7.123595505617978e-05, "loss": 0.9713804125785828, "step": 317, "token_acc": 0.7206235011990407 }, { "epoch": 0.3577052868391451, "grad_norm": 2.20463228225708, "learning_rate": 7.146067415730337e-05, "loss": 1.1000707149505615, "step": 318, "token_acc": 0.6858710562414266 }, { "epoch": 0.35883014623172105, "grad_norm": 1.8240197896957397, "learning_rate": 7.168539325842696e-05, "loss": 0.8322272300720215, "step": 319, "token_acc": 0.7605263157894737 }, { "epoch": 0.35995500562429694, "grad_norm": 1.8866313695907593, "learning_rate": 7.191011235955056e-05, "loss": 1.0307772159576416, "step": 320, "token_acc": 0.7053020961775586 }, { "epoch": 0.3610798650168729, "grad_norm": 1.7463196516036987, "learning_rate": 7.213483146067415e-05, "loss": 1.1182823181152344, "step": 321, "token_acc": 0.6871961102106969 }, { "epoch": 0.36220472440944884, "grad_norm": 1.9792118072509766, "learning_rate": 7.235955056179775e-05, "loss": 0.9322708249092102, "step": 322, "token_acc": 0.7416545718432511 }, { "epoch": 0.36332958380202474, "grad_norm": 1.9789454936981201, "learning_rate": 7.258426966292136e-05, "loss": 0.8996306657791138, "step": 323, "token_acc": 0.7329286798179059 }, { "epoch": 0.3644544431946007, "grad_norm": 1.9040738344192505, "learning_rate": 7.280898876404495e-05, "loss": 1.2223901748657227, "step": 324, "token_acc": 0.656989247311828 }, { "epoch": 0.3655793025871766, "grad_norm": 1.8699519634246826, "learning_rate": 7.303370786516854e-05, "loss": 0.9465277194976807, "step": 325, "token_acc": 0.7031055900621118 }, { "epoch": 0.36670416197975253, "grad_norm": 1.7525314092636108, "learning_rate": 7.325842696629214e-05, "loss": 0.757322371006012, "step": 326, "token_acc": 0.7699004975124378 }, { "epoch": 0.3678290213723285, "grad_norm": 1.9640742540359497, "learning_rate": 7.348314606741573e-05, "loss": 0.8582019805908203, "step": 327, "token_acc": 0.724709784411277 }, { "epoch": 0.3689538807649044, "grad_norm": 1.6053048372268677, "learning_rate": 7.370786516853933e-05, "loss": 1.0158510208129883, "step": 328, "token_acc": 0.716304347826087 }, { "epoch": 0.3700787401574803, "grad_norm": 1.981948733329773, "learning_rate": 7.393258426966293e-05, "loss": 1.0746012926101685, "step": 329, "token_acc": 0.6973518284993695 }, { "epoch": 0.3712035995500562, "grad_norm": 1.920090675354004, "learning_rate": 7.415730337078653e-05, "loss": 0.932982325553894, "step": 330, "token_acc": 0.7493261455525606 }, { "epoch": 0.37232845894263217, "grad_norm": 1.8550628423690796, "learning_rate": 7.438202247191012e-05, "loss": 0.9158258438110352, "step": 331, "token_acc": 0.7354368932038835 }, { "epoch": 0.3734533183352081, "grad_norm": 1.7758638858795166, "learning_rate": 7.460674157303372e-05, "loss": 1.051654577255249, "step": 332, "token_acc": 0.7169421487603306 }, { "epoch": 0.374578177727784, "grad_norm": 1.558099389076233, "learning_rate": 7.483146067415731e-05, "loss": 0.9272782802581787, "step": 333, "token_acc": 0.7339449541284404 }, { "epoch": 0.37570303712035996, "grad_norm": 1.9125303030014038, "learning_rate": 7.50561797752809e-05, "loss": 0.9680390954017639, "step": 334, "token_acc": 0.7368421052631579 }, { "epoch": 0.37682789651293586, "grad_norm": 1.672597885131836, "learning_rate": 7.52808988764045e-05, "loss": 1.056138515472412, "step": 335, "token_acc": 0.7212435233160622 }, { "epoch": 0.3779527559055118, "grad_norm": 1.922539472579956, "learning_rate": 7.55056179775281e-05, "loss": 0.943455696105957, "step": 336, "token_acc": 0.7170542635658915 }, { "epoch": 0.37907761529808776, "grad_norm": 1.9560731649398804, "learning_rate": 7.573033707865169e-05, "loss": 0.9698992967605591, "step": 337, "token_acc": 0.7110091743119266 }, { "epoch": 0.38020247469066365, "grad_norm": 1.8730480670928955, "learning_rate": 7.595505617977528e-05, "loss": 1.1020320653915405, "step": 338, "token_acc": 0.6963906581740976 }, { "epoch": 0.3813273340832396, "grad_norm": 1.9586926698684692, "learning_rate": 7.617977528089888e-05, "loss": 1.0178933143615723, "step": 339, "token_acc": 0.7207977207977208 }, { "epoch": 0.38245219347581555, "grad_norm": 1.5615214109420776, "learning_rate": 7.640449438202247e-05, "loss": 0.7984320521354675, "step": 340, "token_acc": 0.7723840345199569 }, { "epoch": 0.38357705286839144, "grad_norm": 2.0847864151000977, "learning_rate": 7.662921348314607e-05, "loss": 0.9584349989891052, "step": 341, "token_acc": 0.7353801169590644 }, { "epoch": 0.3847019122609674, "grad_norm": 1.8125011920928955, "learning_rate": 7.685393258426966e-05, "loss": 1.0064764022827148, "step": 342, "token_acc": 0.7226980728051392 }, { "epoch": 0.3858267716535433, "grad_norm": 1.6432133913040161, "learning_rate": 7.707865168539327e-05, "loss": 0.9315385818481445, "step": 343, "token_acc": 0.7305585980284776 }, { "epoch": 0.38695163104611924, "grad_norm": 1.5467262268066406, "learning_rate": 7.730337078651686e-05, "loss": 0.921647310256958, "step": 344, "token_acc": 0.7279187817258883 }, { "epoch": 0.3880764904386952, "grad_norm": 1.7176563739776611, "learning_rate": 7.752808988764046e-05, "loss": 0.966290295124054, "step": 345, "token_acc": 0.712682379349046 }, { "epoch": 0.3892013498312711, "grad_norm": 1.471426248550415, "learning_rate": 7.775280898876405e-05, "loss": 0.9627429246902466, "step": 346, "token_acc": 0.7335733573357336 }, { "epoch": 0.39032620922384703, "grad_norm": 1.5558898448944092, "learning_rate": 7.797752808988764e-05, "loss": 1.0369640588760376, "step": 347, "token_acc": 0.7137580794090489 }, { "epoch": 0.3914510686164229, "grad_norm": 1.614942193031311, "learning_rate": 7.820224719101124e-05, "loss": 1.0253329277038574, "step": 348, "token_acc": 0.7139830508474576 }, { "epoch": 0.3925759280089989, "grad_norm": 1.7214652299880981, "learning_rate": 7.842696629213485e-05, "loss": 1.0779566764831543, "step": 349, "token_acc": 0.7184567257559958 }, { "epoch": 0.3937007874015748, "grad_norm": 1.9547094106674194, "learning_rate": 7.865168539325843e-05, "loss": 1.0406358242034912, "step": 350, "token_acc": 0.7296969696969697 }, { "epoch": 0.3948256467941507, "grad_norm": 1.868483304977417, "learning_rate": 7.887640449438202e-05, "loss": 0.867817223072052, "step": 351, "token_acc": 0.7328605200945626 }, { "epoch": 0.39595050618672667, "grad_norm": 1.7770098447799683, "learning_rate": 7.910112359550562e-05, "loss": 0.6769880056381226, "step": 352, "token_acc": 0.7975646879756468 }, { "epoch": 0.39707536557930256, "grad_norm": 1.9091870784759521, "learning_rate": 7.932584269662921e-05, "loss": 1.01605224609375, "step": 353, "token_acc": 0.7281553398058253 }, { "epoch": 0.3982002249718785, "grad_norm": 2.0503270626068115, "learning_rate": 7.95505617977528e-05, "loss": 0.9596544504165649, "step": 354, "token_acc": 0.7522796352583586 }, { "epoch": 0.39932508436445446, "grad_norm": 2.105534315109253, "learning_rate": 7.97752808988764e-05, "loss": 1.1765918731689453, "step": 355, "token_acc": 0.6855263157894737 }, { "epoch": 0.40044994375703036, "grad_norm": 1.9282594919204712, "learning_rate": 8e-05, "loss": 1.1241587400436401, "step": 356, "token_acc": 0.6868131868131868 }, { "epoch": 0.4015748031496063, "grad_norm": 1.7851557731628418, "learning_rate": 8.02247191011236e-05, "loss": 1.099661946296692, "step": 357, "token_acc": 0.7196969696969697 }, { "epoch": 0.4026996625421822, "grad_norm": 1.5959067344665527, "learning_rate": 8.04494382022472e-05, "loss": 1.1076245307922363, "step": 358, "token_acc": 0.7123745819397993 }, { "epoch": 0.40382452193475815, "grad_norm": 1.8876570463180542, "learning_rate": 8.067415730337079e-05, "loss": 1.0498151779174805, "step": 359, "token_acc": 0.7051153460381143 }, { "epoch": 0.4049493813273341, "grad_norm": 1.8181655406951904, "learning_rate": 8.089887640449438e-05, "loss": 1.035964012145996, "step": 360, "token_acc": 0.7100456621004566 }, { "epoch": 0.40607424071991, "grad_norm": 2.2006852626800537, "learning_rate": 8.112359550561798e-05, "loss": 1.0154075622558594, "step": 361, "token_acc": 0.6906158357771262 }, { "epoch": 0.40719910011248595, "grad_norm": 1.7384244203567505, "learning_rate": 8.134831460674159e-05, "loss": 1.0056748390197754, "step": 362, "token_acc": 0.7269807280513919 }, { "epoch": 0.40832395950506184, "grad_norm": 1.829088807106018, "learning_rate": 8.157303370786518e-05, "loss": 0.9836400747299194, "step": 363, "token_acc": 0.732183908045977 }, { "epoch": 0.4094488188976378, "grad_norm": 1.6430420875549316, "learning_rate": 8.179775280898877e-05, "loss": 1.068252444267273, "step": 364, "token_acc": 0.6889352818371608 }, { "epoch": 0.41057367829021374, "grad_norm": 2.019324541091919, "learning_rate": 8.202247191011237e-05, "loss": 1.014890193939209, "step": 365, "token_acc": 0.7144432194046306 }, { "epoch": 0.41169853768278963, "grad_norm": 1.7325459718704224, "learning_rate": 8.224719101123596e-05, "loss": 0.9834113121032715, "step": 366, "token_acc": 0.7365728900255755 }, { "epoch": 0.4128233970753656, "grad_norm": 1.455896258354187, "learning_rate": 8.247191011235956e-05, "loss": 0.8023439049720764, "step": 367, "token_acc": 0.7737154150197628 }, { "epoch": 0.41394825646794153, "grad_norm": 1.9293467998504639, "learning_rate": 8.269662921348315e-05, "loss": 0.8868751525878906, "step": 368, "token_acc": 0.7318116975748931 }, { "epoch": 0.4150731158605174, "grad_norm": 1.818696141242981, "learning_rate": 8.292134831460675e-05, "loss": 1.0851002931594849, "step": 369, "token_acc": 0.6848249027237354 }, { "epoch": 0.4161979752530934, "grad_norm": 1.912016749382019, "learning_rate": 8.314606741573034e-05, "loss": 1.010711431503296, "step": 370, "token_acc": 0.7126567844925884 }, { "epoch": 0.41732283464566927, "grad_norm": 1.9387363195419312, "learning_rate": 8.337078651685393e-05, "loss": 0.8610193729400635, "step": 371, "token_acc": 0.7370572207084468 }, { "epoch": 0.4184476940382452, "grad_norm": 2.034956693649292, "learning_rate": 8.359550561797753e-05, "loss": 1.0351221561431885, "step": 372, "token_acc": 0.6982182628062361 }, { "epoch": 0.41957255343082117, "grad_norm": 2.1406519412994385, "learning_rate": 8.382022471910112e-05, "loss": 0.9714844822883606, "step": 373, "token_acc": 0.7264276228419655 }, { "epoch": 0.42069741282339707, "grad_norm": 1.868467926979065, "learning_rate": 8.404494382022472e-05, "loss": 1.0296168327331543, "step": 374, "token_acc": 0.6955093099671413 }, { "epoch": 0.421822272215973, "grad_norm": 1.7831248044967651, "learning_rate": 8.426966292134831e-05, "loss": 1.0324971675872803, "step": 375, "token_acc": 0.7209756097560975 }, { "epoch": 0.4229471316085489, "grad_norm": 2.2927117347717285, "learning_rate": 8.449438202247192e-05, "loss": 0.9213206768035889, "step": 376, "token_acc": 0.7431972789115646 }, { "epoch": 0.42407199100112486, "grad_norm": 1.7975807189941406, "learning_rate": 8.471910112359551e-05, "loss": 0.8254147171974182, "step": 377, "token_acc": 0.7637028014616322 }, { "epoch": 0.4251968503937008, "grad_norm": 1.6855989694595337, "learning_rate": 8.494382022471911e-05, "loss": 0.9416998624801636, "step": 378, "token_acc": 0.7358870967741935 }, { "epoch": 0.4263217097862767, "grad_norm": 1.60006844997406, "learning_rate": 8.51685393258427e-05, "loss": 1.0888185501098633, "step": 379, "token_acc": 0.7177225340817963 }, { "epoch": 0.42744656917885265, "grad_norm": 2.02270245552063, "learning_rate": 8.53932584269663e-05, "loss": 1.0484315156936646, "step": 380, "token_acc": 0.7083825265643447 }, { "epoch": 0.42857142857142855, "grad_norm": 1.8429011106491089, "learning_rate": 8.561797752808989e-05, "loss": 0.8209081888198853, "step": 381, "token_acc": 0.7730307076101469 }, { "epoch": 0.4296962879640045, "grad_norm": 1.9690057039260864, "learning_rate": 8.58426966292135e-05, "loss": 0.9226238131523132, "step": 382, "token_acc": 0.734640522875817 }, { "epoch": 0.43082114735658045, "grad_norm": 1.7952994108200073, "learning_rate": 8.606741573033709e-05, "loss": 0.9780460596084595, "step": 383, "token_acc": 0.7221052631578947 }, { "epoch": 0.43194600674915634, "grad_norm": 1.9885238409042358, "learning_rate": 8.629213483146069e-05, "loss": 0.9729599952697754, "step": 384, "token_acc": 0.7125340599455041 }, { "epoch": 0.4330708661417323, "grad_norm": 1.9388115406036377, "learning_rate": 8.651685393258427e-05, "loss": 1.0330853462219238, "step": 385, "token_acc": 0.702176403207331 }, { "epoch": 0.4341957255343082, "grad_norm": 1.6072205305099487, "learning_rate": 8.674157303370786e-05, "loss": 0.898686408996582, "step": 386, "token_acc": 0.7454545454545455 }, { "epoch": 0.43532058492688414, "grad_norm": 1.9306304454803467, "learning_rate": 8.696629213483146e-05, "loss": 1.0237057209014893, "step": 387, "token_acc": 0.7163677130044843 }, { "epoch": 0.4364454443194601, "grad_norm": 1.7127808332443237, "learning_rate": 8.719101123595505e-05, "loss": 0.9338386058807373, "step": 388, "token_acc": 0.7218124341412012 }, { "epoch": 0.437570303712036, "grad_norm": 2.274869203567505, "learning_rate": 8.741573033707866e-05, "loss": 0.9225839972496033, "step": 389, "token_acc": 0.7272727272727273 }, { "epoch": 0.43869516310461193, "grad_norm": 1.7930642366409302, "learning_rate": 8.764044943820225e-05, "loss": 0.8145026564598083, "step": 390, "token_acc": 0.7727759914255091 }, { "epoch": 0.4398200224971879, "grad_norm": 2.251962423324585, "learning_rate": 8.786516853932585e-05, "loss": 0.9820806384086609, "step": 391, "token_acc": 0.7216828478964401 }, { "epoch": 0.4409448818897638, "grad_norm": 1.8158506155014038, "learning_rate": 8.808988764044944e-05, "loss": 0.7901725172996521, "step": 392, "token_acc": 0.7571964956195244 }, { "epoch": 0.4420697412823397, "grad_norm": 1.9554229974746704, "learning_rate": 8.831460674157304e-05, "loss": 0.8527881503105164, "step": 393, "token_acc": 0.7568740955137482 }, { "epoch": 0.4431946006749156, "grad_norm": 1.9779621362686157, "learning_rate": 8.853932584269663e-05, "loss": 0.8595879077911377, "step": 394, "token_acc": 0.7398477157360406 }, { "epoch": 0.44431946006749157, "grad_norm": 1.6131535768508911, "learning_rate": 8.876404494382022e-05, "loss": 0.7572569847106934, "step": 395, "token_acc": 0.776 }, { "epoch": 0.4454443194600675, "grad_norm": 1.7298252582550049, "learning_rate": 8.898876404494383e-05, "loss": 0.8614919185638428, "step": 396, "token_acc": 0.7378947368421053 }, { "epoch": 0.4465691788526434, "grad_norm": 1.9181554317474365, "learning_rate": 8.921348314606743e-05, "loss": 0.9360592365264893, "step": 397, "token_acc": 0.7457627118644068 }, { "epoch": 0.44769403824521936, "grad_norm": 1.6104971170425415, "learning_rate": 8.943820224719102e-05, "loss": 1.02340829372406, "step": 398, "token_acc": 0.7105022831050228 }, { "epoch": 0.44881889763779526, "grad_norm": 1.8410289287567139, "learning_rate": 8.966292134831461e-05, "loss": 0.7776477336883545, "step": 399, "token_acc": 0.7552631578947369 }, { "epoch": 0.4499437570303712, "grad_norm": 2.059274911880493, "learning_rate": 8.988764044943821e-05, "loss": 1.0617574453353882, "step": 400, "token_acc": 0.6994949494949495 }, { "epoch": 0.45106861642294716, "grad_norm": 1.794189214706421, "learning_rate": 9.01123595505618e-05, "loss": 1.0806286334991455, "step": 401, "token_acc": 0.6902834008097166 }, { "epoch": 0.45219347581552305, "grad_norm": 1.7676398754119873, "learning_rate": 9.03370786516854e-05, "loss": 0.9557969570159912, "step": 402, "token_acc": 0.733477789815818 }, { "epoch": 0.453318335208099, "grad_norm": 2.020718574523926, "learning_rate": 9.056179775280899e-05, "loss": 1.1698455810546875, "step": 403, "token_acc": 0.6738197424892703 }, { "epoch": 0.4544431946006749, "grad_norm": 1.7821087837219238, "learning_rate": 9.078651685393259e-05, "loss": 0.907133936882019, "step": 404, "token_acc": 0.7420494699646644 }, { "epoch": 0.45556805399325084, "grad_norm": 1.636863112449646, "learning_rate": 9.101123595505618e-05, "loss": 1.0124187469482422, "step": 405, "token_acc": 0.7067927773000859 }, { "epoch": 0.4566929133858268, "grad_norm": 1.915420651435852, "learning_rate": 9.123595505617977e-05, "loss": 0.7591054439544678, "step": 406, "token_acc": 0.7510316368638239 }, { "epoch": 0.4578177727784027, "grad_norm": 1.986689805984497, "learning_rate": 9.146067415730337e-05, "loss": 0.8669993877410889, "step": 407, "token_acc": 0.752 }, { "epoch": 0.45894263217097864, "grad_norm": 1.7488821744918823, "learning_rate": 9.168539325842696e-05, "loss": 0.9363869428634644, "step": 408, "token_acc": 0.7400215749730313 }, { "epoch": 0.46006749156355453, "grad_norm": 1.7887603044509888, "learning_rate": 9.191011235955057e-05, "loss": 0.8656275272369385, "step": 409, "token_acc": 0.7451868629671574 }, { "epoch": 0.4611923509561305, "grad_norm": 2.161726474761963, "learning_rate": 9.213483146067416e-05, "loss": 0.8968093991279602, "step": 410, "token_acc": 0.7546296296296297 }, { "epoch": 0.46231721034870643, "grad_norm": 1.7886883020401, "learning_rate": 9.235955056179776e-05, "loss": 0.7522919178009033, "step": 411, "token_acc": 0.7438423645320197 }, { "epoch": 0.4634420697412823, "grad_norm": 1.574604868888855, "learning_rate": 9.258426966292135e-05, "loss": 0.9448544383049011, "step": 412, "token_acc": 0.7268128161888702 }, { "epoch": 0.4645669291338583, "grad_norm": 2.0669820308685303, "learning_rate": 9.280898876404495e-05, "loss": 1.1227214336395264, "step": 413, "token_acc": 0.6804123711340206 }, { "epoch": 0.46569178852643417, "grad_norm": 1.869429349899292, "learning_rate": 9.303370786516854e-05, "loss": 1.0686094760894775, "step": 414, "token_acc": 0.7309812568908489 }, { "epoch": 0.4668166479190101, "grad_norm": 2.0130107402801514, "learning_rate": 9.325842696629214e-05, "loss": 0.7158048152923584, "step": 415, "token_acc": 0.7748031496062993 }, { "epoch": 0.46794150731158607, "grad_norm": 1.8650795221328735, "learning_rate": 9.348314606741574e-05, "loss": 1.0499544143676758, "step": 416, "token_acc": 0.7188183807439825 }, { "epoch": 0.46906636670416196, "grad_norm": 2.0034749507904053, "learning_rate": 9.370786516853934e-05, "loss": 0.8283402323722839, "step": 417, "token_acc": 0.7344461305007587 }, { "epoch": 0.4701912260967379, "grad_norm": 1.7073888778686523, "learning_rate": 9.393258426966293e-05, "loss": 0.9045039415359497, "step": 418, "token_acc": 0.7352272727272727 }, { "epoch": 0.47131608548931386, "grad_norm": 1.781975269317627, "learning_rate": 9.415730337078653e-05, "loss": 0.9237880706787109, "step": 419, "token_acc": 0.7188888888888889 }, { "epoch": 0.47244094488188976, "grad_norm": 1.7918028831481934, "learning_rate": 9.438202247191012e-05, "loss": 1.2798149585723877, "step": 420, "token_acc": 0.6542502387774594 }, { "epoch": 0.4735658042744657, "grad_norm": 1.4722131490707397, "learning_rate": 9.46067415730337e-05, "loss": 0.9814735651016235, "step": 421, "token_acc": 0.7354538401861909 }, { "epoch": 0.4746906636670416, "grad_norm": 1.839726448059082, "learning_rate": 9.483146067415731e-05, "loss": 0.9781888723373413, "step": 422, "token_acc": 0.7141104294478527 }, { "epoch": 0.47581552305961755, "grad_norm": 1.6538442373275757, "learning_rate": 9.50561797752809e-05, "loss": 1.0824756622314453, "step": 423, "token_acc": 0.6988304093567251 }, { "epoch": 0.4769403824521935, "grad_norm": 1.758972406387329, "learning_rate": 9.52808988764045e-05, "loss": 0.9529639482498169, "step": 424, "token_acc": 0.74364896073903 }, { "epoch": 0.4780652418447694, "grad_norm": 1.7316654920578003, "learning_rate": 9.550561797752809e-05, "loss": 0.8819442987442017, "step": 425, "token_acc": 0.7511682242990654 }, { "epoch": 0.47919010123734535, "grad_norm": 1.8331114053726196, "learning_rate": 9.573033707865169e-05, "loss": 0.9198839664459229, "step": 426, "token_acc": 0.7414403778040142 }, { "epoch": 0.48031496062992124, "grad_norm": 1.531410813331604, "learning_rate": 9.595505617977528e-05, "loss": 1.058027982711792, "step": 427, "token_acc": 0.7096774193548387 }, { "epoch": 0.4814398200224972, "grad_norm": 1.7391997575759888, "learning_rate": 9.617977528089888e-05, "loss": 1.0260260105133057, "step": 428, "token_acc": 0.7330779054916986 }, { "epoch": 0.48256467941507314, "grad_norm": 1.6551629304885864, "learning_rate": 9.640449438202248e-05, "loss": 0.9597284197807312, "step": 429, "token_acc": 0.7258771929824561 }, { "epoch": 0.48368953880764903, "grad_norm": 1.7633756399154663, "learning_rate": 9.662921348314608e-05, "loss": 1.0959086418151855, "step": 430, "token_acc": 0.6887486855941115 }, { "epoch": 0.484814398200225, "grad_norm": 1.6424648761749268, "learning_rate": 9.685393258426967e-05, "loss": 0.9129697680473328, "step": 431, "token_acc": 0.7368972746331237 }, { "epoch": 0.4859392575928009, "grad_norm": 1.7010430097579956, "learning_rate": 9.707865168539327e-05, "loss": 1.1168239116668701, "step": 432, "token_acc": 0.675531914893617 }, { "epoch": 0.4870641169853768, "grad_norm": 1.8887126445770264, "learning_rate": 9.730337078651686e-05, "loss": 0.9141265153884888, "step": 433, "token_acc": 0.7424242424242424 }, { "epoch": 0.4881889763779528, "grad_norm": 1.657232403755188, "learning_rate": 9.752808988764045e-05, "loss": 1.0545477867126465, "step": 434, "token_acc": 0.7080504364694471 }, { "epoch": 0.48931383577052867, "grad_norm": 1.9192181825637817, "learning_rate": 9.775280898876405e-05, "loss": 1.1103620529174805, "step": 435, "token_acc": 0.6975446428571429 }, { "epoch": 0.4904386951631046, "grad_norm": 2.0899574756622314, "learning_rate": 9.797752808988764e-05, "loss": 1.0185210704803467, "step": 436, "token_acc": 0.7020057306590258 }, { "epoch": 0.4915635545556805, "grad_norm": 1.9788764715194702, "learning_rate": 9.820224719101124e-05, "loss": 1.0533381700515747, "step": 437, "token_acc": 0.7117572692793932 }, { "epoch": 0.49268841394825647, "grad_norm": 1.689030647277832, "learning_rate": 9.842696629213483e-05, "loss": 0.8496209979057312, "step": 438, "token_acc": 0.7497181510710259 }, { "epoch": 0.4938132733408324, "grad_norm": 1.7298907041549683, "learning_rate": 9.865168539325843e-05, "loss": 0.8335422277450562, "step": 439, "token_acc": 0.7525773195876289 }, { "epoch": 0.4949381327334083, "grad_norm": 1.8018159866333008, "learning_rate": 9.887640449438202e-05, "loss": 1.1020152568817139, "step": 440, "token_acc": 0.693050193050193 }, { "epoch": 0.49606299212598426, "grad_norm": 1.5440486669540405, "learning_rate": 9.910112359550561e-05, "loss": 0.9385735988616943, "step": 441, "token_acc": 0.7160493827160493 }, { "epoch": 0.49718785151856015, "grad_norm": 1.5364229679107666, "learning_rate": 9.932584269662922e-05, "loss": 0.9202176332473755, "step": 442, "token_acc": 0.7567567567567568 }, { "epoch": 0.4983127109111361, "grad_norm": 1.9762901067733765, "learning_rate": 9.955056179775282e-05, "loss": 0.9439225196838379, "step": 443, "token_acc": 0.7472677595628415 }, { "epoch": 0.49943757030371205, "grad_norm": 1.904464602470398, "learning_rate": 9.977528089887641e-05, "loss": 0.9775711297988892, "step": 444, "token_acc": 0.7166437414030261 }, { "epoch": 0.500562429696288, "grad_norm": 1.5895994901657104, "learning_rate": 0.0001, "loss": 0.9630712270736694, "step": 445, "token_acc": 0.72931654676259 }, { "epoch": 0.5016872890888638, "grad_norm": 1.4701722860336304, "learning_rate": 9.999999654028407e-05, "loss": 0.81491619348526, "step": 446, "token_acc": 0.7509090909090909 }, { "epoch": 0.5028121484814398, "grad_norm": 1.6926226615905762, "learning_rate": 9.999998616113678e-05, "loss": 1.126811146736145, "step": 447, "token_acc": 0.6873156342182891 }, { "epoch": 0.5039370078740157, "grad_norm": 1.877213716506958, "learning_rate": 9.99999688625595e-05, "loss": 0.8925565481185913, "step": 448, "token_acc": 0.7211934156378601 }, { "epoch": 0.5050618672665916, "grad_norm": 1.6865899562835693, "learning_rate": 9.999994464455469e-05, "loss": 0.8218963146209717, "step": 449, "token_acc": 0.7443249701314217 }, { "epoch": 0.5061867266591676, "grad_norm": 1.7684983015060425, "learning_rate": 9.99999135071257e-05, "loss": 1.0055979490280151, "step": 450, "token_acc": 0.7297592997811816 }, { "epoch": 0.5073115860517435, "grad_norm": 1.9021574258804321, "learning_rate": 9.999987545027681e-05, "loss": 0.8266463279724121, "step": 451, "token_acc": 0.7439490445859872 }, { "epoch": 0.5084364454443194, "grad_norm": 1.6723664999008179, "learning_rate": 9.999983047401329e-05, "loss": 0.9505488276481628, "step": 452, "token_acc": 0.7516268980477223 }, { "epoch": 0.5095613048368954, "grad_norm": 1.6765656471252441, "learning_rate": 9.999977857834137e-05, "loss": 0.889122486114502, "step": 453, "token_acc": 0.7408184679958028 }, { "epoch": 0.5106861642294713, "grad_norm": 1.7963595390319824, "learning_rate": 9.999971976326824e-05, "loss": 0.9195739030838013, "step": 454, "token_acc": 0.7409326424870466 }, { "epoch": 0.5118110236220472, "grad_norm": 1.7931852340698242, "learning_rate": 9.999965402880201e-05, "loss": 1.1055867671966553, "step": 455, "token_acc": 0.7002341920374707 }, { "epoch": 0.5129358830146231, "grad_norm": 1.677232265472412, "learning_rate": 9.999958137495182e-05, "loss": 1.0808690786361694, "step": 456, "token_acc": 0.6951456310679611 }, { "epoch": 0.5140607424071991, "grad_norm": 1.758243441581726, "learning_rate": 9.99995018017277e-05, "loss": 0.8689571619033813, "step": 457, "token_acc": 0.7573812580231065 }, { "epoch": 0.515185601799775, "grad_norm": 1.8511914014816284, "learning_rate": 9.999941530914065e-05, "loss": 0.9207149744033813, "step": 458, "token_acc": 0.7430555555555556 }, { "epoch": 0.5163104611923509, "grad_norm": 1.7980667352676392, "learning_rate": 9.999932189720268e-05, "loss": 0.8377244472503662, "step": 459, "token_acc": 0.7681528662420383 }, { "epoch": 0.5174353205849269, "grad_norm": 2.112494707107544, "learning_rate": 9.999922156592667e-05, "loss": 0.7185490131378174, "step": 460, "token_acc": 0.796 }, { "epoch": 0.5185601799775028, "grad_norm": 1.6323167085647583, "learning_rate": 9.999911431532654e-05, "loss": 1.089188575744629, "step": 461, "token_acc": 0.6920289855072463 }, { "epoch": 0.5196850393700787, "grad_norm": 1.8774174451828003, "learning_rate": 9.999900014541711e-05, "loss": 0.8106789588928223, "step": 462, "token_acc": 0.7645466847090663 }, { "epoch": 0.5208098987626547, "grad_norm": 2.1977460384368896, "learning_rate": 9.999887905621419e-05, "loss": 1.1363319158554077, "step": 463, "token_acc": 0.6930946291560103 }, { "epoch": 0.5219347581552306, "grad_norm": 1.7885620594024658, "learning_rate": 9.999875104773456e-05, "loss": 1.0321249961853027, "step": 464, "token_acc": 0.7023809523809523 }, { "epoch": 0.5230596175478065, "grad_norm": 1.7947347164154053, "learning_rate": 9.999861611999588e-05, "loss": 1.0891461372375488, "step": 465, "token_acc": 0.7056323060573858 }, { "epoch": 0.5241844769403825, "grad_norm": 2.0213098526000977, "learning_rate": 9.999847427301686e-05, "loss": 1.003814458847046, "step": 466, "token_acc": 0.7151248164464024 }, { "epoch": 0.5253093363329584, "grad_norm": 1.865444540977478, "learning_rate": 9.999832550681714e-05, "loss": 0.9717703461647034, "step": 467, "token_acc": 0.7171945701357466 }, { "epoch": 0.5264341957255343, "grad_norm": 1.67775559425354, "learning_rate": 9.999816982141726e-05, "loss": 0.8342432975769043, "step": 468, "token_acc": 0.7519729425028185 }, { "epoch": 0.5275590551181102, "grad_norm": 1.7553514242172241, "learning_rate": 9.999800721683883e-05, "loss": 0.8562053442001343, "step": 469, "token_acc": 0.7460978147762747 }, { "epoch": 0.5286839145106862, "grad_norm": 2.0482115745544434, "learning_rate": 9.99978376931043e-05, "loss": 0.8899391889572144, "step": 470, "token_acc": 0.7373134328358208 }, { "epoch": 0.5298087739032621, "grad_norm": 1.9522459506988525, "learning_rate": 9.999766125023715e-05, "loss": 0.9233064651489258, "step": 471, "token_acc": 0.7448979591836735 }, { "epoch": 0.530933633295838, "grad_norm": 1.8382872343063354, "learning_rate": 9.99974778882618e-05, "loss": 0.9245150089263916, "step": 472, "token_acc": 0.7295980511571255 }, { "epoch": 0.532058492688414, "grad_norm": 1.5698022842407227, "learning_rate": 9.999728760720362e-05, "loss": 0.9364007711410522, "step": 473, "token_acc": 0.7370892018779343 }, { "epoch": 0.5331833520809899, "grad_norm": 1.7962346076965332, "learning_rate": 9.999709040708894e-05, "loss": 0.941253662109375, "step": 474, "token_acc": 0.7089371980676329 }, { "epoch": 0.5343082114735658, "grad_norm": 1.57411527633667, "learning_rate": 9.999688628794506e-05, "loss": 0.8269374370574951, "step": 475, "token_acc": 0.7657082002129926 }, { "epoch": 0.5354330708661418, "grad_norm": 1.8577425479888916, "learning_rate": 9.999667524980023e-05, "loss": 0.9406548142433167, "step": 476, "token_acc": 0.734304932735426 }, { "epoch": 0.5365579302587177, "grad_norm": 1.743920922279358, "learning_rate": 9.999645729268363e-05, "loss": 0.9093344211578369, "step": 477, "token_acc": 0.7286821705426356 }, { "epoch": 0.5376827896512936, "grad_norm": 1.8215903043746948, "learning_rate": 9.999623241662545e-05, "loss": 0.9506005644798279, "step": 478, "token_acc": 0.7281767955801105 }, { "epoch": 0.5388076490438695, "grad_norm": 1.66728675365448, "learning_rate": 9.999600062165682e-05, "loss": 0.7967265248298645, "step": 479, "token_acc": 0.7580275229357798 }, { "epoch": 0.5399325084364455, "grad_norm": 1.8375515937805176, "learning_rate": 9.999576190780977e-05, "loss": 0.9914565086364746, "step": 480, "token_acc": 0.7283163265306123 }, { "epoch": 0.5410573678290214, "grad_norm": 1.8452866077423096, "learning_rate": 9.999551627511738e-05, "loss": 0.8631696701049805, "step": 481, "token_acc": 0.7343565525383707 }, { "epoch": 0.5421822272215973, "grad_norm": 2.05641508102417, "learning_rate": 9.99952637236136e-05, "loss": 0.9694948792457581, "step": 482, "token_acc": 0.7076700434153401 }, { "epoch": 0.5433070866141733, "grad_norm": 1.874616265296936, "learning_rate": 9.999500425333344e-05, "loss": 0.901570200920105, "step": 483, "token_acc": 0.7372448979591837 }, { "epoch": 0.5444319460067492, "grad_norm": 1.694012999534607, "learning_rate": 9.999473786431277e-05, "loss": 0.9330540299415588, "step": 484, "token_acc": 0.7313883299798792 }, { "epoch": 0.545556805399325, "grad_norm": 1.9718025922775269, "learning_rate": 9.999446455658845e-05, "loss": 0.9848131537437439, "step": 485, "token_acc": 0.6977329974811083 }, { "epoch": 0.5466816647919011, "grad_norm": 1.5472242832183838, "learning_rate": 9.999418433019831e-05, "loss": 0.6157548427581787, "step": 486, "token_acc": 0.8248520710059172 }, { "epoch": 0.547806524184477, "grad_norm": 1.7450768947601318, "learning_rate": 9.999389718518114e-05, "loss": 0.9147881269454956, "step": 487, "token_acc": 0.7351991388589881 }, { "epoch": 0.5489313835770528, "grad_norm": 1.9537683725357056, "learning_rate": 9.999360312157668e-05, "loss": 0.7767515182495117, "step": 488, "token_acc": 0.7803234501347709 }, { "epoch": 0.5500562429696289, "grad_norm": 1.6519399881362915, "learning_rate": 9.99933021394256e-05, "loss": 0.9353740215301514, "step": 489, "token_acc": 0.7497393117831074 }, { "epoch": 0.5511811023622047, "grad_norm": 1.6420589685440063, "learning_rate": 9.999299423876958e-05, "loss": 0.8979807496070862, "step": 490, "token_acc": 0.7451403887688985 }, { "epoch": 0.5523059617547806, "grad_norm": 1.969567060470581, "learning_rate": 9.99926794196512e-05, "loss": 0.9774200320243835, "step": 491, "token_acc": 0.7034482758620689 }, { "epoch": 0.5534308211473565, "grad_norm": 1.5917003154754639, "learning_rate": 9.999235768211406e-05, "loss": 0.9690948724746704, "step": 492, "token_acc": 0.7288481141692151 }, { "epoch": 0.5545556805399325, "grad_norm": 1.9127694368362427, "learning_rate": 9.999202902620267e-05, "loss": 0.9238312244415283, "step": 493, "token_acc": 0.7436260623229461 }, { "epoch": 0.5556805399325084, "grad_norm": 1.59657621383667, "learning_rate": 9.99916934519625e-05, "loss": 0.8397965431213379, "step": 494, "token_acc": 0.757396449704142 }, { "epoch": 0.5568053993250843, "grad_norm": 1.4615120887756348, "learning_rate": 9.999135095944002e-05, "loss": 0.8714455366134644, "step": 495, "token_acc": 0.7555753791257805 }, { "epoch": 0.5579302587176603, "grad_norm": 1.9949431419372559, "learning_rate": 9.99910015486826e-05, "loss": 0.9581304788589478, "step": 496, "token_acc": 0.7230769230769231 }, { "epoch": 0.5590551181102362, "grad_norm": 1.7024974822998047, "learning_rate": 9.99906452197386e-05, "loss": 0.7987035512924194, "step": 497, "token_acc": 0.7754137115839244 }, { "epoch": 0.5601799775028121, "grad_norm": 1.7098333835601807, "learning_rate": 9.999028197265734e-05, "loss": 0.9702333211898804, "step": 498, "token_acc": 0.6972573839662447 }, { "epoch": 0.5613048368953881, "grad_norm": 1.5519360303878784, "learning_rate": 9.998991180748909e-05, "loss": 0.8312081694602966, "step": 499, "token_acc": 0.7491821155943293 }, { "epoch": 0.562429696287964, "grad_norm": 1.6157681941986084, "learning_rate": 9.998953472428505e-05, "loss": 0.969581127166748, "step": 500, "token_acc": 0.7130044843049327 }, { "epoch": 0.5635545556805399, "grad_norm": 1.766552209854126, "learning_rate": 9.998915072309745e-05, "loss": 1.016394853591919, "step": 501, "token_acc": 0.7217787913340935 }, { "epoch": 0.5646794150731158, "grad_norm": 1.749894380569458, "learning_rate": 9.99887598039794e-05, "loss": 0.9122739434242249, "step": 502, "token_acc": 0.7413793103448276 }, { "epoch": 0.5658042744656918, "grad_norm": 1.6881117820739746, "learning_rate": 9.998836196698498e-05, "loss": 0.9004767537117004, "step": 503, "token_acc": 0.7466666666666667 }, { "epoch": 0.5669291338582677, "grad_norm": 1.6709896326065063, "learning_rate": 9.99879572121693e-05, "loss": 0.8432103395462036, "step": 504, "token_acc": 0.748587570621469 }, { "epoch": 0.5680539932508436, "grad_norm": 1.6297454833984375, "learning_rate": 9.998754553958835e-05, "loss": 0.9130585789680481, "step": 505, "token_acc": 0.7442489851150202 }, { "epoch": 0.5691788526434196, "grad_norm": 1.80604887008667, "learning_rate": 9.998712694929909e-05, "loss": 1.0790188312530518, "step": 506, "token_acc": 0.6949327817993796 }, { "epoch": 0.5703037120359955, "grad_norm": 1.6757398843765259, "learning_rate": 9.998670144135944e-05, "loss": 1.0439094305038452, "step": 507, "token_acc": 0.7007150153217568 }, { "epoch": 0.5714285714285714, "grad_norm": 1.7482174634933472, "learning_rate": 9.99862690158283e-05, "loss": 0.9285833835601807, "step": 508, "token_acc": 0.7395079594790159 }, { "epoch": 0.5725534308211474, "grad_norm": 1.6277644634246826, "learning_rate": 9.998582967276553e-05, "loss": 0.9320547580718994, "step": 509, "token_acc": 0.7368421052631579 }, { "epoch": 0.5736782902137233, "grad_norm": 1.7589435577392578, "learning_rate": 9.998538341223191e-05, "loss": 0.8671249747276306, "step": 510, "token_acc": 0.7395957193816884 }, { "epoch": 0.5748031496062992, "grad_norm": 1.6167818307876587, "learning_rate": 9.998493023428919e-05, "loss": 1.0116883516311646, "step": 511, "token_acc": 0.7234972677595628 }, { "epoch": 0.5759280089988752, "grad_norm": 1.7206785678863525, "learning_rate": 9.99844701390001e-05, "loss": 1.1284027099609375, "step": 512, "token_acc": 0.6923076923076923 }, { "epoch": 0.5770528683914511, "grad_norm": 1.5367282629013062, "learning_rate": 9.99840031264283e-05, "loss": 0.847995400428772, "step": 513, "token_acc": 0.7322834645669292 }, { "epoch": 0.578177727784027, "grad_norm": 1.5670790672302246, "learning_rate": 9.998352919663844e-05, "loss": 1.1116704940795898, "step": 514, "token_acc": 0.6863070539419087 }, { "epoch": 0.5793025871766029, "grad_norm": 1.519879698753357, "learning_rate": 9.998304834969607e-05, "loss": 0.7546162605285645, "step": 515, "token_acc": 0.7642105263157895 }, { "epoch": 0.5804274465691789, "grad_norm": 1.5688236951828003, "learning_rate": 9.998256058566778e-05, "loss": 0.9205060601234436, "step": 516, "token_acc": 0.7423255813953489 }, { "epoch": 0.5815523059617548, "grad_norm": 1.6930930614471436, "learning_rate": 9.998206590462103e-05, "loss": 0.9309430718421936, "step": 517, "token_acc": 0.7373626373626374 }, { "epoch": 0.5826771653543307, "grad_norm": 1.8206543922424316, "learning_rate": 9.998156430662431e-05, "loss": 0.7952457666397095, "step": 518, "token_acc": 0.76010101010101 }, { "epoch": 0.5838020247469067, "grad_norm": 1.6631560325622559, "learning_rate": 9.9981055791747e-05, "loss": 0.8779273629188538, "step": 519, "token_acc": 0.755826859045505 }, { "epoch": 0.5849268841394826, "grad_norm": 1.6144158840179443, "learning_rate": 9.99805403600595e-05, "loss": 0.8259216547012329, "step": 520, "token_acc": 0.7569060773480663 }, { "epoch": 0.5860517435320585, "grad_norm": 1.6182746887207031, "learning_rate": 9.998001801163315e-05, "loss": 1.00669264793396, "step": 521, "token_acc": 0.7113970588235294 }, { "epoch": 0.5871766029246345, "grad_norm": 2.078774929046631, "learning_rate": 9.99794887465402e-05, "loss": 0.8442191481590271, "step": 522, "token_acc": 0.7205169628432956 }, { "epoch": 0.5883014623172104, "grad_norm": 1.7363700866699219, "learning_rate": 9.997895256485394e-05, "loss": 1.1360716819763184, "step": 523, "token_acc": 0.6995215311004784 }, { "epoch": 0.5894263217097863, "grad_norm": 1.6288493871688843, "learning_rate": 9.997840946664852e-05, "loss": 0.8118647336959839, "step": 524, "token_acc": 0.7647690655209453 }, { "epoch": 0.5905511811023622, "grad_norm": 1.8639870882034302, "learning_rate": 9.997785945199913e-05, "loss": 0.9957873225212097, "step": 525, "token_acc": 0.6961538461538461 }, { "epoch": 0.5916760404949382, "grad_norm": 1.887899398803711, "learning_rate": 9.997730252098189e-05, "loss": 0.8277996778488159, "step": 526, "token_acc": 0.7626886145404664 }, { "epoch": 0.592800899887514, "grad_norm": 1.6713125705718994, "learning_rate": 9.997673867367386e-05, "loss": 0.9706795811653137, "step": 527, "token_acc": 0.7215447154471545 }, { "epoch": 0.59392575928009, "grad_norm": 1.361349105834961, "learning_rate": 9.997616791015307e-05, "loss": 0.6854506134986877, "step": 528, "token_acc": 0.8075117370892019 }, { "epoch": 0.595050618672666, "grad_norm": 1.5325562953948975, "learning_rate": 9.997559023049851e-05, "loss": 1.0581411123275757, "step": 529, "token_acc": 0.7087198515769945 }, { "epoch": 0.5961754780652418, "grad_norm": 1.9656407833099365, "learning_rate": 9.997500563479012e-05, "loss": 0.9860319495201111, "step": 530, "token_acc": 0.7262357414448669 }, { "epoch": 0.5973003374578177, "grad_norm": 1.7364169359207153, "learning_rate": 9.997441412310881e-05, "loss": 0.8925040364265442, "step": 531, "token_acc": 0.7604871447902571 }, { "epoch": 0.5984251968503937, "grad_norm": 1.5898770093917847, "learning_rate": 9.997381569553645e-05, "loss": 1.0042163133621216, "step": 532, "token_acc": 0.7216396568160153 }, { "epoch": 0.5995500562429696, "grad_norm": 1.7038788795471191, "learning_rate": 9.997321035215583e-05, "loss": 0.8894066214561462, "step": 533, "token_acc": 0.7316129032258064 }, { "epoch": 0.6006749156355455, "grad_norm": 1.6255488395690918, "learning_rate": 9.997259809305072e-05, "loss": 1.0759609937667847, "step": 534, "token_acc": 0.7061657032755299 }, { "epoch": 0.6017997750281214, "grad_norm": 1.7801724672317505, "learning_rate": 9.997197891830588e-05, "loss": 0.9620454907417297, "step": 535, "token_acc": 0.7272727272727273 }, { "epoch": 0.6029246344206974, "grad_norm": 1.6321924924850464, "learning_rate": 9.997135282800699e-05, "loss": 0.935189962387085, "step": 536, "token_acc": 0.728448275862069 }, { "epoch": 0.6040494938132733, "grad_norm": 1.3237054347991943, "learning_rate": 9.997071982224065e-05, "loss": 0.7403424978256226, "step": 537, "token_acc": 0.792694965449161 }, { "epoch": 0.6051743532058492, "grad_norm": 1.742997169494629, "learning_rate": 9.997007990109451e-05, "loss": 0.9071752429008484, "step": 538, "token_acc": 0.7431850789096126 }, { "epoch": 0.6062992125984252, "grad_norm": 1.5787461996078491, "learning_rate": 9.996943306465711e-05, "loss": 0.9023919105529785, "step": 539, "token_acc": 0.7396788990825688 }, { "epoch": 0.6074240719910011, "grad_norm": 1.6242974996566772, "learning_rate": 9.996877931301798e-05, "loss": 0.8403110504150391, "step": 540, "token_acc": 0.7549751243781094 }, { "epoch": 0.608548931383577, "grad_norm": 1.6771514415740967, "learning_rate": 9.996811864626756e-05, "loss": 0.9229577779769897, "step": 541, "token_acc": 0.7278562259306803 }, { "epoch": 0.609673790776153, "grad_norm": 1.8539551496505737, "learning_rate": 9.996745106449732e-05, "loss": 1.014020323753357, "step": 542, "token_acc": 0.735632183908046 }, { "epoch": 0.6107986501687289, "grad_norm": 1.7265057563781738, "learning_rate": 9.99667765677996e-05, "loss": 0.9452383518218994, "step": 543, "token_acc": 0.7090694935217904 }, { "epoch": 0.6119235095613048, "grad_norm": 1.6910704374313354, "learning_rate": 9.996609515626778e-05, "loss": 0.8774662613868713, "step": 544, "token_acc": 0.7544581618655692 }, { "epoch": 0.6130483689538808, "grad_norm": 1.6120868921279907, "learning_rate": 9.996540682999613e-05, "loss": 1.0899629592895508, "step": 545, "token_acc": 0.7017543859649122 }, { "epoch": 0.6141732283464567, "grad_norm": 1.8012447357177734, "learning_rate": 9.996471158907994e-05, "loss": 1.0095912218093872, "step": 546, "token_acc": 0.7011642949547219 }, { "epoch": 0.6152980877390326, "grad_norm": 1.510349988937378, "learning_rate": 9.99640094336154e-05, "loss": 0.8258073925971985, "step": 547, "token_acc": 0.7636949516648764 }, { "epoch": 0.6164229471316085, "grad_norm": 1.462695837020874, "learning_rate": 9.996330036369967e-05, "loss": 1.0373156070709229, "step": 548, "token_acc": 0.7291666666666666 }, { "epoch": 0.6175478065241845, "grad_norm": 1.9315133094787598, "learning_rate": 9.996258437943092e-05, "loss": 0.969207227230072, "step": 549, "token_acc": 0.711340206185567 }, { "epoch": 0.6186726659167604, "grad_norm": 1.6201508045196533, "learning_rate": 9.996186148090818e-05, "loss": 1.1326305866241455, "step": 550, "token_acc": 0.7017873941674506 }, { "epoch": 0.6197975253093363, "grad_norm": 1.7361562252044678, "learning_rate": 9.996113166823153e-05, "loss": 1.1013364791870117, "step": 551, "token_acc": 0.6936585365853658 }, { "epoch": 0.6209223847019123, "grad_norm": 1.8964892625808716, "learning_rate": 9.996039494150196e-05, "loss": 0.9069967269897461, "step": 552, "token_acc": 0.73989898989899 }, { "epoch": 0.6220472440944882, "grad_norm": 1.6381678581237793, "learning_rate": 9.995965130082142e-05, "loss": 1.0879807472229004, "step": 553, "token_acc": 0.696913002806361 }, { "epoch": 0.6231721034870641, "grad_norm": 1.781816840171814, "learning_rate": 9.995890074629281e-05, "loss": 0.9341049194335938, "step": 554, "token_acc": 0.7319277108433735 }, { "epoch": 0.6242969628796401, "grad_norm": 1.552807092666626, "learning_rate": 9.995814327802002e-05, "loss": 0.8768585920333862, "step": 555, "token_acc": 0.7475845410628019 }, { "epoch": 0.625421822272216, "grad_norm": 1.5902745723724365, "learning_rate": 9.995737889610785e-05, "loss": 0.8733174800872803, "step": 556, "token_acc": 0.7555555555555555 }, { "epoch": 0.6265466816647919, "grad_norm": 1.6459444761276245, "learning_rate": 9.995660760066213e-05, "loss": 1.070095419883728, "step": 557, "token_acc": 0.6853490658800393 }, { "epoch": 0.6276715410573678, "grad_norm": 1.845428466796875, "learning_rate": 9.995582939178953e-05, "loss": 1.0706523656845093, "step": 558, "token_acc": 0.7106227106227107 }, { "epoch": 0.6287964004499438, "grad_norm": 1.5699331760406494, "learning_rate": 9.995504426959781e-05, "loss": 1.2219717502593994, "step": 559, "token_acc": 0.6803212851405622 }, { "epoch": 0.6299212598425197, "grad_norm": 1.5608389377593994, "learning_rate": 9.995425223419557e-05, "loss": 0.9056589603424072, "step": 560, "token_acc": 0.740990990990991 }, { "epoch": 0.6310461192350956, "grad_norm": 1.3714408874511719, "learning_rate": 9.995345328569245e-05, "loss": 0.7132170796394348, "step": 561, "token_acc": 0.7856440511307768 }, { "epoch": 0.6321709786276716, "grad_norm": 1.709624171257019, "learning_rate": 9.9952647424199e-05, "loss": 0.9965505599975586, "step": 562, "token_acc": 0.7056277056277056 }, { "epoch": 0.6332958380202475, "grad_norm": 1.7442611455917358, "learning_rate": 9.995183464982676e-05, "loss": 0.9463399648666382, "step": 563, "token_acc": 0.7262044653349001 }, { "epoch": 0.6344206974128234, "grad_norm": 1.7050620317459106, "learning_rate": 9.99510149626882e-05, "loss": 0.9136192798614502, "step": 564, "token_acc": 0.7314285714285714 }, { "epoch": 0.6355455568053994, "grad_norm": 1.6122019290924072, "learning_rate": 9.995018836289674e-05, "loss": 0.8809152841567993, "step": 565, "token_acc": 0.7446555819477435 }, { "epoch": 0.6366704161979753, "grad_norm": 1.7859869003295898, "learning_rate": 9.994935485056678e-05, "loss": 0.9276303052902222, "step": 566, "token_acc": 0.7297619047619047 }, { "epoch": 0.6377952755905512, "grad_norm": 1.5825804471969604, "learning_rate": 9.994851442581367e-05, "loss": 0.9995869398117065, "step": 567, "token_acc": 0.7342592592592593 }, { "epoch": 0.6389201349831272, "grad_norm": 1.6556930541992188, "learning_rate": 9.994766708875374e-05, "loss": 1.0218591690063477, "step": 568, "token_acc": 0.7249736564805058 }, { "epoch": 0.6400449943757031, "grad_norm": 1.7720814943313599, "learning_rate": 9.994681283950422e-05, "loss": 0.9387189149856567, "step": 569, "token_acc": 0.7264631043256997 }, { "epoch": 0.641169853768279, "grad_norm": 1.416343092918396, "learning_rate": 9.994595167818334e-05, "loss": 0.8320167064666748, "step": 570, "token_acc": 0.7643129770992366 }, { "epoch": 0.6422947131608548, "grad_norm": 1.5014926195144653, "learning_rate": 9.994508360491029e-05, "loss": 0.9738361835479736, "step": 571, "token_acc": 0.735632183908046 }, { "epoch": 0.6434195725534309, "grad_norm": 1.4267703294754028, "learning_rate": 9.994420861980515e-05, "loss": 0.7471256852149963, "step": 572, "token_acc": 0.7887323943661971 }, { "epoch": 0.6445444319460067, "grad_norm": 1.596143364906311, "learning_rate": 9.994332672298908e-05, "loss": 0.9780352115631104, "step": 573, "token_acc": 0.7319304666056725 }, { "epoch": 0.6456692913385826, "grad_norm": 2.0502495765686035, "learning_rate": 9.994243791458407e-05, "loss": 0.9598357677459717, "step": 574, "token_acc": 0.7254335260115607 }, { "epoch": 0.6467941507311586, "grad_norm": 1.7178117036819458, "learning_rate": 9.994154219471313e-05, "loss": 0.9414328336715698, "step": 575, "token_acc": 0.735224586288416 }, { "epoch": 0.6479190101237345, "grad_norm": 1.7631182670593262, "learning_rate": 9.994063956350023e-05, "loss": 0.9650301337242126, "step": 576, "token_acc": 0.7187864644107351 }, { "epoch": 0.6490438695163104, "grad_norm": 1.7284934520721436, "learning_rate": 9.99397300210703e-05, "loss": 0.7137409448623657, "step": 577, "token_acc": 0.8056951423785594 }, { "epoch": 0.6501687289088864, "grad_norm": 1.8959155082702637, "learning_rate": 9.993881356754916e-05, "loss": 0.8731468915939331, "step": 578, "token_acc": 0.7367624810892587 }, { "epoch": 0.6512935883014623, "grad_norm": 1.853358268737793, "learning_rate": 9.993789020306369e-05, "loss": 0.9127748012542725, "step": 579, "token_acc": 0.7441860465116279 }, { "epoch": 0.6524184476940382, "grad_norm": 1.470856785774231, "learning_rate": 9.993695992774164e-05, "loss": 0.833692193031311, "step": 580, "token_acc": 0.7660020986358866 }, { "epoch": 0.6535433070866141, "grad_norm": 1.375518560409546, "learning_rate": 9.993602274171177e-05, "loss": 0.783367931842804, "step": 581, "token_acc": 0.7761767531219981 }, { "epoch": 0.6546681664791901, "grad_norm": 1.7509196996688843, "learning_rate": 9.993507864510375e-05, "loss": 1.050197958946228, "step": 582, "token_acc": 0.6926713947990544 }, { "epoch": 0.655793025871766, "grad_norm": 1.8950722217559814, "learning_rate": 9.993412763804825e-05, "loss": 0.8626718521118164, "step": 583, "token_acc": 0.7371273712737128 }, { "epoch": 0.6569178852643419, "grad_norm": 1.5655560493469238, "learning_rate": 9.993316972067689e-05, "loss": 0.8241537809371948, "step": 584, "token_acc": 0.7549800796812749 }, { "epoch": 0.6580427446569179, "grad_norm": 1.6248893737792969, "learning_rate": 9.993220489312222e-05, "loss": 1.1170759201049805, "step": 585, "token_acc": 0.6857638888888888 }, { "epoch": 0.6591676040494938, "grad_norm": 1.7044464349746704, "learning_rate": 9.993123315551775e-05, "loss": 0.8878785371780396, "step": 586, "token_acc": 0.7234848484848485 }, { "epoch": 0.6602924634420697, "grad_norm": 1.7561858892440796, "learning_rate": 9.993025450799798e-05, "loss": 1.0174570083618164, "step": 587, "token_acc": 0.7289719626168224 }, { "epoch": 0.6614173228346457, "grad_norm": 1.772159218788147, "learning_rate": 9.992926895069833e-05, "loss": 1.0948477983474731, "step": 588, "token_acc": 0.6958128078817734 }, { "epoch": 0.6625421822272216, "grad_norm": 1.759687066078186, "learning_rate": 9.99282764837552e-05, "loss": 0.9941725730895996, "step": 589, "token_acc": 0.6952595936794582 }, { "epoch": 0.6636670416197975, "grad_norm": 1.9477722644805908, "learning_rate": 9.992727710730591e-05, "loss": 0.9429323673248291, "step": 590, "token_acc": 0.7318235995232419 }, { "epoch": 0.6647919010123734, "grad_norm": 1.6137140989303589, "learning_rate": 9.992627082148882e-05, "loss": 0.9824059009552002, "step": 591, "token_acc": 0.7124631992149166 }, { "epoch": 0.6659167604049494, "grad_norm": 1.6318740844726562, "learning_rate": 9.992525762644312e-05, "loss": 0.9558103084564209, "step": 592, "token_acc": 0.7164790174002047 }, { "epoch": 0.6670416197975253, "grad_norm": 2.089141607284546, "learning_rate": 9.992423752230906e-05, "loss": 1.0185669660568237, "step": 593, "token_acc": 0.7029702970297029 }, { "epoch": 0.6681664791901012, "grad_norm": 1.746010661125183, "learning_rate": 9.992321050922782e-05, "loss": 1.0430420637130737, "step": 594, "token_acc": 0.705114254624592 }, { "epoch": 0.6692913385826772, "grad_norm": 1.8080694675445557, "learning_rate": 9.99221765873415e-05, "loss": 0.782420814037323, "step": 595, "token_acc": 0.7691218130311614 }, { "epoch": 0.6704161979752531, "grad_norm": 1.623846173286438, "learning_rate": 9.99211357567932e-05, "loss": 1.023358941078186, "step": 596, "token_acc": 0.7222820236813778 }, { "epoch": 0.671541057367829, "grad_norm": 1.5481666326522827, "learning_rate": 9.992008801772694e-05, "loss": 1.1613426208496094, "step": 597, "token_acc": 0.672859450726979 }, { "epoch": 0.672665916760405, "grad_norm": 1.7090049982070923, "learning_rate": 9.991903337028775e-05, "loss": 1.0023084878921509, "step": 598, "token_acc": 0.7103960396039604 }, { "epoch": 0.6737907761529809, "grad_norm": 1.4533681869506836, "learning_rate": 9.991797181462156e-05, "loss": 0.8661315441131592, "step": 599, "token_acc": 0.7544351073762838 }, { "epoch": 0.6749156355455568, "grad_norm": 1.7757734060287476, "learning_rate": 9.991690335087527e-05, "loss": 0.828032910823822, "step": 600, "token_acc": 0.7541401273885351 }, { "epoch": 0.6749156355455568, "eval_loss": 0.9203294515609741, "eval_runtime": 31.6118, "eval_samples_per_second": 25.402, "eval_steps_per_second": 3.195, "eval_token_acc": 0.7332773888655787, "step": 600 }, { "epoch": 0.6760404949381328, "grad_norm": 1.6871212720870972, "learning_rate": 9.991582797919677e-05, "loss": 1.1202366352081299, "step": 601, "token_acc": 0.6911917098445596 }, { "epoch": 0.6771653543307087, "grad_norm": 1.7148067951202393, "learning_rate": 9.991474569973485e-05, "loss": 1.2003967761993408, "step": 602, "token_acc": 0.6835564053537285 }, { "epoch": 0.6782902137232846, "grad_norm": 1.7295185327529907, "learning_rate": 9.991365651263929e-05, "loss": 0.8256057500839233, "step": 603, "token_acc": 0.7884892086330936 }, { "epoch": 0.6794150731158605, "grad_norm": 1.697675108909607, "learning_rate": 9.991256041806084e-05, "loss": 0.8411420583724976, "step": 604, "token_acc": 0.7563805104408353 }, { "epoch": 0.6805399325084365, "grad_norm": 1.7424992322921753, "learning_rate": 9.991145741615115e-05, "loss": 1.1071228981018066, "step": 605, "token_acc": 0.704312114989733 }, { "epoch": 0.6816647919010124, "grad_norm": 1.7653928995132446, "learning_rate": 9.991034750706292e-05, "loss": 1.0255460739135742, "step": 606, "token_acc": 0.7158273381294964 }, { "epoch": 0.6827896512935883, "grad_norm": 1.9900344610214233, "learning_rate": 9.99092306909497e-05, "loss": 0.8718639612197876, "step": 607, "token_acc": 0.7348484848484849 }, { "epoch": 0.6839145106861643, "grad_norm": 2.0453877449035645, "learning_rate": 9.990810696796607e-05, "loss": 0.9366941452026367, "step": 608, "token_acc": 0.7398496240601504 }, { "epoch": 0.6850393700787402, "grad_norm": 1.8066927194595337, "learning_rate": 9.99069763382675e-05, "loss": 0.9588332772254944, "step": 609, "token_acc": 0.7342398022249691 }, { "epoch": 0.6861642294713161, "grad_norm": 1.748685598373413, "learning_rate": 9.990583880201052e-05, "loss": 1.0305440425872803, "step": 610, "token_acc": 0.7003444316877153 }, { "epoch": 0.6872890888638921, "grad_norm": 1.8023567199707031, "learning_rate": 9.990469435935252e-05, "loss": 0.9057629108428955, "step": 611, "token_acc": 0.7438867438867439 }, { "epoch": 0.688413948256468, "grad_norm": 1.533669352531433, "learning_rate": 9.990354301045187e-05, "loss": 0.9821720719337463, "step": 612, "token_acc": 0.726159230096238 }, { "epoch": 0.6895388076490439, "grad_norm": 1.435198426246643, "learning_rate": 9.990238475546792e-05, "loss": 0.8007915019989014, "step": 613, "token_acc": 0.7811579980372915 }, { "epoch": 0.6906636670416197, "grad_norm": 1.8252203464508057, "learning_rate": 9.990121959456094e-05, "loss": 0.8730412721633911, "step": 614, "token_acc": 0.7589670014347202 }, { "epoch": 0.6917885264341957, "grad_norm": 1.799217939376831, "learning_rate": 9.990004752789219e-05, "loss": 0.9269472360610962, "step": 615, "token_acc": 0.7329974811083123 }, { "epoch": 0.6929133858267716, "grad_norm": 1.5676546096801758, "learning_rate": 9.989886855562386e-05, "loss": 0.8684502840042114, "step": 616, "token_acc": 0.7471655328798186 }, { "epoch": 0.6940382452193475, "grad_norm": 1.7704493999481201, "learning_rate": 9.989768267791913e-05, "loss": 0.9220914840698242, "step": 617, "token_acc": 0.7231352718078382 }, { "epoch": 0.6951631046119235, "grad_norm": 1.7343316078186035, "learning_rate": 9.989648989494207e-05, "loss": 0.8531800508499146, "step": 618, "token_acc": 0.7555266579973993 }, { "epoch": 0.6962879640044994, "grad_norm": 1.6782164573669434, "learning_rate": 9.989529020685779e-05, "loss": 0.8284838199615479, "step": 619, "token_acc": 0.7444444444444445 }, { "epoch": 0.6974128233970753, "grad_norm": 1.5507652759552002, "learning_rate": 9.98940836138323e-05, "loss": 0.9065987467765808, "step": 620, "token_acc": 0.744378698224852 }, { "epoch": 0.6985376827896513, "grad_norm": 1.71058988571167, "learning_rate": 9.989287011603257e-05, "loss": 0.9154243469238281, "step": 621, "token_acc": 0.7457805907172996 }, { "epoch": 0.6996625421822272, "grad_norm": 1.7551006078720093, "learning_rate": 9.989164971362652e-05, "loss": 1.1124179363250732, "step": 622, "token_acc": 0.6992561105207227 }, { "epoch": 0.7007874015748031, "grad_norm": 1.5816679000854492, "learning_rate": 9.989042240678308e-05, "loss": 1.043070673942566, "step": 623, "token_acc": 0.7264248704663212 }, { "epoch": 0.7019122609673791, "grad_norm": 1.5650488138198853, "learning_rate": 9.988918819567206e-05, "loss": 0.8595724701881409, "step": 624, "token_acc": 0.749112426035503 }, { "epoch": 0.703037120359955, "grad_norm": 1.8314647674560547, "learning_rate": 9.988794708046429e-05, "loss": 1.0929772853851318, "step": 625, "token_acc": 0.7064471879286695 }, { "epoch": 0.7041619797525309, "grad_norm": 1.4743881225585938, "learning_rate": 9.98866990613315e-05, "loss": 0.9870479702949524, "step": 626, "token_acc": 0.7192374350086655 }, { "epoch": 0.7052868391451068, "grad_norm": 1.4710702896118164, "learning_rate": 9.988544413844642e-05, "loss": 0.8155512809753418, "step": 627, "token_acc": 0.758659217877095 }, { "epoch": 0.7064116985376828, "grad_norm": 1.7236777544021606, "learning_rate": 9.988418231198271e-05, "loss": 0.8563300371170044, "step": 628, "token_acc": 0.7647831800262812 }, { "epoch": 0.7075365579302587, "grad_norm": 1.6603912115097046, "learning_rate": 9.988291358211499e-05, "loss": 0.8777095079421997, "step": 629, "token_acc": 0.7497314715359829 }, { "epoch": 0.7086614173228346, "grad_norm": 1.6460970640182495, "learning_rate": 9.988163794901884e-05, "loss": 0.8464756011962891, "step": 630, "token_acc": 0.7440191387559809 }, { "epoch": 0.7097862767154106, "grad_norm": 1.4360506534576416, "learning_rate": 9.988035541287079e-05, "loss": 0.7962306141853333, "step": 631, "token_acc": 0.7682648401826484 }, { "epoch": 0.7109111361079865, "grad_norm": 1.5673412084579468, "learning_rate": 9.987906597384833e-05, "loss": 0.8309441804885864, "step": 632, "token_acc": 0.7577197149643705 }, { "epoch": 0.7120359955005624, "grad_norm": 1.6951905488967896, "learning_rate": 9.987776963212991e-05, "loss": 0.8247427344322205, "step": 633, "token_acc": 0.7701564380264742 }, { "epoch": 0.7131608548931384, "grad_norm": 1.6144626140594482, "learning_rate": 9.987646638789491e-05, "loss": 0.9494717121124268, "step": 634, "token_acc": 0.73568281938326 }, { "epoch": 0.7142857142857143, "grad_norm": 1.7777464389801025, "learning_rate": 9.987515624132373e-05, "loss": 0.7689136266708374, "step": 635, "token_acc": 0.7694656488549618 }, { "epoch": 0.7154105736782902, "grad_norm": 1.6475799083709717, "learning_rate": 9.987383919259763e-05, "loss": 0.8868165612220764, "step": 636, "token_acc": 0.7528662420382166 }, { "epoch": 0.7165354330708661, "grad_norm": 1.4750920534133911, "learning_rate": 9.987251524189889e-05, "loss": 0.787601888179779, "step": 637, "token_acc": 0.7670126874279123 }, { "epoch": 0.7176602924634421, "grad_norm": 1.4477356672286987, "learning_rate": 9.987118438941073e-05, "loss": 0.8339438438415527, "step": 638, "token_acc": 0.7576601671309192 }, { "epoch": 0.718785151856018, "grad_norm": 1.5622104406356812, "learning_rate": 9.986984663531733e-05, "loss": 0.8221710920333862, "step": 639, "token_acc": 0.7741573033707865 }, { "epoch": 0.7199100112485939, "grad_norm": 1.7420533895492554, "learning_rate": 9.986850197980382e-05, "loss": 0.9357887506484985, "step": 640, "token_acc": 0.7162891046386192 }, { "epoch": 0.7210348706411699, "grad_norm": 1.881798505783081, "learning_rate": 9.986715042305628e-05, "loss": 0.8823127746582031, "step": 641, "token_acc": 0.7547169811320755 }, { "epoch": 0.7221597300337458, "grad_norm": 1.7888858318328857, "learning_rate": 9.986579196526176e-05, "loss": 1.0072150230407715, "step": 642, "token_acc": 0.7107001321003963 }, { "epoch": 0.7232845894263217, "grad_norm": 1.6561788320541382, "learning_rate": 9.986442660660823e-05, "loss": 0.9054312705993652, "step": 643, "token_acc": 0.7605633802816901 }, { "epoch": 0.7244094488188977, "grad_norm": 1.6436656713485718, "learning_rate": 9.986305434728466e-05, "loss": 0.7733568549156189, "step": 644, "token_acc": 0.767379679144385 }, { "epoch": 0.7255343082114736, "grad_norm": 1.6398051977157593, "learning_rate": 9.986167518748097e-05, "loss": 1.1584968566894531, "step": 645, "token_acc": 0.6782363977485929 }, { "epoch": 0.7266591676040495, "grad_norm": 1.5582855939865112, "learning_rate": 9.986028912738799e-05, "loss": 1.0128488540649414, "step": 646, "token_acc": 0.7046632124352331 }, { "epoch": 0.7277840269966255, "grad_norm": 1.6884198188781738, "learning_rate": 9.985889616719754e-05, "loss": 0.776412308216095, "step": 647, "token_acc": 0.7879161528976573 }, { "epoch": 0.7289088863892014, "grad_norm": 1.5881149768829346, "learning_rate": 9.985749630710242e-05, "loss": 0.9134639501571655, "step": 648, "token_acc": 0.7522349936143039 }, { "epoch": 0.7300337457817773, "grad_norm": 1.5263575315475464, "learning_rate": 9.985608954729631e-05, "loss": 0.9450771808624268, "step": 649, "token_acc": 0.7345491388044579 }, { "epoch": 0.7311586051743532, "grad_norm": 1.3000426292419434, "learning_rate": 9.985467588797392e-05, "loss": 0.9460508227348328, "step": 650, "token_acc": 0.7363138686131386 }, { "epoch": 0.7322834645669292, "grad_norm": 1.4251229763031006, "learning_rate": 9.985325532933087e-05, "loss": 0.7406172752380371, "step": 651, "token_acc": 0.7777777777777778 }, { "epoch": 0.7334083239595051, "grad_norm": 1.775603175163269, "learning_rate": 9.985182787156377e-05, "loss": 0.9070613980293274, "step": 652, "token_acc": 0.732051282051282 }, { "epoch": 0.734533183352081, "grad_norm": 1.7369956970214844, "learning_rate": 9.985039351487013e-05, "loss": 1.1198804378509521, "step": 653, "token_acc": 0.6922222222222222 }, { "epoch": 0.735658042744657, "grad_norm": 1.5091458559036255, "learning_rate": 9.984895225944848e-05, "loss": 0.9040343165397644, "step": 654, "token_acc": 0.746641074856046 }, { "epoch": 0.7367829021372329, "grad_norm": 1.3977365493774414, "learning_rate": 9.984750410549825e-05, "loss": 0.8109418153762817, "step": 655, "token_acc": 0.7485265225933202 }, { "epoch": 0.7379077615298087, "grad_norm": 1.5838645696640015, "learning_rate": 9.984604905321988e-05, "loss": 0.9258980751037598, "step": 656, "token_acc": 0.7505592841163311 }, { "epoch": 0.7390326209223848, "grad_norm": 1.6488966941833496, "learning_rate": 9.98445871028147e-05, "loss": 0.9133172035217285, "step": 657, "token_acc": 0.7252358490566038 }, { "epoch": 0.7401574803149606, "grad_norm": 1.782692790031433, "learning_rate": 9.984311825448505e-05, "loss": 0.9303100109100342, "step": 658, "token_acc": 0.7182320441988951 }, { "epoch": 0.7412823397075365, "grad_norm": 1.6506186723709106, "learning_rate": 9.984164250843417e-05, "loss": 0.8974112272262573, "step": 659, "token_acc": 0.7293986636971047 }, { "epoch": 0.7424071991001124, "grad_norm": 1.7199149131774902, "learning_rate": 9.984015986486632e-05, "loss": 0.8681624531745911, "step": 660, "token_acc": 0.7326607818411097 }, { "epoch": 0.7435320584926884, "grad_norm": 1.6435526609420776, "learning_rate": 9.983867032398665e-05, "loss": 0.8464870452880859, "step": 661, "token_acc": 0.7446540880503144 }, { "epoch": 0.7446569178852643, "grad_norm": 1.3711308240890503, "learning_rate": 9.983717388600134e-05, "loss": 0.9272116422653198, "step": 662, "token_acc": 0.7307692307692307 }, { "epoch": 0.7457817772778402, "grad_norm": 1.8430347442626953, "learning_rate": 9.983567055111745e-05, "loss": 1.028254747390747, "step": 663, "token_acc": 0.7254335260115607 }, { "epoch": 0.7469066366704162, "grad_norm": 1.926883578300476, "learning_rate": 9.983416031954302e-05, "loss": 1.0768935680389404, "step": 664, "token_acc": 0.6926889714993805 }, { "epoch": 0.7480314960629921, "grad_norm": 1.8881605863571167, "learning_rate": 9.983264319148707e-05, "loss": 0.8603374361991882, "step": 665, "token_acc": 0.750733137829912 }, { "epoch": 0.749156355455568, "grad_norm": 1.7756173610687256, "learning_rate": 9.983111916715953e-05, "loss": 0.7770142555236816, "step": 666, "token_acc": 0.774468085106383 }, { "epoch": 0.750281214848144, "grad_norm": 1.6309565305709839, "learning_rate": 9.982958824677132e-05, "loss": 0.962420642375946, "step": 667, "token_acc": 0.7270718232044199 }, { "epoch": 0.7514060742407199, "grad_norm": 1.5220773220062256, "learning_rate": 9.982805043053431e-05, "loss": 0.6730574369430542, "step": 668, "token_acc": 0.7796610169491526 }, { "epoch": 0.7525309336332958, "grad_norm": 1.4767478704452515, "learning_rate": 9.982650571866129e-05, "loss": 0.6776600480079651, "step": 669, "token_acc": 0.7935034802784223 }, { "epoch": 0.7536557930258717, "grad_norm": 1.807747721672058, "learning_rate": 9.982495411136606e-05, "loss": 1.0547860860824585, "step": 670, "token_acc": 0.7178899082568807 }, { "epoch": 0.7547806524184477, "grad_norm": 1.5536174774169922, "learning_rate": 9.982339560886332e-05, "loss": 0.9395316243171692, "step": 671, "token_acc": 0.7328170377541142 }, { "epoch": 0.7559055118110236, "grad_norm": 1.800878643989563, "learning_rate": 9.982183021136877e-05, "loss": 0.880530834197998, "step": 672, "token_acc": 0.7250608272506083 }, { "epoch": 0.7570303712035995, "grad_norm": 1.671175479888916, "learning_rate": 9.982025791909903e-05, "loss": 0.8927554488182068, "step": 673, "token_acc": 0.7292882147024504 }, { "epoch": 0.7581552305961755, "grad_norm": 1.4729565382003784, "learning_rate": 9.98186787322717e-05, "loss": 0.7624427080154419, "step": 674, "token_acc": 0.7724820143884892 }, { "epoch": 0.7592800899887514, "grad_norm": 1.622411847114563, "learning_rate": 9.98170926511053e-05, "loss": 0.8477105498313904, "step": 675, "token_acc": 0.7346465816917729 }, { "epoch": 0.7604049493813273, "grad_norm": 1.797002911567688, "learning_rate": 9.981549967581936e-05, "loss": 0.8111937642097473, "step": 676, "token_acc": 0.7704697986577181 }, { "epoch": 0.7615298087739033, "grad_norm": 1.5816296339035034, "learning_rate": 9.981389980663429e-05, "loss": 0.9222838878631592, "step": 677, "token_acc": 0.744943820224719 }, { "epoch": 0.7626546681664792, "grad_norm": 1.7105611562728882, "learning_rate": 9.981229304377153e-05, "loss": 1.0897481441497803, "step": 678, "token_acc": 0.6934306569343066 }, { "epoch": 0.7637795275590551, "grad_norm": 1.5225849151611328, "learning_rate": 9.98106793874534e-05, "loss": 0.8202308416366577, "step": 679, "token_acc": 0.7380645161290322 }, { "epoch": 0.7649043869516311, "grad_norm": 1.5133634805679321, "learning_rate": 9.980905883790324e-05, "loss": 0.9089297652244568, "step": 680, "token_acc": 0.7304261645193261 }, { "epoch": 0.766029246344207, "grad_norm": 1.3593999147415161, "learning_rate": 9.980743139534531e-05, "loss": 0.9496070146560669, "step": 681, "token_acc": 0.7404255319148936 }, { "epoch": 0.7671541057367829, "grad_norm": 1.6307861804962158, "learning_rate": 9.980579706000481e-05, "loss": 0.975427508354187, "step": 682, "token_acc": 0.7138157894736842 }, { "epoch": 0.7682789651293588, "grad_norm": 1.7558391094207764, "learning_rate": 9.980415583210795e-05, "loss": 0.940104603767395, "step": 683, "token_acc": 0.7392857142857143 }, { "epoch": 0.7694038245219348, "grad_norm": 1.602516770362854, "learning_rate": 9.980250771188185e-05, "loss": 0.7774720191955566, "step": 684, "token_acc": 0.7944827586206896 }, { "epoch": 0.7705286839145107, "grad_norm": 1.6632109880447388, "learning_rate": 9.980085269955455e-05, "loss": 0.8395376801490784, "step": 685, "token_acc": 0.7457142857142857 }, { "epoch": 0.7716535433070866, "grad_norm": 1.4197174310684204, "learning_rate": 9.979919079535513e-05, "loss": 0.9459586143493652, "step": 686, "token_acc": 0.7468354430379747 }, { "epoch": 0.7727784026996626, "grad_norm": 1.5636050701141357, "learning_rate": 9.979752199951355e-05, "loss": 0.8553605079650879, "step": 687, "token_acc": 0.7431077694235589 }, { "epoch": 0.7739032620922385, "grad_norm": 1.7435897588729858, "learning_rate": 9.979584631226078e-05, "loss": 0.8470273017883301, "step": 688, "token_acc": 0.7420986093552465 }, { "epoch": 0.7750281214848144, "grad_norm": 1.480310082435608, "learning_rate": 9.979416373382869e-05, "loss": 0.7722948789596558, "step": 689, "token_acc": 0.7860759493670886 }, { "epoch": 0.7761529808773904, "grad_norm": 1.6640608310699463, "learning_rate": 9.979247426445014e-05, "loss": 1.0369232892990112, "step": 690, "token_acc": 0.7096774193548387 }, { "epoch": 0.7772778402699663, "grad_norm": 1.6229746341705322, "learning_rate": 9.979077790435893e-05, "loss": 0.9034793972969055, "step": 691, "token_acc": 0.71875 }, { "epoch": 0.7784026996625422, "grad_norm": 1.7050622701644897, "learning_rate": 9.978907465378984e-05, "loss": 1.0905029773712158, "step": 692, "token_acc": 0.7211764705882353 }, { "epoch": 0.7795275590551181, "grad_norm": 1.4811668395996094, "learning_rate": 9.978736451297854e-05, "loss": 0.8522363305091858, "step": 693, "token_acc": 0.7735849056603774 }, { "epoch": 0.7806524184476941, "grad_norm": 1.872910976409912, "learning_rate": 9.978564748216173e-05, "loss": 0.9270873069763184, "step": 694, "token_acc": 0.719435736677116 }, { "epoch": 0.78177727784027, "grad_norm": 1.7460299730300903, "learning_rate": 9.9783923561577e-05, "loss": 0.9118442535400391, "step": 695, "token_acc": 0.7250308261405672 }, { "epoch": 0.7829021372328459, "grad_norm": 1.7577446699142456, "learning_rate": 9.978219275146296e-05, "loss": 1.0068143606185913, "step": 696, "token_acc": 0.7065217391304348 }, { "epoch": 0.7840269966254219, "grad_norm": 1.5333383083343506, "learning_rate": 9.978045505205909e-05, "loss": 0.8041585683822632, "step": 697, "token_acc": 0.7416481069042317 }, { "epoch": 0.7851518560179978, "grad_norm": 1.9953259229660034, "learning_rate": 9.977871046360587e-05, "loss": 1.1652514934539795, "step": 698, "token_acc": 0.6883273164861613 }, { "epoch": 0.7862767154105736, "grad_norm": 1.7273812294006348, "learning_rate": 9.977695898634478e-05, "loss": 1.1697325706481934, "step": 699, "token_acc": 0.6918604651162791 }, { "epoch": 0.7874015748031497, "grad_norm": 1.5547844171524048, "learning_rate": 9.977520062051815e-05, "loss": 0.8925641775131226, "step": 700, "token_acc": 0.742914979757085 }, { "epoch": 0.7885264341957255, "grad_norm": 1.483377456665039, "learning_rate": 9.977343536636935e-05, "loss": 0.9111359715461731, "step": 701, "token_acc": 0.7281067556296914 }, { "epoch": 0.7896512935883014, "grad_norm": 1.483803391456604, "learning_rate": 9.977166322414267e-05, "loss": 0.9013761281967163, "step": 702, "token_acc": 0.7279767666989352 }, { "epoch": 0.7907761529808774, "grad_norm": 1.7905555963516235, "learning_rate": 9.976988419408332e-05, "loss": 0.8584107160568237, "step": 703, "token_acc": 0.7503410641200545 }, { "epoch": 0.7919010123734533, "grad_norm": 1.5431501865386963, "learning_rate": 9.976809827643754e-05, "loss": 0.7343605756759644, "step": 704, "token_acc": 0.783375314861461 }, { "epoch": 0.7930258717660292, "grad_norm": 1.5932780504226685, "learning_rate": 9.976630547145245e-05, "loss": 1.0355253219604492, "step": 705, "token_acc": 0.7035358114233907 }, { "epoch": 0.7941507311586051, "grad_norm": 1.3699849843978882, "learning_rate": 9.976450577937619e-05, "loss": 0.7811253070831299, "step": 706, "token_acc": 0.768280123583934 }, { "epoch": 0.7952755905511811, "grad_norm": 1.5678091049194336, "learning_rate": 9.976269920045777e-05, "loss": 0.7427414655685425, "step": 707, "token_acc": 0.7877030162412993 }, { "epoch": 0.796400449943757, "grad_norm": 1.5857065916061401, "learning_rate": 9.976088573494724e-05, "loss": 0.9531551599502563, "step": 708, "token_acc": 0.7407834101382489 }, { "epoch": 0.7975253093363329, "grad_norm": 1.3669800758361816, "learning_rate": 9.975906538309554e-05, "loss": 0.9558979868888855, "step": 709, "token_acc": 0.7338403041825095 }, { "epoch": 0.7986501687289089, "grad_norm": 1.5982035398483276, "learning_rate": 9.975723814515461e-05, "loss": 0.9170670509338379, "step": 710, "token_acc": 0.7419678714859438 }, { "epoch": 0.7997750281214848, "grad_norm": 1.5010428428649902, "learning_rate": 9.975540402137729e-05, "loss": 0.9495971202850342, "step": 711, "token_acc": 0.7228260869565217 }, { "epoch": 0.8008998875140607, "grad_norm": 1.4540221691131592, "learning_rate": 9.97535630120174e-05, "loss": 0.7954177260398865, "step": 712, "token_acc": 0.7766497461928934 }, { "epoch": 0.8020247469066367, "grad_norm": 1.5803943872451782, "learning_rate": 9.975171511732975e-05, "loss": 0.8053409457206726, "step": 713, "token_acc": 0.7446540880503144 }, { "epoch": 0.8031496062992126, "grad_norm": 1.6786961555480957, "learning_rate": 9.974986033757004e-05, "loss": 0.6855264902114868, "step": 714, "token_acc": 0.7986666666666666 }, { "epoch": 0.8042744656917885, "grad_norm": 1.8094286918640137, "learning_rate": 9.974799867299496e-05, "loss": 1.0188705921173096, "step": 715, "token_acc": 0.7002583979328165 }, { "epoch": 0.8053993250843644, "grad_norm": 1.6463004350662231, "learning_rate": 9.974613012386214e-05, "loss": 0.9506077766418457, "step": 716, "token_acc": 0.7201291711517761 }, { "epoch": 0.8065241844769404, "grad_norm": 1.3096622228622437, "learning_rate": 9.974425469043016e-05, "loss": 0.867017388343811, "step": 717, "token_acc": 0.7671439336850038 }, { "epoch": 0.8076490438695163, "grad_norm": 1.5648212432861328, "learning_rate": 9.974237237295855e-05, "loss": 1.0342178344726562, "step": 718, "token_acc": 0.7058823529411765 }, { "epoch": 0.8087739032620922, "grad_norm": 1.5106282234191895, "learning_rate": 9.974048317170786e-05, "loss": 1.068471074104309, "step": 719, "token_acc": 0.6971027216856892 }, { "epoch": 0.8098987626546682, "grad_norm": 1.6168782711029053, "learning_rate": 9.973858708693946e-05, "loss": 0.9292632341384888, "step": 720, "token_acc": 0.7369565217391304 }, { "epoch": 0.8110236220472441, "grad_norm": 2.018860101699829, "learning_rate": 9.973668411891578e-05, "loss": 0.951242983341217, "step": 721, "token_acc": 0.7300884955752213 }, { "epoch": 0.81214848143982, "grad_norm": 1.67953360080719, "learning_rate": 9.973477426790016e-05, "loss": 1.0490906238555908, "step": 722, "token_acc": 0.712776176753122 }, { "epoch": 0.813273340832396, "grad_norm": 1.6804797649383545, "learning_rate": 9.973285753415694e-05, "loss": 0.952556848526001, "step": 723, "token_acc": 0.7195121951219512 }, { "epoch": 0.8143982002249719, "grad_norm": 1.7161496877670288, "learning_rate": 9.97309339179513e-05, "loss": 0.9758242964744568, "step": 724, "token_acc": 0.7149532710280374 }, { "epoch": 0.8155230596175478, "grad_norm": 1.4742426872253418, "learning_rate": 9.972900341954952e-05, "loss": 0.7731319665908813, "step": 725, "token_acc": 0.7781065088757396 }, { "epoch": 0.8166479190101237, "grad_norm": 1.6221078634262085, "learning_rate": 9.97270660392187e-05, "loss": 0.758265495300293, "step": 726, "token_acc": 0.7643097643097643 }, { "epoch": 0.8177727784026997, "grad_norm": 1.872536063194275, "learning_rate": 9.9725121777227e-05, "loss": 0.9056326746940613, "step": 727, "token_acc": 0.7435233160621761 }, { "epoch": 0.8188976377952756, "grad_norm": 1.7379274368286133, "learning_rate": 9.972317063384346e-05, "loss": 0.9819019436836243, "step": 728, "token_acc": 0.7291910902696366 }, { "epoch": 0.8200224971878515, "grad_norm": 1.5034279823303223, "learning_rate": 9.972121260933808e-05, "loss": 0.7816739082336426, "step": 729, "token_acc": 0.7837573385518591 }, { "epoch": 0.8211473565804275, "grad_norm": 1.581443190574646, "learning_rate": 9.971924770398188e-05, "loss": 0.7718955278396606, "step": 730, "token_acc": 0.7792041078305519 }, { "epoch": 0.8222722159730034, "grad_norm": 1.5370904207229614, "learning_rate": 9.971727591804673e-05, "loss": 0.9543917775154114, "step": 731, "token_acc": 0.7170868347338936 }, { "epoch": 0.8233970753655793, "grad_norm": 1.7039759159088135, "learning_rate": 9.971529725180552e-05, "loss": 0.8479444980621338, "step": 732, "token_acc": 0.7520661157024794 }, { "epoch": 0.8245219347581553, "grad_norm": 1.5966161489486694, "learning_rate": 9.971331170553207e-05, "loss": 1.0822641849517822, "step": 733, "token_acc": 0.6962142197599261 }, { "epoch": 0.8256467941507312, "grad_norm": 1.2308140993118286, "learning_rate": 9.971131927950117e-05, "loss": 0.9742625951766968, "step": 734, "token_acc": 0.7328671328671329 }, { "epoch": 0.8267716535433071, "grad_norm": 1.4264508485794067, "learning_rate": 9.970931997398855e-05, "loss": 1.1039303541183472, "step": 735, "token_acc": 0.6928571428571428 }, { "epoch": 0.8278965129358831, "grad_norm": 1.843112587928772, "learning_rate": 9.970731378927088e-05, "loss": 0.9282817244529724, "step": 736, "token_acc": 0.7397881996974282 }, { "epoch": 0.829021372328459, "grad_norm": 1.7527915239334106, "learning_rate": 9.970530072562578e-05, "loss": 0.8724998235702515, "step": 737, "token_acc": 0.729483282674772 }, { "epoch": 0.8301462317210349, "grad_norm": 1.5846997499465942, "learning_rate": 9.970328078333186e-05, "loss": 0.7658471465110779, "step": 738, "token_acc": 0.7669654289372599 }, { "epoch": 0.8312710911136107, "grad_norm": 1.1861717700958252, "learning_rate": 9.970125396266866e-05, "loss": 0.7507541179656982, "step": 739, "token_acc": 0.7869481765834933 }, { "epoch": 0.8323959505061868, "grad_norm": 1.5491292476654053, "learning_rate": 9.969922026391664e-05, "loss": 0.9646942615509033, "step": 740, "token_acc": 0.7069327731092437 }, { "epoch": 0.8335208098987626, "grad_norm": 1.5323556661605835, "learning_rate": 9.969717968735726e-05, "loss": 0.9737841486930847, "step": 741, "token_acc": 0.7216494845360825 }, { "epoch": 0.8346456692913385, "grad_norm": 1.526326298713684, "learning_rate": 9.969513223327292e-05, "loss": 0.8701889514923096, "step": 742, "token_acc": 0.7361702127659574 }, { "epoch": 0.8357705286839145, "grad_norm": 1.1914316415786743, "learning_rate": 9.969307790194695e-05, "loss": 0.7698884010314941, "step": 743, "token_acc": 0.7639109697933227 }, { "epoch": 0.8368953880764904, "grad_norm": 1.4050217866897583, "learning_rate": 9.969101669366365e-05, "loss": 0.6698674559593201, "step": 744, "token_acc": 0.79182156133829 }, { "epoch": 0.8380202474690663, "grad_norm": 1.6130318641662598, "learning_rate": 9.968894860870827e-05, "loss": 0.884472131729126, "step": 745, "token_acc": 0.736904761904762 }, { "epoch": 0.8391451068616423, "grad_norm": 1.4663726091384888, "learning_rate": 9.968687364736701e-05, "loss": 0.9005415439605713, "step": 746, "token_acc": 0.7568421052631579 }, { "epoch": 0.8402699662542182, "grad_norm": 1.4262257814407349, "learning_rate": 9.968479180992701e-05, "loss": 0.8564801216125488, "step": 747, "token_acc": 0.7574421168687983 }, { "epoch": 0.8413948256467941, "grad_norm": 1.4572429656982422, "learning_rate": 9.96827030966764e-05, "loss": 0.7817294001579285, "step": 748, "token_acc": 0.7611630321910696 }, { "epoch": 0.84251968503937, "grad_norm": 1.510744333267212, "learning_rate": 9.96806075079042e-05, "loss": 0.8773659467697144, "step": 749, "token_acc": 0.7484885126964933 }, { "epoch": 0.843644544431946, "grad_norm": 1.5718351602554321, "learning_rate": 9.967850504390044e-05, "loss": 0.8011206388473511, "step": 750, "token_acc": 0.7782258064516129 }, { "epoch": 0.8447694038245219, "grad_norm": 1.4540677070617676, "learning_rate": 9.967639570495607e-05, "loss": 0.9682373404502869, "step": 751, "token_acc": 0.7142857142857143 }, { "epoch": 0.8458942632170978, "grad_norm": 1.3515613079071045, "learning_rate": 9.967427949136299e-05, "loss": 0.6535250544548035, "step": 752, "token_acc": 0.8050314465408805 }, { "epoch": 0.8470191226096738, "grad_norm": 1.5917747020721436, "learning_rate": 9.967215640341407e-05, "loss": 1.032170295715332, "step": 753, "token_acc": 0.7058823529411765 }, { "epoch": 0.8481439820022497, "grad_norm": 1.6343375444412231, "learning_rate": 9.967002644140312e-05, "loss": 0.9181756377220154, "step": 754, "token_acc": 0.7267876200640342 }, { "epoch": 0.8492688413948256, "grad_norm": 1.8903379440307617, "learning_rate": 9.96678896056249e-05, "loss": 1.042850375175476, "step": 755, "token_acc": 0.6890756302521008 }, { "epoch": 0.8503937007874016, "grad_norm": 1.6912719011306763, "learning_rate": 9.966574589637513e-05, "loss": 0.8738027811050415, "step": 756, "token_acc": 0.7481751824817519 }, { "epoch": 0.8515185601799775, "grad_norm": 1.510057806968689, "learning_rate": 9.966359531395046e-05, "loss": 0.9893558025360107, "step": 757, "token_acc": 0.7213254035683943 }, { "epoch": 0.8526434195725534, "grad_norm": 1.767295002937317, "learning_rate": 9.966143785864853e-05, "loss": 0.9305055737495422, "step": 758, "token_acc": 0.7205513784461153 }, { "epoch": 0.8537682789651294, "grad_norm": 1.4947887659072876, "learning_rate": 9.965927353076789e-05, "loss": 1.0346240997314453, "step": 759, "token_acc": 0.7050359712230215 }, { "epoch": 0.8548931383577053, "grad_norm": 1.4216176271438599, "learning_rate": 9.965710233060806e-05, "loss": 0.9809554219245911, "step": 760, "token_acc": 0.7331536388140162 }, { "epoch": 0.8560179977502812, "grad_norm": 1.6161465644836426, "learning_rate": 9.96549242584695e-05, "loss": 1.0711591243743896, "step": 761, "token_acc": 0.6956947162426614 }, { "epoch": 0.8571428571428571, "grad_norm": 1.572076439857483, "learning_rate": 9.965273931465366e-05, "loss": 0.9638874530792236, "step": 762, "token_acc": 0.7275711159737418 }, { "epoch": 0.8582677165354331, "grad_norm": 1.5193856954574585, "learning_rate": 9.965054749946288e-05, "loss": 0.8493506908416748, "step": 763, "token_acc": 0.7483660130718954 }, { "epoch": 0.859392575928009, "grad_norm": 1.50749933719635, "learning_rate": 9.964834881320051e-05, "loss": 0.9192872643470764, "step": 764, "token_acc": 0.7421965317919075 }, { "epoch": 0.8605174353205849, "grad_norm": 1.6562809944152832, "learning_rate": 9.964614325617079e-05, "loss": 0.8467086553573608, "step": 765, "token_acc": 0.7386934673366834 }, { "epoch": 0.8616422947131609, "grad_norm": 1.4968862533569336, "learning_rate": 9.964393082867898e-05, "loss": 1.0587122440338135, "step": 766, "token_acc": 0.7295873573309921 }, { "epoch": 0.8627671541057368, "grad_norm": 1.6049060821533203, "learning_rate": 9.964171153103123e-05, "loss": 0.9471059441566467, "step": 767, "token_acc": 0.732484076433121 }, { "epoch": 0.8638920134983127, "grad_norm": 1.7359353303909302, "learning_rate": 9.963948536353467e-05, "loss": 0.9745305776596069, "step": 768, "token_acc": 0.7111368909512761 }, { "epoch": 0.8650168728908887, "grad_norm": 1.313323974609375, "learning_rate": 9.963725232649739e-05, "loss": 1.0290915966033936, "step": 769, "token_acc": 0.7083333333333334 }, { "epoch": 0.8661417322834646, "grad_norm": 1.6653773784637451, "learning_rate": 9.96350124202284e-05, "loss": 0.7891260385513306, "step": 770, "token_acc": 0.7488479262672811 }, { "epoch": 0.8672665916760405, "grad_norm": 1.4906268119812012, "learning_rate": 9.963276564503767e-05, "loss": 0.8143094778060913, "step": 771, "token_acc": 0.7587587587587588 }, { "epoch": 0.8683914510686164, "grad_norm": 1.5970045328140259, "learning_rate": 9.963051200123616e-05, "loss": 0.7445891499519348, "step": 772, "token_acc": 0.7792553191489362 }, { "epoch": 0.8695163104611924, "grad_norm": 1.4699559211730957, "learning_rate": 9.962825148913573e-05, "loss": 0.737041711807251, "step": 773, "token_acc": 0.7881720430107527 }, { "epoch": 0.8706411698537683, "grad_norm": 1.5170071125030518, "learning_rate": 9.96259841090492e-05, "loss": 0.9504371881484985, "step": 774, "token_acc": 0.7203302373581011 }, { "epoch": 0.8717660292463442, "grad_norm": 1.7469736337661743, "learning_rate": 9.962370986129037e-05, "loss": 0.7988581657409668, "step": 775, "token_acc": 0.7528901734104047 }, { "epoch": 0.8728908886389202, "grad_norm": 1.6923271417617798, "learning_rate": 9.962142874617395e-05, "loss": 1.019576072692871, "step": 776, "token_acc": 0.7179215270413574 }, { "epoch": 0.8740157480314961, "grad_norm": 1.5707954168319702, "learning_rate": 9.961914076401563e-05, "loss": 0.8725646734237671, "step": 777, "token_acc": 0.7450523864959255 }, { "epoch": 0.875140607424072, "grad_norm": 1.533118486404419, "learning_rate": 9.961684591513205e-05, "loss": 0.8964871168136597, "step": 778, "token_acc": 0.7517814726840855 }, { "epoch": 0.876265466816648, "grad_norm": 1.5758445262908936, "learning_rate": 9.961454419984077e-05, "loss": 0.8917890787124634, "step": 779, "token_acc": 0.7447478991596639 }, { "epoch": 0.8773903262092239, "grad_norm": 1.3251349925994873, "learning_rate": 9.961223561846035e-05, "loss": 0.8006118535995483, "step": 780, "token_acc": 0.7680221811460258 }, { "epoch": 0.8785151856017998, "grad_norm": 1.384377121925354, "learning_rate": 9.960992017131024e-05, "loss": 1.0829888582229614, "step": 781, "token_acc": 0.6795201371036846 }, { "epoch": 0.8796400449943758, "grad_norm": 1.5556063652038574, "learning_rate": 9.96075978587109e-05, "loss": 0.7400499582290649, "step": 782, "token_acc": 0.7851123595505618 }, { "epoch": 0.8807649043869517, "grad_norm": 1.4509003162384033, "learning_rate": 9.960526868098368e-05, "loss": 1.0184321403503418, "step": 783, "token_acc": 0.7225929456625357 }, { "epoch": 0.8818897637795275, "grad_norm": 1.5230799913406372, "learning_rate": 9.960293263845094e-05, "loss": 0.6554890871047974, "step": 784, "token_acc": 0.7881996974281392 }, { "epoch": 0.8830146231721034, "grad_norm": 1.506176471710205, "learning_rate": 9.960058973143596e-05, "loss": 0.8796799778938293, "step": 785, "token_acc": 0.744131455399061 }, { "epoch": 0.8841394825646794, "grad_norm": 1.4744263887405396, "learning_rate": 9.959823996026294e-05, "loss": 0.7907198071479797, "step": 786, "token_acc": 0.7826617826617827 }, { "epoch": 0.8852643419572553, "grad_norm": 1.4473211765289307, "learning_rate": 9.95958833252571e-05, "loss": 0.9743695259094238, "step": 787, "token_acc": 0.7317813765182186 }, { "epoch": 0.8863892013498312, "grad_norm": 2.05336856842041, "learning_rate": 9.959351982674455e-05, "loss": 0.8996121287345886, "step": 788, "token_acc": 0.7247956403269755 }, { "epoch": 0.8875140607424072, "grad_norm": 1.4918614625930786, "learning_rate": 9.95911494650524e-05, "loss": 0.8537808656692505, "step": 789, "token_acc": 0.759956942949408 }, { "epoch": 0.8886389201349831, "grad_norm": 1.505074143409729, "learning_rate": 9.958877224050862e-05, "loss": 0.881984293460846, "step": 790, "token_acc": 0.7410228509249184 }, { "epoch": 0.889763779527559, "grad_norm": 1.5105830430984497, "learning_rate": 9.958638815344225e-05, "loss": 0.9520208835601807, "step": 791, "token_acc": 0.7330367074527252 }, { "epoch": 0.890888638920135, "grad_norm": 1.6071430444717407, "learning_rate": 9.958399720418321e-05, "loss": 1.087010383605957, "step": 792, "token_acc": 0.686358754027927 }, { "epoch": 0.8920134983127109, "grad_norm": 1.4803708791732788, "learning_rate": 9.958159939306236e-05, "loss": 0.8947211503982544, "step": 793, "token_acc": 0.7434456928838952 }, { "epoch": 0.8931383577052868, "grad_norm": 1.4485385417938232, "learning_rate": 9.957919472041152e-05, "loss": 0.9541038274765015, "step": 794, "token_acc": 0.741871921182266 }, { "epoch": 0.8942632170978627, "grad_norm": 1.5652638673782349, "learning_rate": 9.957678318656352e-05, "loss": 1.016803503036499, "step": 795, "token_acc": 0.7097435897435898 }, { "epoch": 0.8953880764904387, "grad_norm": 1.7476609945297241, "learning_rate": 9.957436479185205e-05, "loss": 0.906539261341095, "step": 796, "token_acc": 0.7506666666666667 }, { "epoch": 0.8965129358830146, "grad_norm": 1.571428894996643, "learning_rate": 9.957193953661179e-05, "loss": 1.0028326511383057, "step": 797, "token_acc": 0.7314453125 }, { "epoch": 0.8976377952755905, "grad_norm": 1.5751153230667114, "learning_rate": 9.956950742117838e-05, "loss": 0.9143102765083313, "step": 798, "token_acc": 0.7336197636949516 }, { "epoch": 0.8987626546681665, "grad_norm": 1.602103352546692, "learning_rate": 9.956706844588839e-05, "loss": 1.057873010635376, "step": 799, "token_acc": 0.7064881565396498 }, { "epoch": 0.8998875140607424, "grad_norm": 1.560016393661499, "learning_rate": 9.956462261107934e-05, "loss": 0.8799551725387573, "step": 800, "token_acc": 0.7333333333333333 }, { "epoch": 0.9010123734533183, "grad_norm": 1.6480873823165894, "learning_rate": 9.956216991708972e-05, "loss": 0.8209767937660217, "step": 801, "token_acc": 0.7605095541401274 }, { "epoch": 0.9021372328458943, "grad_norm": 1.4311964511871338, "learning_rate": 9.955971036425895e-05, "loss": 0.8596140146255493, "step": 802, "token_acc": 0.7435129740518962 }, { "epoch": 0.9032620922384702, "grad_norm": 1.4188926219940186, "learning_rate": 9.955724395292741e-05, "loss": 0.9253668785095215, "step": 803, "token_acc": 0.7294736842105263 }, { "epoch": 0.9043869516310461, "grad_norm": 1.4836534261703491, "learning_rate": 9.95547706834364e-05, "loss": 1.102060079574585, "step": 804, "token_acc": 0.6999101527403414 }, { "epoch": 0.905511811023622, "grad_norm": 1.642386794090271, "learning_rate": 9.955229055612823e-05, "loss": 0.9627521634101868, "step": 805, "token_acc": 0.7338217338217338 }, { "epoch": 0.906636670416198, "grad_norm": 1.3054146766662598, "learning_rate": 9.954980357134607e-05, "loss": 0.8918473720550537, "step": 806, "token_acc": 0.7470956210902592 }, { "epoch": 0.9077615298087739, "grad_norm": 1.5877634286880493, "learning_rate": 9.954730972943413e-05, "loss": 0.8013244271278381, "step": 807, "token_acc": 0.7730582524271845 }, { "epoch": 0.9088863892013498, "grad_norm": 1.8993204832077026, "learning_rate": 9.954480903073754e-05, "loss": 0.890419065952301, "step": 808, "token_acc": 0.7427258805513017 }, { "epoch": 0.9100112485939258, "grad_norm": 1.504558801651001, "learning_rate": 9.954230147560233e-05, "loss": 0.857495903968811, "step": 809, "token_acc": 0.7442307692307693 }, { "epoch": 0.9111361079865017, "grad_norm": 1.3997554779052734, "learning_rate": 9.953978706437554e-05, "loss": 0.6589900851249695, "step": 810, "token_acc": 0.793398533007335 }, { "epoch": 0.9122609673790776, "grad_norm": 1.7138512134552002, "learning_rate": 9.953726579740514e-05, "loss": 0.8823409080505371, "step": 811, "token_acc": 0.7517053206002728 }, { "epoch": 0.9133858267716536, "grad_norm": 1.6137418746948242, "learning_rate": 9.953473767504003e-05, "loss": 0.7747371196746826, "step": 812, "token_acc": 0.7635327635327636 }, { "epoch": 0.9145106861642295, "grad_norm": 1.4430102109909058, "learning_rate": 9.953220269763008e-05, "loss": 0.6885917782783508, "step": 813, "token_acc": 0.7711757269279393 }, { "epoch": 0.9156355455568054, "grad_norm": 1.5991657972335815, "learning_rate": 9.952966086552608e-05, "loss": 0.8961760997772217, "step": 814, "token_acc": 0.7333333333333333 }, { "epoch": 0.9167604049493814, "grad_norm": 1.5039392709732056, "learning_rate": 9.952711217907984e-05, "loss": 0.8511562347412109, "step": 815, "token_acc": 0.7303370786516854 }, { "epoch": 0.9178852643419573, "grad_norm": 1.6683748960494995, "learning_rate": 9.952455663864402e-05, "loss": 0.9073441624641418, "step": 816, "token_acc": 0.7494969818913481 }, { "epoch": 0.9190101237345332, "grad_norm": 1.5901216268539429, "learning_rate": 9.952199424457232e-05, "loss": 0.9718779921531677, "step": 817, "token_acc": 0.7273652085452695 }, { "epoch": 0.9201349831271091, "grad_norm": 1.5011212825775146, "learning_rate": 9.951942499721931e-05, "loss": 0.8849422931671143, "step": 818, "token_acc": 0.7380952380952381 }, { "epoch": 0.9212598425196851, "grad_norm": 1.6068732738494873, "learning_rate": 9.951684889694058e-05, "loss": 1.0318611860275269, "step": 819, "token_acc": 0.7073446327683616 }, { "epoch": 0.922384701912261, "grad_norm": 1.8417268991470337, "learning_rate": 9.951426594409259e-05, "loss": 0.815395712852478, "step": 820, "token_acc": 0.7354260089686099 }, { "epoch": 0.9235095613048369, "grad_norm": 1.8921175003051758, "learning_rate": 9.951167613903283e-05, "loss": 1.0238498449325562, "step": 821, "token_acc": 0.7067238912732475 }, { "epoch": 0.9246344206974129, "grad_norm": 1.580751657485962, "learning_rate": 9.950907948211967e-05, "loss": 0.8719006180763245, "step": 822, "token_acc": 0.751357220412595 }, { "epoch": 0.9257592800899888, "grad_norm": 1.6129487752914429, "learning_rate": 9.950647597371248e-05, "loss": 1.0162131786346436, "step": 823, "token_acc": 0.7183908045977011 }, { "epoch": 0.9268841394825647, "grad_norm": 1.6730716228485107, "learning_rate": 9.950386561417154e-05, "loss": 1.0122299194335938, "step": 824, "token_acc": 0.7016129032258065 }, { "epoch": 0.9280089988751407, "grad_norm": 1.7188140153884888, "learning_rate": 9.95012484038581e-05, "loss": 0.8318296670913696, "step": 825, "token_acc": 0.7664516129032258 }, { "epoch": 0.9291338582677166, "grad_norm": 1.4970688819885254, "learning_rate": 9.949862434313438e-05, "loss": 0.7339006066322327, "step": 826, "token_acc": 0.7745222929936306 }, { "epoch": 0.9302587176602924, "grad_norm": 1.6073499917984009, "learning_rate": 9.949599343236347e-05, "loss": 0.94533771276474, "step": 827, "token_acc": 0.7259978425026968 }, { "epoch": 0.9313835770528683, "grad_norm": 1.5370196104049683, "learning_rate": 9.949335567190949e-05, "loss": 1.0233222246170044, "step": 828, "token_acc": 0.7407407407407407 }, { "epoch": 0.9325084364454443, "grad_norm": 1.4699498414993286, "learning_rate": 9.949071106213746e-05, "loss": 0.7591593265533447, "step": 829, "token_acc": 0.7782754759238522 }, { "epoch": 0.9336332958380202, "grad_norm": 1.4549957513809204, "learning_rate": 9.948805960341338e-05, "loss": 0.7916507720947266, "step": 830, "token_acc": 0.7658688865764828 }, { "epoch": 0.9347581552305961, "grad_norm": 1.5118483304977417, "learning_rate": 9.948540129610418e-05, "loss": 0.9603884220123291, "step": 831, "token_acc": 0.7313953488372092 }, { "epoch": 0.9358830146231721, "grad_norm": 1.5936923027038574, "learning_rate": 9.948273614057773e-05, "loss": 0.941769003868103, "step": 832, "token_acc": 0.7216699801192843 }, { "epoch": 0.937007874015748, "grad_norm": 1.6303367614746094, "learning_rate": 9.948006413720285e-05, "loss": 0.6913915872573853, "step": 833, "token_acc": 0.7850746268656716 }, { "epoch": 0.9381327334083239, "grad_norm": 1.6594948768615723, "learning_rate": 9.947738528634933e-05, "loss": 0.929300844669342, "step": 834, "token_acc": 0.7232267037552156 }, { "epoch": 0.9392575928008999, "grad_norm": 1.423675537109375, "learning_rate": 9.947469958838789e-05, "loss": 0.8667654395103455, "step": 835, "token_acc": 0.7429171038824763 }, { "epoch": 0.9403824521934758, "grad_norm": 1.386326551437378, "learning_rate": 9.94720070436902e-05, "loss": 0.9065897464752197, "step": 836, "token_acc": 0.7479131886477463 }, { "epoch": 0.9415073115860517, "grad_norm": 1.4630720615386963, "learning_rate": 9.946930765262887e-05, "loss": 1.0671309232711792, "step": 837, "token_acc": 0.7097560975609756 }, { "epoch": 0.9426321709786277, "grad_norm": 1.5271520614624023, "learning_rate": 9.946660141557748e-05, "loss": 0.8739483952522278, "step": 838, "token_acc": 0.7350746268656716 }, { "epoch": 0.9437570303712036, "grad_norm": 1.5476170778274536, "learning_rate": 9.946388833291052e-05, "loss": 0.876632034778595, "step": 839, "token_acc": 0.7497005988023953 }, { "epoch": 0.9448818897637795, "grad_norm": 1.726990818977356, "learning_rate": 9.946116840500349e-05, "loss": 0.7724737524986267, "step": 840, "token_acc": 0.75642965204236 }, { "epoch": 0.9460067491563554, "grad_norm": 1.5797302722930908, "learning_rate": 9.945844163223275e-05, "loss": 0.7838608622550964, "step": 841, "token_acc": 0.7637906647807637 }, { "epoch": 0.9471316085489314, "grad_norm": 1.5720324516296387, "learning_rate": 9.945570801497567e-05, "loss": 0.6883734464645386, "step": 842, "token_acc": 0.8024691358024691 }, { "epoch": 0.9482564679415073, "grad_norm": 1.8317309617996216, "learning_rate": 9.945296755361055e-05, "loss": 0.9278875589370728, "step": 843, "token_acc": 0.7286821705426356 }, { "epoch": 0.9493813273340832, "grad_norm": 1.4112917184829712, "learning_rate": 9.945022024851667e-05, "loss": 0.7560591101646423, "step": 844, "token_acc": 0.784037558685446 }, { "epoch": 0.9505061867266592, "grad_norm": 1.7552297115325928, "learning_rate": 9.944746610007418e-05, "loss": 0.8997507095336914, "step": 845, "token_acc": 0.7264516129032258 }, { "epoch": 0.9516310461192351, "grad_norm": 1.683524250984192, "learning_rate": 9.944470510866426e-05, "loss": 0.8892027139663696, "step": 846, "token_acc": 0.7382645803698435 }, { "epoch": 0.952755905511811, "grad_norm": 1.437582015991211, "learning_rate": 9.944193727466897e-05, "loss": 0.8574955463409424, "step": 847, "token_acc": 0.7477203647416414 }, { "epoch": 0.953880764904387, "grad_norm": 1.6825342178344727, "learning_rate": 9.943916259847137e-05, "loss": 0.8548638224601746, "step": 848, "token_acc": 0.75 }, { "epoch": 0.9550056242969629, "grad_norm": 1.1052398681640625, "learning_rate": 9.943638108045543e-05, "loss": 1.0246803760528564, "step": 849, "token_acc": 0.7407665505226481 }, { "epoch": 0.9561304836895388, "grad_norm": 1.445750117301941, "learning_rate": 9.943359272100609e-05, "loss": 1.0026438236236572, "step": 850, "token_acc": 0.735632183908046 }, { "epoch": 0.9572553430821147, "grad_norm": 1.6515024900436401, "learning_rate": 9.943079752050922e-05, "loss": 0.8294997215270996, "step": 851, "token_acc": 0.7524475524475525 }, { "epoch": 0.9583802024746907, "grad_norm": 1.5658183097839355, "learning_rate": 9.942799547935163e-05, "loss": 0.9540088176727295, "step": 852, "token_acc": 0.7131696428571429 }, { "epoch": 0.9595050618672666, "grad_norm": 1.8495062589645386, "learning_rate": 9.942518659792113e-05, "loss": 1.0897811651229858, "step": 853, "token_acc": 0.6974595842956121 }, { "epoch": 0.9606299212598425, "grad_norm": 1.8098114728927612, "learning_rate": 9.94223708766064e-05, "loss": 1.0821534395217896, "step": 854, "token_acc": 0.7007722007722008 }, { "epoch": 0.9617547806524185, "grad_norm": 1.459570050239563, "learning_rate": 9.941954831579712e-05, "loss": 1.0016058683395386, "step": 855, "token_acc": 0.7005291005291006 }, { "epoch": 0.9628796400449944, "grad_norm": 1.722278356552124, "learning_rate": 9.94167189158839e-05, "loss": 0.7183030843734741, "step": 856, "token_acc": 0.7496062992125985 }, { "epoch": 0.9640044994375703, "grad_norm": 1.5452113151550293, "learning_rate": 9.941388267725829e-05, "loss": 0.8223056793212891, "step": 857, "token_acc": 0.7579462102689487 }, { "epoch": 0.9651293588301463, "grad_norm": 1.5601240396499634, "learning_rate": 9.941103960031279e-05, "loss": 0.9493224620819092, "step": 858, "token_acc": 0.7350332594235033 }, { "epoch": 0.9662542182227222, "grad_norm": 1.797393798828125, "learning_rate": 9.940818968544086e-05, "loss": 0.907567024230957, "step": 859, "token_acc": 0.7327823691460055 }, { "epoch": 0.9673790776152981, "grad_norm": 1.3913823366165161, "learning_rate": 9.940533293303691e-05, "loss": 0.7852994203567505, "step": 860, "token_acc": 0.7867494824016563 }, { "epoch": 0.968503937007874, "grad_norm": 1.3302485942840576, "learning_rate": 9.940246934349625e-05, "loss": 0.8405075669288635, "step": 861, "token_acc": 0.7546125461254612 }, { "epoch": 0.96962879640045, "grad_norm": 1.5534770488739014, "learning_rate": 9.939959891721518e-05, "loss": 0.929782509803772, "step": 862, "token_acc": 0.7292377701934016 }, { "epoch": 0.9707536557930259, "grad_norm": 1.1704704761505127, "learning_rate": 9.939672165459095e-05, "loss": 0.9228657484054565, "step": 863, "token_acc": 0.7406876790830945 }, { "epoch": 0.9718785151856018, "grad_norm": 1.6044881343841553, "learning_rate": 9.939383755602172e-05, "loss": 0.9070751667022705, "step": 864, "token_acc": 0.7403598971722365 }, { "epoch": 0.9730033745781778, "grad_norm": 1.3236223459243774, "learning_rate": 9.939094662190663e-05, "loss": 0.7256869077682495, "step": 865, "token_acc": 0.7780320366132724 }, { "epoch": 0.9741282339707537, "grad_norm": 1.607419490814209, "learning_rate": 9.938804885264574e-05, "loss": 0.7838770151138306, "step": 866, "token_acc": 0.7444279346210996 }, { "epoch": 0.9752530933633295, "grad_norm": 1.5648255348205566, "learning_rate": 9.938514424864009e-05, "loss": 0.924368143081665, "step": 867, "token_acc": 0.7232558139534884 }, { "epoch": 0.9763779527559056, "grad_norm": 1.6547348499298096, "learning_rate": 9.938223281029163e-05, "loss": 1.0353469848632812, "step": 868, "token_acc": 0.7105263157894737 }, { "epoch": 0.9775028121484814, "grad_norm": 1.4519191980361938, "learning_rate": 9.937931453800325e-05, "loss": 0.866372287273407, "step": 869, "token_acc": 0.7611777535441657 }, { "epoch": 0.9786276715410573, "grad_norm": 1.580224633216858, "learning_rate": 9.937638943217884e-05, "loss": 0.720623254776001, "step": 870, "token_acc": 0.7818448023426061 }, { "epoch": 0.9797525309336333, "grad_norm": 1.6209633350372314, "learning_rate": 9.937345749322318e-05, "loss": 0.8432456254959106, "step": 871, "token_acc": 0.7522123893805309 }, { "epoch": 0.9808773903262092, "grad_norm": 1.5397424697875977, "learning_rate": 9.937051872154203e-05, "loss": 0.924987256526947, "step": 872, "token_acc": 0.7308612440191388 }, { "epoch": 0.9820022497187851, "grad_norm": 1.5875797271728516, "learning_rate": 9.936757311754208e-05, "loss": 1.0739061832427979, "step": 873, "token_acc": 0.7082352941176471 }, { "epoch": 0.983127109111361, "grad_norm": 1.589887022972107, "learning_rate": 9.936462068163095e-05, "loss": 0.7585837841033936, "step": 874, "token_acc": 0.7741530740276035 }, { "epoch": 0.984251968503937, "grad_norm": 1.686587929725647, "learning_rate": 9.936166141421726e-05, "loss": 0.8689331412315369, "step": 875, "token_acc": 0.7262479871175523 }, { "epoch": 0.9853768278965129, "grad_norm": 1.5799217224121094, "learning_rate": 9.935869531571049e-05, "loss": 0.896557629108429, "step": 876, "token_acc": 0.7430025445292621 }, { "epoch": 0.9865016872890888, "grad_norm": 1.5723917484283447, "learning_rate": 9.935572238652115e-05, "loss": 0.9257574677467346, "step": 877, "token_acc": 0.7259740259740259 }, { "epoch": 0.9876265466816648, "grad_norm": 1.6239306926727295, "learning_rate": 9.935274262706067e-05, "loss": 1.0342459678649902, "step": 878, "token_acc": 0.7214285714285714 }, { "epoch": 0.9887514060742407, "grad_norm": 1.3797731399536133, "learning_rate": 9.934975603774136e-05, "loss": 0.7916626334190369, "step": 879, "token_acc": 0.7519466073414905 }, { "epoch": 0.9898762654668166, "grad_norm": 1.7215884923934937, "learning_rate": 9.93467626189766e-05, "loss": 0.8159435987472534, "step": 880, "token_acc": 0.7590361445783133 }, { "epoch": 0.9910011248593926, "grad_norm": 1.2806230783462524, "learning_rate": 9.934376237118059e-05, "loss": 0.7226642966270447, "step": 881, "token_acc": 0.7851605758582503 }, { "epoch": 0.9921259842519685, "grad_norm": 1.5358145236968994, "learning_rate": 9.934075529476856e-05, "loss": 0.6849958896636963, "step": 882, "token_acc": 0.782051282051282 }, { "epoch": 0.9932508436445444, "grad_norm": 1.7339332103729248, "learning_rate": 9.933774139015665e-05, "loss": 0.9200420379638672, "step": 883, "token_acc": 0.7250673854447439 }, { "epoch": 0.9943757030371203, "grad_norm": 1.4197396039962769, "learning_rate": 9.933472065776195e-05, "loss": 0.9151552319526672, "step": 884, "token_acc": 0.7446373850868233 }, { "epoch": 0.9955005624296963, "grad_norm": 1.3568702936172485, "learning_rate": 9.933169309800248e-05, "loss": 0.828211784362793, "step": 885, "token_acc": 0.7642857142857142 }, { "epoch": 0.9966254218222722, "grad_norm": 1.3985289335250854, "learning_rate": 9.932865871129724e-05, "loss": 0.8221898078918457, "step": 886, "token_acc": 0.7654320987654321 }, { "epoch": 0.9977502812148481, "grad_norm": 1.5605140924453735, "learning_rate": 9.932561749806615e-05, "loss": 0.900260865688324, "step": 887, "token_acc": 0.7406568516421291 }, { "epoch": 0.9988751406074241, "grad_norm": 1.3819880485534668, "learning_rate": 9.932256945873006e-05, "loss": 0.7871809601783752, "step": 888, "token_acc": 0.76266137040715 }, { "epoch": 1.0, "grad_norm": 1.5047004222869873, "learning_rate": 9.93195145937108e-05, "loss": 0.832351565361023, "step": 889, "token_acc": 0.7509578544061303 }, { "epoch": 1.001124859392576, "grad_norm": 1.3886194229125977, "learning_rate": 9.931645290343114e-05, "loss": 0.8151365518569946, "step": 890, "token_acc": 0.7612095933263816 }, { "epoch": 1.0022497187851518, "grad_norm": 1.233345866203308, "learning_rate": 9.931338438831477e-05, "loss": 0.6345759034156799, "step": 891, "token_acc": 0.7991452991452992 }, { "epoch": 1.0033745781777277, "grad_norm": 1.4569950103759766, "learning_rate": 9.931030904878634e-05, "loss": 0.696320652961731, "step": 892, "token_acc": 0.7889182058047494 }, { "epoch": 1.0044994375703038, "grad_norm": 1.2090489864349365, "learning_rate": 9.930722688527144e-05, "loss": 0.5535595417022705, "step": 893, "token_acc": 0.8246013667425968 }, { "epoch": 1.0056242969628797, "grad_norm": 1.544079065322876, "learning_rate": 9.93041378981966e-05, "loss": 0.7547067999839783, "step": 894, "token_acc": 0.7708333333333334 }, { "epoch": 1.0067491563554556, "grad_norm": 1.5158504247665405, "learning_rate": 9.930104208798932e-05, "loss": 0.8079156875610352, "step": 895, "token_acc": 0.7577197149643705 }, { "epoch": 1.0078740157480315, "grad_norm": 1.4339433908462524, "learning_rate": 9.9297939455078e-05, "loss": 0.7826467752456665, "step": 896, "token_acc": 0.7652838427947598 }, { "epoch": 1.0089988751406074, "grad_norm": 1.4819750785827637, "learning_rate": 9.929482999989203e-05, "loss": 0.8341797590255737, "step": 897, "token_acc": 0.7387234042553191 }, { "epoch": 1.0101237345331833, "grad_norm": 1.8356832265853882, "learning_rate": 9.929171372286172e-05, "loss": 0.6837634444236755, "step": 898, "token_acc": 0.7743300423131171 }, { "epoch": 1.0112485939257594, "grad_norm": 1.5893092155456543, "learning_rate": 9.928859062441831e-05, "loss": 0.8784787654876709, "step": 899, "token_acc": 0.7553879310344828 }, { "epoch": 1.0123734533183353, "grad_norm": 1.6607754230499268, "learning_rate": 9.928546070499402e-05, "loss": 0.6958364248275757, "step": 900, "token_acc": 0.7761780104712042 }, { "epoch": 1.0123734533183353, "eval_loss": 0.9041223526000977, "eval_runtime": 31.9567, "eval_samples_per_second": 25.128, "eval_steps_per_second": 3.161, "eval_token_acc": 0.7368990570412738, "step": 900 }, { "epoch": 1.0134983127109112, "grad_norm": 1.5128378868103027, "learning_rate": 9.9282323965022e-05, "loss": 0.8221575617790222, "step": 901, "token_acc": 0.7530120481927711 }, { "epoch": 1.014623172103487, "grad_norm": 2.0671088695526123, "learning_rate": 9.92791804049363e-05, "loss": 0.8844960927963257, "step": 902, "token_acc": 0.7283072546230441 }, { "epoch": 1.015748031496063, "grad_norm": 1.866845965385437, "learning_rate": 9.927603002517201e-05, "loss": 1.0008044242858887, "step": 903, "token_acc": 0.7157894736842105 }, { "epoch": 1.0168728908886389, "grad_norm": 1.5316959619522095, "learning_rate": 9.927287282616507e-05, "loss": 0.8898968696594238, "step": 904, "token_acc": 0.7644927536231884 }, { "epoch": 1.0179977502812148, "grad_norm": 1.4676575660705566, "learning_rate": 9.926970880835241e-05, "loss": 0.7892181873321533, "step": 905, "token_acc": 0.7645478961504029 }, { "epoch": 1.0191226096737909, "grad_norm": 1.6815071105957031, "learning_rate": 9.926653797217189e-05, "loss": 0.6952338218688965, "step": 906, "token_acc": 0.8004836759371221 }, { "epoch": 1.0202474690663668, "grad_norm": 1.9443389177322388, "learning_rate": 9.926336031806232e-05, "loss": 0.8682780861854553, "step": 907, "token_acc": 0.7437582128777924 }, { "epoch": 1.0213723284589427, "grad_norm": 1.697880744934082, "learning_rate": 9.926017584646346e-05, "loss": 0.6675167083740234, "step": 908, "token_acc": 0.7894088669950738 }, { "epoch": 1.0224971878515186, "grad_norm": 1.583058476448059, "learning_rate": 9.925698455781598e-05, "loss": 0.7472224235534668, "step": 909, "token_acc": 0.7681818181818182 }, { "epoch": 1.0236220472440944, "grad_norm": 1.649269461631775, "learning_rate": 9.925378645256156e-05, "loss": 0.7085657715797424, "step": 910, "token_acc": 0.7771084337349398 }, { "epoch": 1.0247469066366703, "grad_norm": 1.6934324502944946, "learning_rate": 9.925058153114273e-05, "loss": 0.710473358631134, "step": 911, "token_acc": 0.7971938775510204 }, { "epoch": 1.0258717660292462, "grad_norm": 1.6996710300445557, "learning_rate": 9.924736979400306e-05, "loss": 0.7088537812232971, "step": 912, "token_acc": 0.793063583815029 }, { "epoch": 1.0269966254218224, "grad_norm": 1.5944418907165527, "learning_rate": 9.924415124158699e-05, "loss": 0.7987306118011475, "step": 913, "token_acc": 0.7450765864332604 }, { "epoch": 1.0281214848143982, "grad_norm": 1.6225109100341797, "learning_rate": 9.924092587433993e-05, "loss": 0.788836658000946, "step": 914, "token_acc": 0.7694736842105263 }, { "epoch": 1.0292463442069741, "grad_norm": 1.6165695190429688, "learning_rate": 9.923769369270825e-05, "loss": 0.5829221606254578, "step": 915, "token_acc": 0.8097560975609757 }, { "epoch": 1.03037120359955, "grad_norm": 1.8762911558151245, "learning_rate": 9.923445469713925e-05, "loss": 0.864579975605011, "step": 916, "token_acc": 0.7506142506142506 }, { "epoch": 1.031496062992126, "grad_norm": 1.791709065437317, "learning_rate": 9.923120888808116e-05, "loss": 0.8532757759094238, "step": 917, "token_acc": 0.7480916030534351 }, { "epoch": 1.0326209223847018, "grad_norm": 1.6161748170852661, "learning_rate": 9.922795626598316e-05, "loss": 0.7383064031600952, "step": 918, "token_acc": 0.7766393442622951 }, { "epoch": 1.033745781777278, "grad_norm": 1.9433609247207642, "learning_rate": 9.922469683129538e-05, "loss": 0.889350175857544, "step": 919, "token_acc": 0.7481012658227848 }, { "epoch": 1.0348706411698538, "grad_norm": 1.8638015985488892, "learning_rate": 9.922143058446888e-05, "loss": 0.7862579822540283, "step": 920, "token_acc": 0.7585798816568048 }, { "epoch": 1.0359955005624297, "grad_norm": 1.7083909511566162, "learning_rate": 9.92181575259557e-05, "loss": 0.8653193712234497, "step": 921, "token_acc": 0.7473583093179635 }, { "epoch": 1.0371203599550056, "grad_norm": 1.414158582687378, "learning_rate": 9.921487765620877e-05, "loss": 0.8043832182884216, "step": 922, "token_acc": 0.7648 }, { "epoch": 1.0382452193475815, "grad_norm": 1.9099700450897217, "learning_rate": 9.921159097568199e-05, "loss": 0.851813018321991, "step": 923, "token_acc": 0.7502750275027503 }, { "epoch": 1.0393700787401574, "grad_norm": 1.6623998880386353, "learning_rate": 9.920829748483019e-05, "loss": 0.8502336740493774, "step": 924, "token_acc": 0.7434094903339191 }, { "epoch": 1.0404949381327333, "grad_norm": 1.789060115814209, "learning_rate": 9.920499718410918e-05, "loss": 0.7532194256782532, "step": 925, "token_acc": 0.776239907727797 }, { "epoch": 1.0416197975253094, "grad_norm": 1.9397916793823242, "learning_rate": 9.920169007397565e-05, "loss": 0.6300563812255859, "step": 926, "token_acc": 0.8170028818443804 }, { "epoch": 1.0427446569178853, "grad_norm": 2.160593271255493, "learning_rate": 9.919837615488729e-05, "loss": 0.6178661584854126, "step": 927, "token_acc": 0.7931654676258992 }, { "epoch": 1.0438695163104612, "grad_norm": 1.7933175563812256, "learning_rate": 9.91950554273027e-05, "loss": 0.6969425678253174, "step": 928, "token_acc": 0.7839195979899497 }, { "epoch": 1.044994375703037, "grad_norm": 1.8257203102111816, "learning_rate": 9.919172789168142e-05, "loss": 0.6699044704437256, "step": 929, "token_acc": 0.7929155313351499 }, { "epoch": 1.046119235095613, "grad_norm": 2.0376198291778564, "learning_rate": 9.918839354848399e-05, "loss": 1.0547993183135986, "step": 930, "token_acc": 0.717741935483871 }, { "epoch": 1.047244094488189, "grad_norm": 1.7401132583618164, "learning_rate": 9.918505239817179e-05, "loss": 0.7588546872138977, "step": 931, "token_acc": 0.7581227436823105 }, { "epoch": 1.048368953880765, "grad_norm": 1.9140479564666748, "learning_rate": 9.918170444120721e-05, "loss": 0.9371983408927917, "step": 932, "token_acc": 0.718717683557394 }, { "epoch": 1.049493813273341, "grad_norm": 2.170701503753662, "learning_rate": 9.91783496780536e-05, "loss": 0.9513338804244995, "step": 933, "token_acc": 0.7371879106438897 }, { "epoch": 1.0506186726659168, "grad_norm": 1.540938138961792, "learning_rate": 9.917498810917518e-05, "loss": 0.6479253768920898, "step": 934, "token_acc": 0.8162230671736375 }, { "epoch": 1.0517435320584927, "grad_norm": 1.920308232307434, "learning_rate": 9.917161973503716e-05, "loss": 0.9394348859786987, "step": 935, "token_acc": 0.7260812581913499 }, { "epoch": 1.0528683914510686, "grad_norm": 1.7045081853866577, "learning_rate": 9.916824455610571e-05, "loss": 0.9181839227676392, "step": 936, "token_acc": 0.7342007434944238 }, { "epoch": 1.0539932508436445, "grad_norm": 1.6959329843521118, "learning_rate": 9.916486257284792e-05, "loss": 0.705910325050354, "step": 937, "token_acc": 0.7610208816705336 }, { "epoch": 1.0551181102362204, "grad_norm": 1.6082569360733032, "learning_rate": 9.916147378573178e-05, "loss": 0.6529836654663086, "step": 938, "token_acc": 0.7814269535673839 }, { "epoch": 1.0562429696287965, "grad_norm": 1.7792717218399048, "learning_rate": 9.915807819522628e-05, "loss": 0.8262868523597717, "step": 939, "token_acc": 0.7609921082299888 }, { "epoch": 1.0573678290213724, "grad_norm": 1.624019742012024, "learning_rate": 9.915467580180134e-05, "loss": 0.7345082759857178, "step": 940, "token_acc": 0.7761692650334076 }, { "epoch": 1.0584926884139483, "grad_norm": 1.308997631072998, "learning_rate": 9.91512666059278e-05, "loss": 0.4690554738044739, "step": 941, "token_acc": 0.8386454183266933 }, { "epoch": 1.0596175478065242, "grad_norm": 1.9849926233291626, "learning_rate": 9.914785060807747e-05, "loss": 0.73500657081604, "step": 942, "token_acc": 0.7807737397420867 }, { "epoch": 1.0607424071991, "grad_norm": 1.9730424880981445, "learning_rate": 9.914442780872306e-05, "loss": 0.7262517213821411, "step": 943, "token_acc": 0.7605442176870748 }, { "epoch": 1.061867266591676, "grad_norm": 1.899154543876648, "learning_rate": 9.914099820833827e-05, "loss": 0.8511804342269897, "step": 944, "token_acc": 0.7411477411477412 }, { "epoch": 1.0629921259842519, "grad_norm": 2.3391942977905273, "learning_rate": 9.913756180739768e-05, "loss": 0.7486095428466797, "step": 945, "token_acc": 0.7823741007194245 }, { "epoch": 1.064116985376828, "grad_norm": 1.608491063117981, "learning_rate": 9.913411860637691e-05, "loss": 0.7847080230712891, "step": 946, "token_acc": 0.7654867256637168 }, { "epoch": 1.0652418447694039, "grad_norm": 2.0758838653564453, "learning_rate": 9.913066860575241e-05, "loss": 0.9643989205360413, "step": 947, "token_acc": 0.7112597547380156 }, { "epoch": 1.0663667041619798, "grad_norm": 2.1422417163848877, "learning_rate": 9.912721180600164e-05, "loss": 0.6414483785629272, "step": 948, "token_acc": 0.7945425361155698 }, { "epoch": 1.0674915635545557, "grad_norm": 1.6824426651000977, "learning_rate": 9.912374820760298e-05, "loss": 0.7036554217338562, "step": 949, "token_acc": 0.7867965367965368 }, { "epoch": 1.0686164229471316, "grad_norm": 1.876808762550354, "learning_rate": 9.912027781103575e-05, "loss": 0.7813992500305176, "step": 950, "token_acc": 0.7471698113207547 }, { "epoch": 1.0697412823397074, "grad_norm": 1.7202346324920654, "learning_rate": 9.911680061678022e-05, "loss": 0.7755405902862549, "step": 951, "token_acc": 0.7710526315789473 }, { "epoch": 1.0708661417322836, "grad_norm": 1.7495349645614624, "learning_rate": 9.911331662531757e-05, "loss": 0.8195908665657043, "step": 952, "token_acc": 0.7641304347826087 }, { "epoch": 1.0719910011248595, "grad_norm": 1.4975496530532837, "learning_rate": 9.910982583712998e-05, "loss": 0.8595860600471497, "step": 953, "token_acc": 0.75 }, { "epoch": 1.0731158605174353, "grad_norm": 1.4502170085906982, "learning_rate": 9.910632825270052e-05, "loss": 0.7282490134239197, "step": 954, "token_acc": 0.7923875432525952 }, { "epoch": 1.0742407199100112, "grad_norm": 1.9652513265609741, "learning_rate": 9.910282387251322e-05, "loss": 0.9176247715950012, "step": 955, "token_acc": 0.7406542056074766 }, { "epoch": 1.0753655793025871, "grad_norm": 1.8823096752166748, "learning_rate": 9.909931269705303e-05, "loss": 0.8836848735809326, "step": 956, "token_acc": 0.7435897435897436 }, { "epoch": 1.076490438695163, "grad_norm": 2.088956594467163, "learning_rate": 9.909579472680587e-05, "loss": 0.9226083159446716, "step": 957, "token_acc": 0.7192982456140351 }, { "epoch": 1.0776152980877391, "grad_norm": 1.8181045055389404, "learning_rate": 9.90922699622586e-05, "loss": 0.7956765294075012, "step": 958, "token_acc": 0.767962308598351 }, { "epoch": 1.078740157480315, "grad_norm": 1.8891890048980713, "learning_rate": 9.908873840389896e-05, "loss": 0.83858323097229, "step": 959, "token_acc": 0.7586618876941458 }, { "epoch": 1.079865016872891, "grad_norm": 1.8649429082870483, "learning_rate": 9.908520005221574e-05, "loss": 0.8044394254684448, "step": 960, "token_acc": 0.7668997668997669 }, { "epoch": 1.0809898762654668, "grad_norm": 1.498835563659668, "learning_rate": 9.908165490769857e-05, "loss": 0.5737241506576538, "step": 961, "token_acc": 0.8288288288288288 }, { "epoch": 1.0821147356580427, "grad_norm": 1.7956262826919556, "learning_rate": 9.907810297083806e-05, "loss": 0.7240085005760193, "step": 962, "token_acc": 0.774018944519621 }, { "epoch": 1.0832395950506186, "grad_norm": 2.0637435913085938, "learning_rate": 9.907454424212578e-05, "loss": 0.7458258867263794, "step": 963, "token_acc": 0.7811594202898551 }, { "epoch": 1.0843644544431945, "grad_norm": 1.9711368083953857, "learning_rate": 9.907097872205419e-05, "loss": 0.8778837323188782, "step": 964, "token_acc": 0.7397622192866579 }, { "epoch": 1.0854893138357706, "grad_norm": 1.612127423286438, "learning_rate": 9.906740641111673e-05, "loss": 0.6535238027572632, "step": 965, "token_acc": 0.7889022919179735 }, { "epoch": 1.0866141732283465, "grad_norm": 1.6515363454818726, "learning_rate": 9.906382730980776e-05, "loss": 0.6697081923484802, "step": 966, "token_acc": 0.8049645390070922 }, { "epoch": 1.0877390326209224, "grad_norm": 1.5020391941070557, "learning_rate": 9.90602414186226e-05, "loss": 0.6822036504745483, "step": 967, "token_acc": 0.7903225806451613 }, { "epoch": 1.0888638920134983, "grad_norm": 1.7259968519210815, "learning_rate": 9.905664873805749e-05, "loss": 0.7810940146446228, "step": 968, "token_acc": 0.7560483870967742 }, { "epoch": 1.0899887514060742, "grad_norm": 1.8110078573226929, "learning_rate": 9.905304926860962e-05, "loss": 0.7744519710540771, "step": 969, "token_acc": 0.7734470158343484 }, { "epoch": 1.09111361079865, "grad_norm": 2.067399024963379, "learning_rate": 9.90494430107771e-05, "loss": 0.8164230585098267, "step": 970, "token_acc": 0.751696065128901 }, { "epoch": 1.092238470191226, "grad_norm": 1.7637059688568115, "learning_rate": 9.904582996505903e-05, "loss": 0.7491020560264587, "step": 971, "token_acc": 0.7629083245521602 }, { "epoch": 1.0933633295838021, "grad_norm": 1.4671814441680908, "learning_rate": 9.904221013195537e-05, "loss": 0.6753535270690918, "step": 972, "token_acc": 0.8015640273704789 }, { "epoch": 1.094488188976378, "grad_norm": 1.6868460178375244, "learning_rate": 9.903858351196708e-05, "loss": 0.7227917313575745, "step": 973, "token_acc": 0.7640845070422535 }, { "epoch": 1.095613048368954, "grad_norm": 1.4740667343139648, "learning_rate": 9.903495010559606e-05, "loss": 0.7594507336616516, "step": 974, "token_acc": 0.7773343974461293 }, { "epoch": 1.0967379077615298, "grad_norm": 1.7550362348556519, "learning_rate": 9.903130991334512e-05, "loss": 0.6448140144348145, "step": 975, "token_acc": 0.7995018679950187 }, { "epoch": 1.0978627671541057, "grad_norm": 1.6901167631149292, "learning_rate": 9.902766293571801e-05, "loss": 0.7475540041923523, "step": 976, "token_acc": 0.7862068965517242 }, { "epoch": 1.0989876265466816, "grad_norm": 1.660377860069275, "learning_rate": 9.902400917321945e-05, "loss": 0.7052556276321411, "step": 977, "token_acc": 0.7823204419889502 }, { "epoch": 1.1001124859392575, "grad_norm": 1.8101234436035156, "learning_rate": 9.902034862635508e-05, "loss": 0.9171386361122131, "step": 978, "token_acc": 0.7388888888888889 }, { "epoch": 1.1012373453318336, "grad_norm": 1.9689631462097168, "learning_rate": 9.901668129563144e-05, "loss": 0.6687447428703308, "step": 979, "token_acc": 0.7902298850574713 }, { "epoch": 1.1023622047244095, "grad_norm": 1.9149768352508545, "learning_rate": 9.90130071815561e-05, "loss": 0.6476566195487976, "step": 980, "token_acc": 0.8050065876152833 }, { "epoch": 1.1034870641169854, "grad_norm": 1.9313069581985474, "learning_rate": 9.900932628463748e-05, "loss": 0.8526951670646667, "step": 981, "token_acc": 0.7537128712871287 }, { "epoch": 1.1046119235095613, "grad_norm": 1.8034113645553589, "learning_rate": 9.9005638605385e-05, "loss": 0.6293154954910278, "step": 982, "token_acc": 0.8118948824343015 }, { "epoch": 1.1057367829021372, "grad_norm": 1.9953246116638184, "learning_rate": 9.900194414430897e-05, "loss": 0.7881186008453369, "step": 983, "token_acc": 0.7609170305676856 }, { "epoch": 1.106861642294713, "grad_norm": 1.8247008323669434, "learning_rate": 9.899824290192065e-05, "loss": 0.688132107257843, "step": 984, "token_acc": 0.7827130852340937 }, { "epoch": 1.1079865016872892, "grad_norm": 1.9742939472198486, "learning_rate": 9.899453487873231e-05, "loss": 0.9474039077758789, "step": 985, "token_acc": 0.7223451327433629 }, { "epoch": 1.109111361079865, "grad_norm": 1.8638603687286377, "learning_rate": 9.899082007525702e-05, "loss": 0.7586148381233215, "step": 986, "token_acc": 0.7777777777777778 }, { "epoch": 1.110236220472441, "grad_norm": 1.922324299812317, "learning_rate": 9.898709849200893e-05, "loss": 0.7502880096435547, "step": 987, "token_acc": 0.7689320388349514 }, { "epoch": 1.1113610798650169, "grad_norm": 1.7862205505371094, "learning_rate": 9.898337012950303e-05, "loss": 0.7093402147293091, "step": 988, "token_acc": 0.7806004618937644 }, { "epoch": 1.1124859392575928, "grad_norm": 1.8790091276168823, "learning_rate": 9.897963498825528e-05, "loss": 0.7395766973495483, "step": 989, "token_acc": 0.7682403433476395 }, { "epoch": 1.1136107986501687, "grad_norm": 1.6607329845428467, "learning_rate": 9.897589306878262e-05, "loss": 0.6986875534057617, "step": 990, "token_acc": 0.8025415444770283 }, { "epoch": 1.1147356580427448, "grad_norm": 2.1557834148406982, "learning_rate": 9.897214437160285e-05, "loss": 0.8308762311935425, "step": 991, "token_acc": 0.7511261261261262 }, { "epoch": 1.1158605174353207, "grad_norm": 1.8445008993148804, "learning_rate": 9.896838889723476e-05, "loss": 0.8049067854881287, "step": 992, "token_acc": 0.769090909090909 }, { "epoch": 1.1169853768278966, "grad_norm": 2.0687878131866455, "learning_rate": 9.896462664619806e-05, "loss": 0.7902045249938965, "step": 993, "token_acc": 0.7657784011220197 }, { "epoch": 1.1181102362204725, "grad_norm": 1.6997286081314087, "learning_rate": 9.896085761901342e-05, "loss": 0.5624246597290039, "step": 994, "token_acc": 0.8110151187904968 }, { "epoch": 1.1192350956130483, "grad_norm": 1.804634690284729, "learning_rate": 9.895708181620241e-05, "loss": 0.7192327976226807, "step": 995, "token_acc": 0.7933884297520661 }, { "epoch": 1.1203599550056242, "grad_norm": 1.9428592920303345, "learning_rate": 9.895329923828755e-05, "loss": 0.7932791709899902, "step": 996, "token_acc": 0.7670588235294118 }, { "epoch": 1.1214848143982001, "grad_norm": 1.6463708877563477, "learning_rate": 9.894950988579235e-05, "loss": 0.7246082425117493, "step": 997, "token_acc": 0.7695067264573991 }, { "epoch": 1.1226096737907763, "grad_norm": 1.626259446144104, "learning_rate": 9.894571375924117e-05, "loss": 0.5670942068099976, "step": 998, "token_acc": 0.8385598141695703 }, { "epoch": 1.1237345331833521, "grad_norm": 2.036090850830078, "learning_rate": 9.894191085915938e-05, "loss": 0.8275429606437683, "step": 999, "token_acc": 0.7629987908101572 }, { "epoch": 1.124859392575928, "grad_norm": 1.7392226457595825, "learning_rate": 9.893810118607323e-05, "loss": 0.5310137271881104, "step": 1000, "token_acc": 0.8348729792147807 }, { "epoch": 1.125984251968504, "grad_norm": 1.8722096681594849, "learning_rate": 9.893428474050996e-05, "loss": 0.7551854848861694, "step": 1001, "token_acc": 0.7713717693836978 }, { "epoch": 1.1271091113610798, "grad_norm": 2.021446943283081, "learning_rate": 9.893046152299769e-05, "loss": 0.9847474098205566, "step": 1002, "token_acc": 0.7427341227125942 }, { "epoch": 1.1282339707536557, "grad_norm": 1.7962864637374878, "learning_rate": 9.892663153406556e-05, "loss": 0.8379891514778137, "step": 1003, "token_acc": 0.7576374745417516 }, { "epoch": 1.1293588301462316, "grad_norm": 1.979744791984558, "learning_rate": 9.892279477424357e-05, "loss": 0.7916675806045532, "step": 1004, "token_acc": 0.78125 }, { "epoch": 1.1304836895388077, "grad_norm": 2.065375328063965, "learning_rate": 9.891895124406266e-05, "loss": 0.8200545310974121, "step": 1005, "token_acc": 0.7497181510710259 }, { "epoch": 1.1316085489313836, "grad_norm": 1.8008625507354736, "learning_rate": 9.891510094405478e-05, "loss": 0.5744394659996033, "step": 1006, "token_acc": 0.8197146562905318 }, { "epoch": 1.1327334083239595, "grad_norm": 2.0366482734680176, "learning_rate": 9.891124387475272e-05, "loss": 0.8781095743179321, "step": 1007, "token_acc": 0.7668112798264642 }, { "epoch": 1.1338582677165354, "grad_norm": 2.0129611492156982, "learning_rate": 9.890738003669029e-05, "loss": 0.6773532629013062, "step": 1008, "token_acc": 0.790257104194858 }, { "epoch": 1.1349831271091113, "grad_norm": 1.6898316144943237, "learning_rate": 9.890350943040218e-05, "loss": 0.7546791434288025, "step": 1009, "token_acc": 0.7887970615243343 }, { "epoch": 1.1361079865016872, "grad_norm": 1.5520615577697754, "learning_rate": 9.889963205642405e-05, "loss": 0.5962123870849609, "step": 1010, "token_acc": 0.8199791883454735 }, { "epoch": 1.137232845894263, "grad_norm": 2.1429386138916016, "learning_rate": 9.889574791529248e-05, "loss": 0.8808229565620422, "step": 1011, "token_acc": 0.7463592233009708 }, { "epoch": 1.1383577052868392, "grad_norm": 1.9682352542877197, "learning_rate": 9.8891857007545e-05, "loss": 0.6994647979736328, "step": 1012, "token_acc": 0.7842424242424243 }, { "epoch": 1.139482564679415, "grad_norm": 1.7178378105163574, "learning_rate": 9.888795933372004e-05, "loss": 0.7946627140045166, "step": 1013, "token_acc": 0.7725409836065574 }, { "epoch": 1.140607424071991, "grad_norm": 1.7295441627502441, "learning_rate": 9.888405489435702e-05, "loss": 0.87061607837677, "step": 1014, "token_acc": 0.744165170556553 }, { "epoch": 1.141732283464567, "grad_norm": 2.084143877029419, "learning_rate": 9.888014368999628e-05, "loss": 0.7593210339546204, "step": 1015, "token_acc": 0.7798561151079136 }, { "epoch": 1.1428571428571428, "grad_norm": 1.8054407835006714, "learning_rate": 9.887622572117903e-05, "loss": 0.8524465560913086, "step": 1016, "token_acc": 0.7394296951819076 }, { "epoch": 1.143982002249719, "grad_norm": 1.6688339710235596, "learning_rate": 9.887230098844754e-05, "loss": 0.864521324634552, "step": 1017, "token_acc": 0.7434607645875252 }, { "epoch": 1.1451068616422948, "grad_norm": 1.8150449991226196, "learning_rate": 9.886836949234493e-05, "loss": 0.7196289896965027, "step": 1018, "token_acc": 0.7755102040816326 }, { "epoch": 1.1462317210348707, "grad_norm": 2.234403133392334, "learning_rate": 9.886443123341525e-05, "loss": 0.799077570438385, "step": 1019, "token_acc": 0.7550143266475645 }, { "epoch": 1.1473565804274466, "grad_norm": 1.7661705017089844, "learning_rate": 9.886048621220351e-05, "loss": 0.6986681222915649, "step": 1020, "token_acc": 0.8083623693379791 }, { "epoch": 1.1484814398200225, "grad_norm": 1.7862352132797241, "learning_rate": 9.885653442925568e-05, "loss": 0.7997151613235474, "step": 1021, "token_acc": 0.7685643564356436 }, { "epoch": 1.1496062992125984, "grad_norm": 1.8714238405227661, "learning_rate": 9.885257588511864e-05, "loss": 0.6881147623062134, "step": 1022, "token_acc": 0.7848911651728553 }, { "epoch": 1.1507311586051743, "grad_norm": 1.8259344100952148, "learning_rate": 9.884861058034018e-05, "loss": 0.700319230556488, "step": 1023, "token_acc": 0.7829373650107991 }, { "epoch": 1.1518560179977504, "grad_norm": 1.709301233291626, "learning_rate": 9.88446385154691e-05, "loss": 0.6497907638549805, "step": 1024, "token_acc": 0.8132832080200502 }, { "epoch": 1.1529808773903263, "grad_norm": 1.9485901594161987, "learning_rate": 9.884065969105505e-05, "loss": 0.7224053144454956, "step": 1025, "token_acc": 0.788235294117647 }, { "epoch": 1.1541057367829022, "grad_norm": 1.9252604246139526, "learning_rate": 9.883667410764866e-05, "loss": 1.0127681493759155, "step": 1026, "token_acc": 0.707089552238806 }, { "epoch": 1.155230596175478, "grad_norm": 1.7990463972091675, "learning_rate": 9.883268176580149e-05, "loss": 0.6574790477752686, "step": 1027, "token_acc": 0.787515006002401 }, { "epoch": 1.156355455568054, "grad_norm": 2.000417947769165, "learning_rate": 9.882868266606606e-05, "loss": 0.6315466165542603, "step": 1028, "token_acc": 0.7943548387096774 }, { "epoch": 1.1574803149606299, "grad_norm": 1.527561902999878, "learning_rate": 9.882467680899577e-05, "loss": 0.4830964207649231, "step": 1029, "token_acc": 0.8470728793309439 }, { "epoch": 1.1586051743532058, "grad_norm": 1.5940115451812744, "learning_rate": 9.8820664195145e-05, "loss": 0.6793588399887085, "step": 1030, "token_acc": 0.8 }, { "epoch": 1.1597300337457819, "grad_norm": 1.9372591972351074, "learning_rate": 9.881664482506903e-05, "loss": 0.8744587898254395, "step": 1031, "token_acc": 0.7345415778251599 }, { "epoch": 1.1608548931383578, "grad_norm": 2.088409900665283, "learning_rate": 9.881261869932412e-05, "loss": 0.7444016933441162, "step": 1032, "token_acc": 0.7853470437017995 }, { "epoch": 1.1619797525309337, "grad_norm": 2.133322238922119, "learning_rate": 9.880858581846743e-05, "loss": 0.7230957746505737, "step": 1033, "token_acc": 0.7784172661870503 }, { "epoch": 1.1631046119235096, "grad_norm": 2.0989503860473633, "learning_rate": 9.880454618305707e-05, "loss": 0.8099318146705627, "step": 1034, "token_acc": 0.7576923076923077 }, { "epoch": 1.1642294713160855, "grad_norm": 2.0130615234375, "learning_rate": 9.880049979365206e-05, "loss": 0.9290593862533569, "step": 1035, "token_acc": 0.7386934673366834 }, { "epoch": 1.1653543307086613, "grad_norm": 1.967644214630127, "learning_rate": 9.87964466508124e-05, "loss": 0.7967303991317749, "step": 1036, "token_acc": 0.7655453618756372 }, { "epoch": 1.1664791901012372, "grad_norm": 1.694694995880127, "learning_rate": 9.879238675509899e-05, "loss": 0.6969523429870605, "step": 1037, "token_acc": 0.790008467400508 }, { "epoch": 1.1676040494938134, "grad_norm": 2.1467769145965576, "learning_rate": 9.878832010707366e-05, "loss": 0.8062194585800171, "step": 1038, "token_acc": 0.7749699157641395 }, { "epoch": 1.1687289088863893, "grad_norm": 1.704192876815796, "learning_rate": 9.878424670729922e-05, "loss": 0.6940456628799438, "step": 1039, "token_acc": 0.7981481481481482 }, { "epoch": 1.1698537682789651, "grad_norm": 1.764968752861023, "learning_rate": 9.878016655633934e-05, "loss": 0.8394087553024292, "step": 1040, "token_acc": 0.76793893129771 }, { "epoch": 1.170978627671541, "grad_norm": 1.8403760194778442, "learning_rate": 9.87760796547587e-05, "loss": 0.6796817779541016, "step": 1041, "token_acc": 0.7864184008762322 }, { "epoch": 1.172103487064117, "grad_norm": 1.8985134363174438, "learning_rate": 9.877198600312288e-05, "loss": 0.568522572517395, "step": 1042, "token_acc": 0.8191176470588235 }, { "epoch": 1.1732283464566928, "grad_norm": 1.8414713144302368, "learning_rate": 9.876788560199835e-05, "loss": 0.6308461427688599, "step": 1043, "token_acc": 0.8148614609571788 }, { "epoch": 1.1743532058492687, "grad_norm": 1.979507565498352, "learning_rate": 9.876377845195261e-05, "loss": 0.9907978773117065, "step": 1044, "token_acc": 0.7156040268456376 }, { "epoch": 1.1754780652418448, "grad_norm": 1.5953116416931152, "learning_rate": 9.875966455355403e-05, "loss": 0.6393597722053528, "step": 1045, "token_acc": 0.8185085354896675 }, { "epoch": 1.1766029246344207, "grad_norm": 1.7751193046569824, "learning_rate": 9.875554390737191e-05, "loss": 0.7883366346359253, "step": 1046, "token_acc": 0.7815964523281597 }, { "epoch": 1.1777277840269966, "grad_norm": 1.554008960723877, "learning_rate": 9.875141651397654e-05, "loss": 0.7790632247924805, "step": 1047, "token_acc": 0.7816265060240963 }, { "epoch": 1.1788526434195725, "grad_norm": 2.1153509616851807, "learning_rate": 9.874728237393906e-05, "loss": 0.9232488870620728, "step": 1048, "token_acc": 0.7455621301775148 }, { "epoch": 1.1799775028121484, "grad_norm": 1.898842453956604, "learning_rate": 9.874314148783161e-05, "loss": 0.7876330614089966, "step": 1049, "token_acc": 0.7525539160045402 }, { "epoch": 1.1811023622047245, "grad_norm": 1.8402438163757324, "learning_rate": 9.873899385622723e-05, "loss": 0.7766503691673279, "step": 1050, "token_acc": 0.7780040733197556 }, { "epoch": 1.1822272215973004, "grad_norm": 2.011876106262207, "learning_rate": 9.87348394796999e-05, "loss": 0.9257179498672485, "step": 1051, "token_acc": 0.7261904761904762 }, { "epoch": 1.1833520809898763, "grad_norm": 1.6434382200241089, "learning_rate": 9.873067835882457e-05, "loss": 0.782052218914032, "step": 1052, "token_acc": 0.7537248028045574 }, { "epoch": 1.1844769403824522, "grad_norm": 1.6193424463272095, "learning_rate": 9.872651049417707e-05, "loss": 0.7229406833648682, "step": 1053, "token_acc": 0.7875226039783002 }, { "epoch": 1.185601799775028, "grad_norm": 1.7701863050460815, "learning_rate": 9.872233588633419e-05, "loss": 0.8539007902145386, "step": 1054, "token_acc": 0.7468599033816425 }, { "epoch": 1.186726659167604, "grad_norm": 1.5909498929977417, "learning_rate": 9.871815453587363e-05, "loss": 0.7871546745300293, "step": 1055, "token_acc": 0.777264325323475 }, { "epoch": 1.18785151856018, "grad_norm": 1.7399165630340576, "learning_rate": 9.871396644337407e-05, "loss": 0.539446234703064, "step": 1056, "token_acc": 0.8337801608579088 }, { "epoch": 1.188976377952756, "grad_norm": 1.7949501276016235, "learning_rate": 9.870977160941508e-05, "loss": 0.7444372177124023, "step": 1057, "token_acc": 0.7681297709923665 }, { "epoch": 1.190101237345332, "grad_norm": 2.0116281509399414, "learning_rate": 9.870557003457717e-05, "loss": 0.9293835759162903, "step": 1058, "token_acc": 0.7471264367816092 }, { "epoch": 1.1912260967379078, "grad_norm": 1.4091111421585083, "learning_rate": 9.870136171944181e-05, "loss": 0.5634556412696838, "step": 1059, "token_acc": 0.8240200166805671 }, { "epoch": 1.1923509561304837, "grad_norm": 2.1456551551818848, "learning_rate": 9.869714666459137e-05, "loss": 0.7371869087219238, "step": 1060, "token_acc": 0.7672872340425532 }, { "epoch": 1.1934758155230596, "grad_norm": 1.7064337730407715, "learning_rate": 9.869292487060917e-05, "loss": 0.7440473437309265, "step": 1061, "token_acc": 0.7967086156824782 }, { "epoch": 1.1946006749156355, "grad_norm": 1.7470288276672363, "learning_rate": 9.868869633807944e-05, "loss": 0.7229954600334167, "step": 1062, "token_acc": 0.7792521109770808 }, { "epoch": 1.1957255343082114, "grad_norm": 1.9165387153625488, "learning_rate": 9.86844610675874e-05, "loss": 0.6707170009613037, "step": 1063, "token_acc": 0.7938144329896907 }, { "epoch": 1.1968503937007875, "grad_norm": 1.85575532913208, "learning_rate": 9.868021905971912e-05, "loss": 0.7267842292785645, "step": 1064, "token_acc": 0.7783018867924528 }, { "epoch": 1.1979752530933634, "grad_norm": 1.8728471994400024, "learning_rate": 9.867597031506168e-05, "loss": 0.7506218552589417, "step": 1065, "token_acc": 0.7885714285714286 }, { "epoch": 1.1991001124859393, "grad_norm": 1.6575701236724854, "learning_rate": 9.867171483420304e-05, "loss": 0.6944119334220886, "step": 1066, "token_acc": 0.7993795243019648 }, { "epoch": 1.2002249718785152, "grad_norm": 1.9615247249603271, "learning_rate": 9.866745261773211e-05, "loss": 0.6921218037605286, "step": 1067, "token_acc": 0.7899159663865546 }, { "epoch": 1.201349831271091, "grad_norm": 1.9968774318695068, "learning_rate": 9.866318366623874e-05, "loss": 0.8560754060745239, "step": 1068, "token_acc": 0.7387580299785867 }, { "epoch": 1.202474690663667, "grad_norm": 1.512145757675171, "learning_rate": 9.86589079803137e-05, "loss": 0.5352452993392944, "step": 1069, "token_acc": 0.8341759352881699 }, { "epoch": 1.2035995500562429, "grad_norm": 2.0756096839904785, "learning_rate": 9.86546255605487e-05, "loss": 0.792404055595398, "step": 1070, "token_acc": 0.7730582524271845 }, { "epoch": 1.204724409448819, "grad_norm": 1.7361799478530884, "learning_rate": 9.865033640753638e-05, "loss": 0.6667233109474182, "step": 1071, "token_acc": 0.7889908256880734 }, { "epoch": 1.2058492688413949, "grad_norm": 2.0141680240631104, "learning_rate": 9.864604052187029e-05, "loss": 0.6351679563522339, "step": 1072, "token_acc": 0.797191887675507 }, { "epoch": 1.2069741282339708, "grad_norm": 1.7844659090042114, "learning_rate": 9.864173790414495e-05, "loss": 0.759089469909668, "step": 1073, "token_acc": 0.7749747729566094 }, { "epoch": 1.2080989876265467, "grad_norm": 2.1197869777679443, "learning_rate": 9.863742855495579e-05, "loss": 0.9093616008758545, "step": 1074, "token_acc": 0.7345029239766082 }, { "epoch": 1.2092238470191226, "grad_norm": 2.0963308811187744, "learning_rate": 9.863311247489919e-05, "loss": 0.7380388379096985, "step": 1075, "token_acc": 0.7897310513447433 }, { "epoch": 1.2103487064116984, "grad_norm": 1.802414059638977, "learning_rate": 9.862878966457242e-05, "loss": 0.8467721939086914, "step": 1076, "token_acc": 0.7523364485981309 }, { "epoch": 1.2114735658042743, "grad_norm": 1.7221776247024536, "learning_rate": 9.862446012457372e-05, "loss": 0.6081470251083374, "step": 1077, "token_acc": 0.8157303370786517 }, { "epoch": 1.2125984251968505, "grad_norm": 2.0686917304992676, "learning_rate": 9.862012385550226e-05, "loss": 0.5994859933853149, "step": 1078, "token_acc": 0.8034300791556728 }, { "epoch": 1.2137232845894264, "grad_norm": 1.9705700874328613, "learning_rate": 9.86157808579581e-05, "loss": 0.7470605373382568, "step": 1079, "token_acc": 0.7693965517241379 }, { "epoch": 1.2148481439820022, "grad_norm": 2.055659055709839, "learning_rate": 9.86114311325423e-05, "loss": 0.7519593238830566, "step": 1080, "token_acc": 0.7729083665338645 }, { "epoch": 1.2159730033745781, "grad_norm": 2.0604233741760254, "learning_rate": 9.86070746798568e-05, "loss": 0.8023025989532471, "step": 1081, "token_acc": 0.7505910165484634 }, { "epoch": 1.217097862767154, "grad_norm": 2.2152581214904785, "learning_rate": 9.860271150050446e-05, "loss": 0.779739260673523, "step": 1082, "token_acc": 0.768025078369906 }, { "epoch": 1.2182227221597302, "grad_norm": 1.3188753128051758, "learning_rate": 9.859834159508911e-05, "loss": 0.8406123518943787, "step": 1083, "token_acc": 0.7568555758683729 }, { "epoch": 1.219347581552306, "grad_norm": 1.9941028356552124, "learning_rate": 9.859396496421549e-05, "loss": 0.7400748133659363, "step": 1084, "token_acc": 0.7690387016229713 }, { "epoch": 1.220472440944882, "grad_norm": 1.93882417678833, "learning_rate": 9.85895816084893e-05, "loss": 0.8055552840232849, "step": 1085, "token_acc": 0.7656405163853028 }, { "epoch": 1.2215973003374578, "grad_norm": 2.056769371032715, "learning_rate": 9.858519152851713e-05, "loss": 0.6512973308563232, "step": 1086, "token_acc": 0.7879699248120301 }, { "epoch": 1.2227221597300337, "grad_norm": 2.040513277053833, "learning_rate": 9.85807947249065e-05, "loss": 0.7885076999664307, "step": 1087, "token_acc": 0.7650676506765067 }, { "epoch": 1.2238470191226096, "grad_norm": 2.0609021186828613, "learning_rate": 9.85763911982659e-05, "loss": 0.9189263582229614, "step": 1088, "token_acc": 0.7401812688821753 }, { "epoch": 1.2249718785151855, "grad_norm": 1.7333117723464966, "learning_rate": 9.857198094920473e-05, "loss": 0.7075931429862976, "step": 1089, "token_acc": 0.7857142857142857 }, { "epoch": 1.2260967379077616, "grad_norm": 1.9723747968673706, "learning_rate": 9.85675639783333e-05, "loss": 0.5464856624603271, "step": 1090, "token_acc": 0.838258164852255 }, { "epoch": 1.2272215973003375, "grad_norm": 1.9176980257034302, "learning_rate": 9.85631402862629e-05, "loss": 0.6734591126441956, "step": 1091, "token_acc": 0.7846332945285215 }, { "epoch": 1.2283464566929134, "grad_norm": 2.1149277687072754, "learning_rate": 9.855870987360567e-05, "loss": 0.6462591886520386, "step": 1092, "token_acc": 0.7987220447284346 }, { "epoch": 1.2294713160854893, "grad_norm": 1.8957749605178833, "learning_rate": 9.855427274097478e-05, "loss": 0.5706501007080078, "step": 1093, "token_acc": 0.8114525139664804 }, { "epoch": 1.2305961754780652, "grad_norm": 2.0141026973724365, "learning_rate": 9.854982888898425e-05, "loss": 0.7886853218078613, "step": 1094, "token_acc": 0.7591973244147158 }, { "epoch": 1.231721034870641, "grad_norm": 1.7669544219970703, "learning_rate": 9.854537831824906e-05, "loss": 0.862631618976593, "step": 1095, "token_acc": 0.7466666666666667 }, { "epoch": 1.232845894263217, "grad_norm": 1.9202200174331665, "learning_rate": 9.85409210293851e-05, "loss": 0.7882469296455383, "step": 1096, "token_acc": 0.7607655502392344 }, { "epoch": 1.2339707536557931, "grad_norm": 1.8039946556091309, "learning_rate": 9.853645702300926e-05, "loss": 0.7769016623497009, "step": 1097, "token_acc": 0.775435380384968 }, { "epoch": 1.235095613048369, "grad_norm": 1.9146826267242432, "learning_rate": 9.853198629973928e-05, "loss": 0.7346781492233276, "step": 1098, "token_acc": 0.8041958041958042 }, { "epoch": 1.236220472440945, "grad_norm": 1.9846240282058716, "learning_rate": 9.852750886019384e-05, "loss": 0.6409051418304443, "step": 1099, "token_acc": 0.813953488372093 }, { "epoch": 1.2373453318335208, "grad_norm": 1.5886664390563965, "learning_rate": 9.852302470499259e-05, "loss": 0.6257208585739136, "step": 1100, "token_acc": 0.8233719892952721 }, { "epoch": 1.2384701912260967, "grad_norm": 2.1841952800750732, "learning_rate": 9.851853383475607e-05, "loss": 0.8039845824241638, "step": 1101, "token_acc": 0.7610294117647058 }, { "epoch": 1.2395950506186726, "grad_norm": 1.9526396989822388, "learning_rate": 9.851403625010578e-05, "loss": 0.8503068089485168, "step": 1102, "token_acc": 0.7435897435897436 }, { "epoch": 1.2407199100112485, "grad_norm": 1.9749596118927002, "learning_rate": 9.850953195166413e-05, "loss": 0.8229572176933289, "step": 1103, "token_acc": 0.7447478991596639 }, { "epoch": 1.2418447694038246, "grad_norm": 2.074444055557251, "learning_rate": 9.850502094005447e-05, "loss": 0.8630874156951904, "step": 1104, "token_acc": 0.7403314917127072 }, { "epoch": 1.2429696287964005, "grad_norm": 2.2238144874572754, "learning_rate": 9.850050321590105e-05, "loss": 0.6959813833236694, "step": 1105, "token_acc": 0.787598944591029 }, { "epoch": 1.2440944881889764, "grad_norm": 2.220320224761963, "learning_rate": 9.849597877982909e-05, "loss": 0.7972249984741211, "step": 1106, "token_acc": 0.7555555555555555 }, { "epoch": 1.2452193475815523, "grad_norm": 1.6296430826187134, "learning_rate": 9.849144763246472e-05, "loss": 0.6671210527420044, "step": 1107, "token_acc": 0.8022847100175747 }, { "epoch": 1.2463442069741282, "grad_norm": 1.8485764265060425, "learning_rate": 9.8486909774435e-05, "loss": 0.8901037573814392, "step": 1108, "token_acc": 0.7413793103448276 }, { "epoch": 1.247469066366704, "grad_norm": 2.0639424324035645, "learning_rate": 9.848236520636791e-05, "loss": 0.7153653502464294, "step": 1109, "token_acc": 0.7859116022099447 }, { "epoch": 1.24859392575928, "grad_norm": 2.200390577316284, "learning_rate": 9.847781392889237e-05, "loss": 0.8894630074501038, "step": 1110, "token_acc": 0.7567251461988304 }, { "epoch": 1.249718785151856, "grad_norm": 1.9271881580352783, "learning_rate": 9.847325594263824e-05, "loss": 0.8936643600463867, "step": 1111, "token_acc": 0.7406568516421291 }, { "epoch": 1.250843644544432, "grad_norm": 1.7713828086853027, "learning_rate": 9.846869124823626e-05, "loss": 0.744909405708313, "step": 1112, "token_acc": 0.7911975435005117 }, { "epoch": 1.2519685039370079, "grad_norm": 2.0065088272094727, "learning_rate": 9.846411984631814e-05, "loss": 0.7006340026855469, "step": 1113, "token_acc": 0.7857142857142857 }, { "epoch": 1.2530933633295838, "grad_norm": 2.3015918731689453, "learning_rate": 9.845954173751654e-05, "loss": 0.7962387800216675, "step": 1114, "token_acc": 0.7603305785123967 }, { "epoch": 1.2542182227221597, "grad_norm": 1.9261541366577148, "learning_rate": 9.8454956922465e-05, "loss": 0.7194169759750366, "step": 1115, "token_acc": 0.7963525835866262 }, { "epoch": 1.2553430821147358, "grad_norm": 1.8706921339035034, "learning_rate": 9.845036540179802e-05, "loss": 0.676758885383606, "step": 1116, "token_acc": 0.7897435897435897 }, { "epoch": 1.2564679415073114, "grad_norm": 2.134390115737915, "learning_rate": 9.844576717615099e-05, "loss": 0.9071669578552246, "step": 1117, "token_acc": 0.7372881355932204 }, { "epoch": 1.2575928008998876, "grad_norm": 2.034235954284668, "learning_rate": 9.844116224616025e-05, "loss": 0.817076563835144, "step": 1118, "token_acc": 0.7493540051679587 }, { "epoch": 1.2587176602924635, "grad_norm": 1.793542504310608, "learning_rate": 9.84365506124631e-05, "loss": 0.6895601749420166, "step": 1119, "token_acc": 0.7820823244552058 }, { "epoch": 1.2598425196850394, "grad_norm": 1.961474061012268, "learning_rate": 9.843193227569771e-05, "loss": 0.9516655206680298, "step": 1120, "token_acc": 0.7230443974630021 }, { "epoch": 1.2609673790776152, "grad_norm": 1.9216622114181519, "learning_rate": 9.842730723650322e-05, "loss": 0.6908737421035767, "step": 1121, "token_acc": 0.7788732394366197 }, { "epoch": 1.2620922384701911, "grad_norm": 2.1790921688079834, "learning_rate": 9.842267549551967e-05, "loss": 0.699737012386322, "step": 1122, "token_acc": 0.7688356164383562 }, { "epoch": 1.2632170978627673, "grad_norm": 1.5109224319458008, "learning_rate": 9.841803705338806e-05, "loss": 0.6868637800216675, "step": 1123, "token_acc": 0.8058419243986255 }, { "epoch": 1.2643419572553432, "grad_norm": 1.7731660604476929, "learning_rate": 9.841339191075028e-05, "loss": 0.9159467220306396, "step": 1124, "token_acc": 0.7396226415094339 }, { "epoch": 1.265466816647919, "grad_norm": 1.784867525100708, "learning_rate": 9.840874006824917e-05, "loss": 0.6441066861152649, "step": 1125, "token_acc": 0.7944572748267898 }, { "epoch": 1.266591676040495, "grad_norm": 1.970720887184143, "learning_rate": 9.840408152652849e-05, "loss": 0.8414185047149658, "step": 1126, "token_acc": 0.7392739273927392 }, { "epoch": 1.2677165354330708, "grad_norm": 1.6855396032333374, "learning_rate": 9.839941628623295e-05, "loss": 0.7751345038414001, "step": 1127, "token_acc": 0.7876984126984127 }, { "epoch": 1.2688413948256467, "grad_norm": 1.6813006401062012, "learning_rate": 9.839474434800812e-05, "loss": 0.9087167382240295, "step": 1128, "token_acc": 0.7469244288224957 }, { "epoch": 1.2699662542182226, "grad_norm": 1.914397120475769, "learning_rate": 9.83900657125006e-05, "loss": 0.7158008813858032, "step": 1129, "token_acc": 0.7857142857142857 }, { "epoch": 1.2710911136107987, "grad_norm": 1.9976913928985596, "learning_rate": 9.838538038035783e-05, "loss": 0.835132360458374, "step": 1130, "token_acc": 0.7471502590673575 }, { "epoch": 1.2722159730033746, "grad_norm": 1.8085864782333374, "learning_rate": 9.838068835222821e-05, "loss": 0.7157701253890991, "step": 1131, "token_acc": 0.7794561933534743 }, { "epoch": 1.2733408323959505, "grad_norm": 1.919837236404419, "learning_rate": 9.837598962876104e-05, "loss": 0.6978321075439453, "step": 1132, "token_acc": 0.77491601343785 }, { "epoch": 1.2744656917885264, "grad_norm": 1.9517295360565186, "learning_rate": 9.837128421060661e-05, "loss": 0.9586184024810791, "step": 1133, "token_acc": 0.7205750224618149 }, { "epoch": 1.2755905511811023, "grad_norm": 2.05635404586792, "learning_rate": 9.836657209841607e-05, "loss": 0.7032399773597717, "step": 1134, "token_acc": 0.7790697674418605 }, { "epoch": 1.2767154105736782, "grad_norm": 2.0216894149780273, "learning_rate": 9.836185329284154e-05, "loss": 0.7913829684257507, "step": 1135, "token_acc": 0.753688989784336 }, { "epoch": 1.277840269966254, "grad_norm": 1.9038254022598267, "learning_rate": 9.835712779453603e-05, "loss": 0.5243752002716064, "step": 1136, "token_acc": 0.8215892053973014 }, { "epoch": 1.2789651293588302, "grad_norm": 1.9229291677474976, "learning_rate": 9.835239560415353e-05, "loss": 0.5047225952148438, "step": 1137, "token_acc": 0.8431911966987621 }, { "epoch": 1.2800899887514061, "grad_norm": 1.9632418155670166, "learning_rate": 9.834765672234886e-05, "loss": 0.8656517863273621, "step": 1138, "token_acc": 0.7319711538461539 }, { "epoch": 1.281214848143982, "grad_norm": 1.9160821437835693, "learning_rate": 9.834291114977789e-05, "loss": 0.7650549411773682, "step": 1139, "token_acc": 0.7719836400817995 }, { "epoch": 1.282339707536558, "grad_norm": 2.034409523010254, "learning_rate": 9.833815888709732e-05, "loss": 0.6606457233428955, "step": 1140, "token_acc": 0.8053097345132744 }, { "epoch": 1.2834645669291338, "grad_norm": 1.8386536836624146, "learning_rate": 9.833339993496482e-05, "loss": 0.5787357687950134, "step": 1141, "token_acc": 0.8220338983050848 }, { "epoch": 1.28458942632171, "grad_norm": 1.8483532667160034, "learning_rate": 9.832863429403898e-05, "loss": 0.73223876953125, "step": 1142, "token_acc": 0.7667464114832536 }, { "epoch": 1.2857142857142856, "grad_norm": 1.6013882160186768, "learning_rate": 9.832386196497929e-05, "loss": 0.6825962066650391, "step": 1143, "token_acc": 0.7962347729789591 }, { "epoch": 1.2868391451068617, "grad_norm": 1.8573744297027588, "learning_rate": 9.831908294844621e-05, "loss": 0.7070825099945068, "step": 1144, "token_acc": 0.7828054298642534 }, { "epoch": 1.2879640044994376, "grad_norm": 1.739137887954712, "learning_rate": 9.831429724510107e-05, "loss": 0.8214720487594604, "step": 1145, "token_acc": 0.7402843601895734 }, { "epoch": 1.2890888638920135, "grad_norm": 2.1184847354888916, "learning_rate": 9.83095048556062e-05, "loss": 0.8210320472717285, "step": 1146, "token_acc": 0.7478260869565218 }, { "epoch": 1.2902137232845894, "grad_norm": 1.6576919555664062, "learning_rate": 9.830470578062478e-05, "loss": 0.7436544895172119, "step": 1147, "token_acc": 0.7782846715328468 }, { "epoch": 1.2913385826771653, "grad_norm": 1.7995818853378296, "learning_rate": 9.829990002082095e-05, "loss": 0.6687522530555725, "step": 1148, "token_acc": 0.7895348837209303 }, { "epoch": 1.2924634420697414, "grad_norm": 2.1854593753814697, "learning_rate": 9.829508757685979e-05, "loss": 0.6283881664276123, "step": 1149, "token_acc": 0.8029315960912052 }, { "epoch": 1.2935883014623173, "grad_norm": 1.9457000494003296, "learning_rate": 9.829026844940726e-05, "loss": 0.7387537360191345, "step": 1150, "token_acc": 0.7909930715935335 }, { "epoch": 1.2947131608548932, "grad_norm": 2.464952230453491, "learning_rate": 9.82854426391303e-05, "loss": 0.7758152484893799, "step": 1151, "token_acc": 0.7656500802568218 }, { "epoch": 1.295838020247469, "grad_norm": 1.6146377325057983, "learning_rate": 9.828061014669674e-05, "loss": 0.5888039469718933, "step": 1152, "token_acc": 0.8071278825995807 }, { "epoch": 1.296962879640045, "grad_norm": 1.9203675985336304, "learning_rate": 9.827577097277534e-05, "loss": 0.820580005645752, "step": 1153, "token_acc": 0.7569367369589345 }, { "epoch": 1.2980877390326209, "grad_norm": 1.8714969158172607, "learning_rate": 9.827092511803579e-05, "loss": 0.782315731048584, "step": 1154, "token_acc": 0.7859531772575251 }, { "epoch": 1.2992125984251968, "grad_norm": 1.909345269203186, "learning_rate": 9.826607258314868e-05, "loss": 0.835975170135498, "step": 1155, "token_acc": 0.7472118959107806 }, { "epoch": 1.3003374578177729, "grad_norm": 1.8869329690933228, "learning_rate": 9.826121336878557e-05, "loss": 0.7520095705986023, "step": 1156, "token_acc": 0.7606837606837606 }, { "epoch": 1.3014623172103488, "grad_norm": 2.258169174194336, "learning_rate": 9.825634747561892e-05, "loss": 0.6906620264053345, "step": 1157, "token_acc": 0.7872696817420436 }, { "epoch": 1.3025871766029247, "grad_norm": 2.0555737018585205, "learning_rate": 9.82514749043221e-05, "loss": 0.899552583694458, "step": 1158, "token_acc": 0.7375 }, { "epoch": 1.3037120359955006, "grad_norm": 1.943041443824768, "learning_rate": 9.824659565556943e-05, "loss": 0.8888822197914124, "step": 1159, "token_acc": 0.7405405405405405 }, { "epoch": 1.3048368953880765, "grad_norm": 1.948220133781433, "learning_rate": 9.824170973003613e-05, "loss": 0.6744353175163269, "step": 1160, "token_acc": 0.7951807228915663 }, { "epoch": 1.3059617547806524, "grad_norm": 2.0374958515167236, "learning_rate": 9.823681712839838e-05, "loss": 0.7634316682815552, "step": 1161, "token_acc": 0.7538659793814433 }, { "epoch": 1.3070866141732282, "grad_norm": 1.7320092916488647, "learning_rate": 9.823191785133323e-05, "loss": 0.676822304725647, "step": 1162, "token_acc": 0.7953144266337855 }, { "epoch": 1.3082114735658044, "grad_norm": 2.0614230632781982, "learning_rate": 9.82270118995187e-05, "loss": 0.8957776427268982, "step": 1163, "token_acc": 0.7620689655172413 }, { "epoch": 1.3093363329583803, "grad_norm": 1.8237327337265015, "learning_rate": 9.822209927363373e-05, "loss": 0.6826528310775757, "step": 1164, "token_acc": 0.7845188284518828 }, { "epoch": 1.3104611923509561, "grad_norm": 1.8311508893966675, "learning_rate": 9.821717997435813e-05, "loss": 0.7462542653083801, "step": 1165, "token_acc": 0.7751855779427359 }, { "epoch": 1.311586051743532, "grad_norm": 1.9363471269607544, "learning_rate": 9.821225400237272e-05, "loss": 0.6297539472579956, "step": 1166, "token_acc": 0.810126582278481 }, { "epoch": 1.312710911136108, "grad_norm": 1.8774213790893555, "learning_rate": 9.82073213583592e-05, "loss": 0.7530253529548645, "step": 1167, "token_acc": 0.7691415313225058 }, { "epoch": 1.3138357705286838, "grad_norm": 2.119764804840088, "learning_rate": 9.820238204300016e-05, "loss": 0.7457355260848999, "step": 1168, "token_acc": 0.7831325301204819 }, { "epoch": 1.3149606299212597, "grad_norm": 1.8852651119232178, "learning_rate": 9.819743605697915e-05, "loss": 0.7799338102340698, "step": 1169, "token_acc": 0.7615480649188514 }, { "epoch": 1.3160854893138358, "grad_norm": 1.6425353288650513, "learning_rate": 9.819248340098066e-05, "loss": 0.7283724546432495, "step": 1170, "token_acc": 0.777992277992278 }, { "epoch": 1.3172103487064117, "grad_norm": 1.7326633930206299, "learning_rate": 9.818752407569007e-05, "loss": 0.7621176242828369, "step": 1171, "token_acc": 0.7786333012512031 }, { "epoch": 1.3183352080989876, "grad_norm": 1.852607250213623, "learning_rate": 9.818255808179368e-05, "loss": 0.7715255618095398, "step": 1172, "token_acc": 0.7731591448931117 }, { "epoch": 1.3194600674915635, "grad_norm": 1.9347583055496216, "learning_rate": 9.817758541997876e-05, "loss": 0.8249392509460449, "step": 1173, "token_acc": 0.7544052863436124 }, { "epoch": 1.3205849268841394, "grad_norm": 1.8014034032821655, "learning_rate": 9.817260609093344e-05, "loss": 0.7191550731658936, "step": 1174, "token_acc": 0.8076923076923077 }, { "epoch": 1.3217097862767155, "grad_norm": 1.8334016799926758, "learning_rate": 9.816762009534681e-05, "loss": 0.7270307540893555, "step": 1175, "token_acc": 0.786046511627907 }, { "epoch": 1.3228346456692912, "grad_norm": 1.7981114387512207, "learning_rate": 9.816262743390886e-05, "loss": 0.9002513885498047, "step": 1176, "token_acc": 0.7321063394683026 }, { "epoch": 1.3239595050618673, "grad_norm": 1.7505642175674438, "learning_rate": 9.815762810731058e-05, "loss": 0.6990538239479065, "step": 1177, "token_acc": 0.7769516728624535 }, { "epoch": 1.3250843644544432, "grad_norm": 1.624420404434204, "learning_rate": 9.815262211624374e-05, "loss": 0.7295364737510681, "step": 1178, "token_acc": 0.7681592039800995 }, { "epoch": 1.3262092238470191, "grad_norm": 1.7243483066558838, "learning_rate": 9.814760946140117e-05, "loss": 0.6327999830245972, "step": 1179, "token_acc": 0.7987951807228916 }, { "epoch": 1.327334083239595, "grad_norm": 1.8242930173873901, "learning_rate": 9.814259014347654e-05, "loss": 0.8540687561035156, "step": 1180, "token_acc": 0.7515856236786469 }, { "epoch": 1.328458942632171, "grad_norm": 1.7290256023406982, "learning_rate": 9.813756416316445e-05, "loss": 0.7045478820800781, "step": 1181, "token_acc": 0.7844243792325056 }, { "epoch": 1.329583802024747, "grad_norm": 1.7752656936645508, "learning_rate": 9.813253152116046e-05, "loss": 0.7353832721710205, "step": 1182, "token_acc": 0.8014101057579318 }, { "epoch": 1.330708661417323, "grad_norm": 1.9084714651107788, "learning_rate": 9.812749221816104e-05, "loss": 0.6864399909973145, "step": 1183, "token_acc": 0.7939560439560439 }, { "epoch": 1.3318335208098988, "grad_norm": 1.9909682273864746, "learning_rate": 9.812244625486357e-05, "loss": 0.7946924567222595, "step": 1184, "token_acc": 0.7633674630261661 }, { "epoch": 1.3329583802024747, "grad_norm": 1.7924903631210327, "learning_rate": 9.811739363196632e-05, "loss": 0.7163004875183105, "step": 1185, "token_acc": 0.7860759493670886 }, { "epoch": 1.3340832395950506, "grad_norm": 1.8167482614517212, "learning_rate": 9.811233435016855e-05, "loss": 0.7713123559951782, "step": 1186, "token_acc": 0.7672764227642277 }, { "epoch": 1.3352080989876265, "grad_norm": 1.7960072755813599, "learning_rate": 9.81072684101704e-05, "loss": 0.7397011518478394, "step": 1187, "token_acc": 0.7742594484167518 }, { "epoch": 1.3363329583802024, "grad_norm": 1.6907356977462769, "learning_rate": 9.810219581267294e-05, "loss": 0.6869243383407593, "step": 1188, "token_acc": 0.7923387096774194 }, { "epoch": 1.3374578177727785, "grad_norm": 1.8542588949203491, "learning_rate": 9.809711655837815e-05, "loss": 0.7611798048019409, "step": 1189, "token_acc": 0.7706214689265537 }, { "epoch": 1.3385826771653544, "grad_norm": 1.850479245185852, "learning_rate": 9.809203064798894e-05, "loss": 0.8262269496917725, "step": 1190, "token_acc": 0.7399553571428571 }, { "epoch": 1.3397075365579303, "grad_norm": 1.812281847000122, "learning_rate": 9.808693808220916e-05, "loss": 0.7734524011611938, "step": 1191, "token_acc": 0.7780847145488029 }, { "epoch": 1.3408323959505062, "grad_norm": 2.1112239360809326, "learning_rate": 9.808183886174354e-05, "loss": 0.6427402496337891, "step": 1192, "token_acc": 0.8014705882352942 }, { "epoch": 1.341957255343082, "grad_norm": 1.3059227466583252, "learning_rate": 9.807673298729778e-05, "loss": 0.5081080198287964, "step": 1193, "token_acc": 0.8301574150787076 }, { "epoch": 1.343082114735658, "grad_norm": 1.7677901983261108, "learning_rate": 9.807162045957845e-05, "loss": 0.4972659945487976, "step": 1194, "token_acc": 0.8258859784283513 }, { "epoch": 1.3442069741282339, "grad_norm": 1.6707537174224854, "learning_rate": 9.806650127929308e-05, "loss": 0.5908019542694092, "step": 1195, "token_acc": 0.8155650319829424 }, { "epoch": 1.34533183352081, "grad_norm": 1.9535456895828247, "learning_rate": 9.80613754471501e-05, "loss": 0.6776699423789978, "step": 1196, "token_acc": 0.7893333333333333 }, { "epoch": 1.3464566929133859, "grad_norm": 2.099128484725952, "learning_rate": 9.805624296385887e-05, "loss": 0.8872470259666443, "step": 1197, "token_acc": 0.7520661157024794 }, { "epoch": 1.3475815523059618, "grad_norm": 1.833154320716858, "learning_rate": 9.805110383012967e-05, "loss": 0.9282819032669067, "step": 1198, "token_acc": 0.7437275985663082 }, { "epoch": 1.3487064116985377, "grad_norm": 1.9539858102798462, "learning_rate": 9.80459580466737e-05, "loss": 0.7610458135604858, "step": 1199, "token_acc": 0.762114537444934 }, { "epoch": 1.3498312710911136, "grad_norm": 1.3830363750457764, "learning_rate": 9.804080561420306e-05, "loss": 0.530579686164856, "step": 1200, "token_acc": 0.8433303491495077 }, { "epoch": 1.3498312710911136, "eval_loss": 0.9042019248008728, "eval_runtime": 31.5947, "eval_samples_per_second": 25.416, "eval_steps_per_second": 3.197, "eval_token_acc": 0.7396594748581146, "step": 1200 }, { "epoch": 1.3509561304836897, "grad_norm": 1.9711734056472778, "learning_rate": 9.803564653343083e-05, "loss": 0.8833035230636597, "step": 1201, "token_acc": 0.7422360248447205 }, { "epoch": 1.3520809898762653, "grad_norm": 1.947704553604126, "learning_rate": 9.803048080507091e-05, "loss": 0.6516164541244507, "step": 1202, "token_acc": 0.8018867924528302 }, { "epoch": 1.3532058492688415, "grad_norm": 2.0422778129577637, "learning_rate": 9.802530842983821e-05, "loss": 0.6906799674034119, "step": 1203, "token_acc": 0.7706422018348624 }, { "epoch": 1.3543307086614174, "grad_norm": 1.849646806716919, "learning_rate": 9.802012940844853e-05, "loss": 0.6728134155273438, "step": 1204, "token_acc": 0.8189448441247003 }, { "epoch": 1.3554555680539933, "grad_norm": 1.6206181049346924, "learning_rate": 9.801494374161859e-05, "loss": 0.6462411880493164, "step": 1205, "token_acc": 0.7918683446272992 }, { "epoch": 1.3565804274465691, "grad_norm": 2.335343360900879, "learning_rate": 9.800975143006603e-05, "loss": 0.8859072923660278, "step": 1206, "token_acc": 0.7558823529411764 }, { "epoch": 1.357705286839145, "grad_norm": 1.5724085569381714, "learning_rate": 9.800455247450937e-05, "loss": 0.6215620636940002, "step": 1207, "token_acc": 0.8091451292246521 }, { "epoch": 1.3588301462317212, "grad_norm": 2.018808126449585, "learning_rate": 9.799934687566813e-05, "loss": 0.6603258848190308, "step": 1208, "token_acc": 0.7899159663865546 }, { "epoch": 1.3599550056242968, "grad_norm": 1.6595518589019775, "learning_rate": 9.799413463426269e-05, "loss": 0.6511388421058655, "step": 1209, "token_acc": 0.8130952380952381 }, { "epoch": 1.361079865016873, "grad_norm": 2.3242530822753906, "learning_rate": 9.798891575101436e-05, "loss": 0.6456098556518555, "step": 1210, "token_acc": 0.8024691358024691 }, { "epoch": 1.3622047244094488, "grad_norm": 1.8875223398208618, "learning_rate": 9.798369022664537e-05, "loss": 0.7139500379562378, "step": 1211, "token_acc": 0.7662037037037037 }, { "epoch": 1.3633295838020247, "grad_norm": 1.5834060907363892, "learning_rate": 9.79784580618789e-05, "loss": 0.728079080581665, "step": 1212, "token_acc": 0.7964804896710023 }, { "epoch": 1.3644544431946006, "grad_norm": 1.695387840270996, "learning_rate": 9.797321925743899e-05, "loss": 0.8298897743225098, "step": 1213, "token_acc": 0.7595486111111112 }, { "epoch": 1.3655793025871765, "grad_norm": 1.9305402040481567, "learning_rate": 9.796797381405066e-05, "loss": 0.7677526473999023, "step": 1214, "token_acc": 0.7598978288633461 }, { "epoch": 1.3667041619797526, "grad_norm": 1.8348453044891357, "learning_rate": 9.796272173243978e-05, "loss": 0.8261646032333374, "step": 1215, "token_acc": 0.7590113285272915 }, { "epoch": 1.3678290213723285, "grad_norm": 1.9346692562103271, "learning_rate": 9.795746301333324e-05, "loss": 0.6393595933914185, "step": 1216, "token_acc": 0.7980501392757661 }, { "epoch": 1.3689538807649044, "grad_norm": 1.8678233623504639, "learning_rate": 9.795219765745873e-05, "loss": 0.7619723081588745, "step": 1217, "token_acc": 0.7835269271383316 }, { "epoch": 1.3700787401574803, "grad_norm": 2.066598415374756, "learning_rate": 9.794692566554493e-05, "loss": 0.7790516018867493, "step": 1218, "token_acc": 0.7749648382559775 }, { "epoch": 1.3712035995500562, "grad_norm": 2.0440330505371094, "learning_rate": 9.794164703832144e-05, "loss": 0.5336970686912537, "step": 1219, "token_acc": 0.8235294117647058 }, { "epoch": 1.3723284589426321, "grad_norm": 1.94808030128479, "learning_rate": 9.793636177651874e-05, "loss": 0.7029914855957031, "step": 1220, "token_acc": 0.7724719101123596 }, { "epoch": 1.373453318335208, "grad_norm": 1.7973524332046509, "learning_rate": 9.793106988086827e-05, "loss": 0.7514328956604004, "step": 1221, "token_acc": 0.7741935483870968 }, { "epoch": 1.3745781777277841, "grad_norm": 1.8755327463150024, "learning_rate": 9.792577135210236e-05, "loss": 0.8779059648513794, "step": 1222, "token_acc": 0.7549668874172185 }, { "epoch": 1.37570303712036, "grad_norm": 2.1922717094421387, "learning_rate": 9.792046619095425e-05, "loss": 0.6990558505058289, "step": 1223, "token_acc": 0.7618384401114207 }, { "epoch": 1.376827896512936, "grad_norm": 1.605543851852417, "learning_rate": 9.791515439815815e-05, "loss": 0.6103789806365967, "step": 1224, "token_acc": 0.8087934560327198 }, { "epoch": 1.3779527559055118, "grad_norm": 2.138601779937744, "learning_rate": 9.790983597444913e-05, "loss": 0.6461384296417236, "step": 1225, "token_acc": 0.7792 }, { "epoch": 1.3790776152980877, "grad_norm": 1.7272295951843262, "learning_rate": 9.79045109205632e-05, "loss": 0.9522769451141357, "step": 1226, "token_acc": 0.726078799249531 }, { "epoch": 1.3802024746906636, "grad_norm": 1.782637596130371, "learning_rate": 9.789917923723728e-05, "loss": 0.5821595788002014, "step": 1227, "token_acc": 0.8129952456418383 }, { "epoch": 1.3813273340832395, "grad_norm": 2.3103485107421875, "learning_rate": 9.789384092520923e-05, "loss": 0.7048953771591187, "step": 1228, "token_acc": 0.7993779160186625 }, { "epoch": 1.3824521934758156, "grad_norm": 2.027578592300415, "learning_rate": 9.78884959852178e-05, "loss": 0.7083699703216553, "step": 1229, "token_acc": 0.7843784378437844 }, { "epoch": 1.3835770528683915, "grad_norm": 2.0039172172546387, "learning_rate": 9.788314441800267e-05, "loss": 0.858109712600708, "step": 1230, "token_acc": 0.7537746806039489 }, { "epoch": 1.3847019122609674, "grad_norm": 1.5794634819030762, "learning_rate": 9.787778622430445e-05, "loss": 0.7201387882232666, "step": 1231, "token_acc": 0.7977178423236515 }, { "epoch": 1.3858267716535433, "grad_norm": 1.8155194520950317, "learning_rate": 9.787242140486463e-05, "loss": 0.7522667646408081, "step": 1232, "token_acc": 0.7725060827250608 }, { "epoch": 1.3869516310461192, "grad_norm": 2.196178436279297, "learning_rate": 9.786704996042567e-05, "loss": 0.7015236020088196, "step": 1233, "token_acc": 0.7877813504823151 }, { "epoch": 1.3880764904386953, "grad_norm": 1.9539371728897095, "learning_rate": 9.786167189173088e-05, "loss": 0.9088111519813538, "step": 1234, "token_acc": 0.7357456140350878 }, { "epoch": 1.389201349831271, "grad_norm": 1.6403218507766724, "learning_rate": 9.785628719952456e-05, "loss": 0.5796622633934021, "step": 1235, "token_acc": 0.8418549346016647 }, { "epoch": 1.390326209223847, "grad_norm": 1.7642529010772705, "learning_rate": 9.785089588455185e-05, "loss": 0.7190603017807007, "step": 1236, "token_acc": 0.7667667667667668 }, { "epoch": 1.391451068616423, "grad_norm": 1.7321652173995972, "learning_rate": 9.784549794755889e-05, "loss": 0.7172149419784546, "step": 1237, "token_acc": 0.798175598631699 }, { "epoch": 1.3925759280089989, "grad_norm": 1.93333899974823, "learning_rate": 9.784009338929268e-05, "loss": 0.7826778888702393, "step": 1238, "token_acc": 0.7619047619047619 }, { "epoch": 1.3937007874015748, "grad_norm": 1.8266716003417969, "learning_rate": 9.783468221050112e-05, "loss": 0.8009290099143982, "step": 1239, "token_acc": 0.7737603305785123 }, { "epoch": 1.3948256467941507, "grad_norm": 1.8634634017944336, "learning_rate": 9.782926441193309e-05, "loss": 0.7750263214111328, "step": 1240, "token_acc": 0.7641618497109827 }, { "epoch": 1.3959505061867268, "grad_norm": 1.9322770833969116, "learning_rate": 9.782383999433833e-05, "loss": 0.8740724325180054, "step": 1241, "token_acc": 0.7678185745140389 }, { "epoch": 1.3970753655793025, "grad_norm": 1.7476725578308105, "learning_rate": 9.781840895846755e-05, "loss": 0.740912675857544, "step": 1242, "token_acc": 0.7711386696730552 }, { "epoch": 1.3982002249718786, "grad_norm": 1.7748504877090454, "learning_rate": 9.781297130507229e-05, "loss": 0.8929165601730347, "step": 1243, "token_acc": 0.7451640033641715 }, { "epoch": 1.3993250843644545, "grad_norm": 2.1497576236724854, "learning_rate": 9.780752703490512e-05, "loss": 0.7834689617156982, "step": 1244, "token_acc": 0.7728 }, { "epoch": 1.4004499437570304, "grad_norm": 1.7344166040420532, "learning_rate": 9.780207614871942e-05, "loss": 0.8027245998382568, "step": 1245, "token_acc": 0.7607025246981339 }, { "epoch": 1.4015748031496063, "grad_norm": 1.3980607986450195, "learning_rate": 9.779661864726954e-05, "loss": 0.6059050559997559, "step": 1246, "token_acc": 0.8245149911816578 }, { "epoch": 1.4026996625421821, "grad_norm": 1.7305450439453125, "learning_rate": 9.779115453131076e-05, "loss": 0.644611120223999, "step": 1247, "token_acc": 0.7968936678614098 }, { "epoch": 1.4038245219347583, "grad_norm": 1.7227176427841187, "learning_rate": 9.778568380159922e-05, "loss": 0.754315972328186, "step": 1248, "token_acc": 0.7929824561403509 }, { "epoch": 1.4049493813273342, "grad_norm": 1.5385007858276367, "learning_rate": 9.778020645889204e-05, "loss": 0.838065505027771, "step": 1249, "token_acc": 0.7652582159624414 }, { "epoch": 1.40607424071991, "grad_norm": 1.7347893714904785, "learning_rate": 9.777472250394719e-05, "loss": 0.7813566327095032, "step": 1250, "token_acc": 0.7659115426105717 }, { "epoch": 1.407199100112486, "grad_norm": 1.7362730503082275, "learning_rate": 9.776923193752361e-05, "loss": 0.5930313467979431, "step": 1251, "token_acc": 0.8156498673740054 }, { "epoch": 1.4083239595050618, "grad_norm": 1.7613277435302734, "learning_rate": 9.776373476038111e-05, "loss": 0.6985921263694763, "step": 1252, "token_acc": 0.8024844720496894 }, { "epoch": 1.4094488188976377, "grad_norm": 1.919978141784668, "learning_rate": 9.775823097328045e-05, "loss": 0.6796516180038452, "step": 1253, "token_acc": 0.793002915451895 }, { "epoch": 1.4105736782902136, "grad_norm": 2.096275568008423, "learning_rate": 9.77527205769833e-05, "loss": 0.6765460968017578, "step": 1254, "token_acc": 0.7914614121510674 }, { "epoch": 1.4116985376827897, "grad_norm": 1.8828691244125366, "learning_rate": 9.774720357225222e-05, "loss": 1.0019274950027466, "step": 1255, "token_acc": 0.7261785356068204 }, { "epoch": 1.4128233970753656, "grad_norm": 1.80917489528656, "learning_rate": 9.774167995985071e-05, "loss": 0.7865121364593506, "step": 1256, "token_acc": 0.7696447793326158 }, { "epoch": 1.4139482564679415, "grad_norm": 1.7788019180297852, "learning_rate": 9.773614974054317e-05, "loss": 0.8558187484741211, "step": 1257, "token_acc": 0.7492096944151738 }, { "epoch": 1.4150731158605174, "grad_norm": 1.9513136148452759, "learning_rate": 9.773061291509493e-05, "loss": 0.8854610323905945, "step": 1258, "token_acc": 0.7405602923264312 }, { "epoch": 1.4161979752530933, "grad_norm": 1.8409239053726196, "learning_rate": 9.772506948427221e-05, "loss": 0.6192444562911987, "step": 1259, "token_acc": 0.8080110497237569 }, { "epoch": 1.4173228346456692, "grad_norm": 1.9857902526855469, "learning_rate": 9.771951944884217e-05, "loss": 0.7062106728553772, "step": 1260, "token_acc": 0.7854077253218884 }, { "epoch": 1.418447694038245, "grad_norm": 1.5305010080337524, "learning_rate": 9.771396280957285e-05, "loss": 0.675408124923706, "step": 1261, "token_acc": 0.8001658374792703 }, { "epoch": 1.4195725534308212, "grad_norm": 1.9132109880447388, "learning_rate": 9.770839956723326e-05, "loss": 0.6611477732658386, "step": 1262, "token_acc": 0.7973684210526316 }, { "epoch": 1.4206974128233971, "grad_norm": 1.746442437171936, "learning_rate": 9.770282972259327e-05, "loss": 0.6260803937911987, "step": 1263, "token_acc": 0.807843137254902 }, { "epoch": 1.421822272215973, "grad_norm": 1.9538253545761108, "learning_rate": 9.769725327642367e-05, "loss": 0.8068036437034607, "step": 1264, "token_acc": 0.7565217391304347 }, { "epoch": 1.422947131608549, "grad_norm": 1.8382195234298706, "learning_rate": 9.76916702294962e-05, "loss": 0.7666763663291931, "step": 1265, "token_acc": 0.7654867256637168 }, { "epoch": 1.4240719910011248, "grad_norm": 1.8336620330810547, "learning_rate": 9.768608058258347e-05, "loss": 0.7700809836387634, "step": 1266, "token_acc": 0.7735399284862932 }, { "epoch": 1.425196850393701, "grad_norm": 1.8333510160446167, "learning_rate": 9.768048433645904e-05, "loss": 0.8285579085350037, "step": 1267, "token_acc": 0.75 }, { "epoch": 1.4263217097862766, "grad_norm": 1.9015469551086426, "learning_rate": 9.767488149189738e-05, "loss": 0.8775292634963989, "step": 1268, "token_acc": 0.7423442449841605 }, { "epoch": 1.4274465691788527, "grad_norm": 2.2239274978637695, "learning_rate": 9.766927204967382e-05, "loss": 0.9342637658119202, "step": 1269, "token_acc": 0.7653061224489796 }, { "epoch": 1.4285714285714286, "grad_norm": 1.889794111251831, "learning_rate": 9.766365601056468e-05, "loss": 0.8183103799819946, "step": 1270, "token_acc": 0.7620111731843575 }, { "epoch": 1.4296962879640045, "grad_norm": 1.7303168773651123, "learning_rate": 9.765803337534713e-05, "loss": 0.791266679763794, "step": 1271, "token_acc": 0.7571569595261599 }, { "epoch": 1.4308211473565804, "grad_norm": 1.5996417999267578, "learning_rate": 9.76524041447993e-05, "loss": 0.6836048364639282, "step": 1272, "token_acc": 0.7973856209150327 }, { "epoch": 1.4319460067491563, "grad_norm": 2.207568645477295, "learning_rate": 9.764676831970019e-05, "loss": 0.7749098539352417, "step": 1273, "token_acc": 0.7661406025824964 }, { "epoch": 1.4330708661417324, "grad_norm": 1.988532304763794, "learning_rate": 9.764112590082974e-05, "loss": 0.8843616247177124, "step": 1274, "token_acc": 0.7408343868520859 }, { "epoch": 1.434195725534308, "grad_norm": 1.6053600311279297, "learning_rate": 9.763547688896882e-05, "loss": 0.7723551988601685, "step": 1275, "token_acc": 0.7938461538461539 }, { "epoch": 1.4353205849268842, "grad_norm": 1.6843782663345337, "learning_rate": 9.762982128489918e-05, "loss": 0.5772839784622192, "step": 1276, "token_acc": 0.821664464993395 }, { "epoch": 1.43644544431946, "grad_norm": 1.5588475465774536, "learning_rate": 9.762415908940346e-05, "loss": 0.6251198053359985, "step": 1277, "token_acc": 0.7946611909650924 }, { "epoch": 1.437570303712036, "grad_norm": 1.7692241668701172, "learning_rate": 9.761849030326526e-05, "loss": 0.7360280156135559, "step": 1278, "token_acc": 0.79 }, { "epoch": 1.4386951631046119, "grad_norm": 1.5717484951019287, "learning_rate": 9.76128149272691e-05, "loss": 0.7253256440162659, "step": 1279, "token_acc": 0.7936016511867905 }, { "epoch": 1.4398200224971878, "grad_norm": 1.8375296592712402, "learning_rate": 9.760713296220036e-05, "loss": 0.6739622354507446, "step": 1280, "token_acc": 0.7832929782082324 }, { "epoch": 1.4409448818897639, "grad_norm": 1.8137038946151733, "learning_rate": 9.760144440884538e-05, "loss": 0.6599689722061157, "step": 1281, "token_acc": 0.7838541666666666 }, { "epoch": 1.4420697412823398, "grad_norm": 1.8411439657211304, "learning_rate": 9.759574926799136e-05, "loss": 0.7943038940429688, "step": 1282, "token_acc": 0.7561761546723953 }, { "epoch": 1.4431946006749157, "grad_norm": 1.7458146810531616, "learning_rate": 9.759004754042648e-05, "loss": 0.8005959987640381, "step": 1283, "token_acc": 0.7597137014314929 }, { "epoch": 1.4443194600674916, "grad_norm": 1.6995097398757935, "learning_rate": 9.758433922693977e-05, "loss": 0.6551939249038696, "step": 1284, "token_acc": 0.8095238095238095 }, { "epoch": 1.4454443194600675, "grad_norm": 1.8232816457748413, "learning_rate": 9.757862432832121e-05, "loss": 0.7159420847892761, "step": 1285, "token_acc": 0.7802585193889542 }, { "epoch": 1.4465691788526434, "grad_norm": 1.8640668392181396, "learning_rate": 9.757290284536166e-05, "loss": 0.7907788157463074, "step": 1286, "token_acc": 0.7604519774011299 }, { "epoch": 1.4476940382452193, "grad_norm": 1.6229958534240723, "learning_rate": 9.756717477885291e-05, "loss": 0.5743993520736694, "step": 1287, "token_acc": 0.8256658595641646 }, { "epoch": 1.4488188976377954, "grad_norm": 2.051156759262085, "learning_rate": 9.756144012958768e-05, "loss": 0.8179750442504883, "step": 1288, "token_acc": 0.7723270440251573 }, { "epoch": 1.4499437570303713, "grad_norm": 2.0995967388153076, "learning_rate": 9.755569889835957e-05, "loss": 0.6708594560623169, "step": 1289, "token_acc": 0.8006134969325154 }, { "epoch": 1.4510686164229472, "grad_norm": 1.6339095830917358, "learning_rate": 9.75499510859631e-05, "loss": 0.9024752378463745, "step": 1290, "token_acc": 0.7597292724196277 }, { "epoch": 1.452193475815523, "grad_norm": 1.7340854406356812, "learning_rate": 9.754419669319367e-05, "loss": 0.633522629737854, "step": 1291, "token_acc": 0.8146811070998796 }, { "epoch": 1.453318335208099, "grad_norm": 1.7753891944885254, "learning_rate": 9.753843572084768e-05, "loss": 0.926228404045105, "step": 1292, "token_acc": 0.7408793264733395 }, { "epoch": 1.4544431946006748, "grad_norm": 1.7404507398605347, "learning_rate": 9.753266816972238e-05, "loss": 0.9048328399658203, "step": 1293, "token_acc": 0.7375954198473282 }, { "epoch": 1.4555680539932507, "grad_norm": 2.0704681873321533, "learning_rate": 9.752689404061587e-05, "loss": 0.7450323104858398, "step": 1294, "token_acc": 0.7982989064398542 }, { "epoch": 1.4566929133858268, "grad_norm": 1.7027411460876465, "learning_rate": 9.752111333432728e-05, "loss": 0.7891813516616821, "step": 1295, "token_acc": 0.775178026449644 }, { "epoch": 1.4578177727784027, "grad_norm": 1.9491533041000366, "learning_rate": 9.751532605165656e-05, "loss": 0.7491126656532288, "step": 1296, "token_acc": 0.7664516129032258 }, { "epoch": 1.4589426321709786, "grad_norm": 1.9767087697982788, "learning_rate": 9.750953219340464e-05, "loss": 0.7956689596176147, "step": 1297, "token_acc": 0.7660738714090287 }, { "epoch": 1.4600674915635545, "grad_norm": 1.7191522121429443, "learning_rate": 9.75037317603733e-05, "loss": 0.5245246291160583, "step": 1298, "token_acc": 0.8247863247863247 }, { "epoch": 1.4611923509561304, "grad_norm": 1.6487728357315063, "learning_rate": 9.749792475336528e-05, "loss": 0.6939521431922913, "step": 1299, "token_acc": 0.7914847161572053 }, { "epoch": 1.4623172103487065, "grad_norm": 1.7074658870697021, "learning_rate": 9.749211117318415e-05, "loss": 0.7518646121025085, "step": 1300, "token_acc": 0.7726809378185525 }, { "epoch": 1.4634420697412822, "grad_norm": 1.6952484846115112, "learning_rate": 9.748629102063449e-05, "loss": 0.7014018297195435, "step": 1301, "token_acc": 0.793836344314559 }, { "epoch": 1.4645669291338583, "grad_norm": 1.7389861345291138, "learning_rate": 9.748046429652173e-05, "loss": 0.5717225074768066, "step": 1302, "token_acc": 0.8091428571428572 }, { "epoch": 1.4656917885264342, "grad_norm": 1.6615065336227417, "learning_rate": 9.747463100165224e-05, "loss": 0.6474238634109497, "step": 1303, "token_acc": 0.8069977426636569 }, { "epoch": 1.4668166479190101, "grad_norm": 1.8130065202713013, "learning_rate": 9.746879113683325e-05, "loss": 0.923785924911499, "step": 1304, "token_acc": 0.7349192863211554 }, { "epoch": 1.467941507311586, "grad_norm": 1.921948790550232, "learning_rate": 9.746294470287293e-05, "loss": 0.7309877872467041, "step": 1305, "token_acc": 0.7812895069532237 }, { "epoch": 1.469066366704162, "grad_norm": 2.2245960235595703, "learning_rate": 9.74570917005804e-05, "loss": 0.946930468082428, "step": 1306, "token_acc": 0.7396184062850729 }, { "epoch": 1.470191226096738, "grad_norm": 1.6648904085159302, "learning_rate": 9.745123213076562e-05, "loss": 0.6086941957473755, "step": 1307, "token_acc": 0.8131370328425821 }, { "epoch": 1.471316085489314, "grad_norm": 1.9086030721664429, "learning_rate": 9.744536599423949e-05, "loss": 0.7538081407546997, "step": 1308, "token_acc": 0.7730582524271845 }, { "epoch": 1.4724409448818898, "grad_norm": 1.5561943054199219, "learning_rate": 9.743949329181381e-05, "loss": 0.5634579658508301, "step": 1309, "token_acc": 0.8288393903868698 }, { "epoch": 1.4735658042744657, "grad_norm": 1.8888092041015625, "learning_rate": 9.743361402430131e-05, "loss": 0.7846359014511108, "step": 1310, "token_acc": 0.7604602510460251 }, { "epoch": 1.4746906636670416, "grad_norm": 1.78713858127594, "learning_rate": 9.74277281925156e-05, "loss": 0.6515089273452759, "step": 1311, "token_acc": 0.803921568627451 }, { "epoch": 1.4758155230596175, "grad_norm": 1.748324990272522, "learning_rate": 9.742183579727124e-05, "loss": 0.7014302015304565, "step": 1312, "token_acc": 0.7842364532019704 }, { "epoch": 1.4769403824521934, "grad_norm": 1.6675328016281128, "learning_rate": 9.741593683938363e-05, "loss": 0.8017094135284424, "step": 1313, "token_acc": 0.7618657937806874 }, { "epoch": 1.4780652418447695, "grad_norm": 1.848507285118103, "learning_rate": 9.741003131966915e-05, "loss": 0.6621315479278564, "step": 1314, "token_acc": 0.7969661610268378 }, { "epoch": 1.4791901012373454, "grad_norm": 1.9900027513504028, "learning_rate": 9.740411923894503e-05, "loss": 0.7675366401672363, "step": 1315, "token_acc": 0.7830423940149626 }, { "epoch": 1.4803149606299213, "grad_norm": 2.0351758003234863, "learning_rate": 9.739820059802947e-05, "loss": 0.6506272554397583, "step": 1316, "token_acc": 0.7860326894502229 }, { "epoch": 1.4814398200224972, "grad_norm": 1.6520016193389893, "learning_rate": 9.739227539774152e-05, "loss": 0.7621671557426453, "step": 1317, "token_acc": 0.7740805604203153 }, { "epoch": 1.482564679415073, "grad_norm": 1.6872179508209229, "learning_rate": 9.738634363890117e-05, "loss": 0.7885518074035645, "step": 1318, "token_acc": 0.7647058823529411 }, { "epoch": 1.483689538807649, "grad_norm": 1.859019160270691, "learning_rate": 9.738040532232928e-05, "loss": 0.6186896562576294, "step": 1319, "token_acc": 0.8124207858048162 }, { "epoch": 1.4848143982002249, "grad_norm": 1.8278850317001343, "learning_rate": 9.737446044884769e-05, "loss": 0.7232296466827393, "step": 1320, "token_acc": 0.7642369020501139 }, { "epoch": 1.485939257592801, "grad_norm": 2.032841920852661, "learning_rate": 9.736850901927907e-05, "loss": 0.6058595776557922, "step": 1321, "token_acc": 0.7977369165487977 }, { "epoch": 1.4870641169853769, "grad_norm": 1.9351966381072998, "learning_rate": 9.736255103444704e-05, "loss": 0.6959415674209595, "step": 1322, "token_acc": 0.7871657754010695 }, { "epoch": 1.4881889763779528, "grad_norm": 1.7987730503082275, "learning_rate": 9.735658649517614e-05, "loss": 0.8056268692016602, "step": 1323, "token_acc": 0.7717277486910995 }, { "epoch": 1.4893138357705287, "grad_norm": 1.7400997877120972, "learning_rate": 9.735061540229174e-05, "loss": 0.8132789134979248, "step": 1324, "token_acc": 0.7741935483870968 }, { "epoch": 1.4904386951631046, "grad_norm": 1.7300899028778076, "learning_rate": 9.734463775662023e-05, "loss": 0.7353576421737671, "step": 1325, "token_acc": 0.7801418439716312 }, { "epoch": 1.4915635545556805, "grad_norm": 1.8891255855560303, "learning_rate": 9.733865355898881e-05, "loss": 0.8941518664360046, "step": 1326, "token_acc": 0.752539242843952 }, { "epoch": 1.4926884139482564, "grad_norm": 1.9761435985565186, "learning_rate": 9.733266281022563e-05, "loss": 0.6713072061538696, "step": 1327, "token_acc": 0.7961432506887053 }, { "epoch": 1.4938132733408325, "grad_norm": 1.8733869791030884, "learning_rate": 9.732666551115977e-05, "loss": 0.8226813077926636, "step": 1328, "token_acc": 0.7566241413150148 }, { "epoch": 1.4949381327334084, "grad_norm": 1.8701136112213135, "learning_rate": 9.732066166262115e-05, "loss": 0.7274206876754761, "step": 1329, "token_acc": 0.780952380952381 }, { "epoch": 1.4960629921259843, "grad_norm": 1.931969165802002, "learning_rate": 9.731465126544065e-05, "loss": 0.8187294006347656, "step": 1330, "token_acc": 0.760845383759733 }, { "epoch": 1.4971878515185602, "grad_norm": 2.0192835330963135, "learning_rate": 9.730863432045006e-05, "loss": 0.6779778003692627, "step": 1331, "token_acc": 0.7745358090185677 }, { "epoch": 1.498312710911136, "grad_norm": 1.8837276697158813, "learning_rate": 9.730261082848202e-05, "loss": 0.8615982532501221, "step": 1332, "token_acc": 0.7337142857142858 }, { "epoch": 1.4994375703037122, "grad_norm": 1.5223217010498047, "learning_rate": 9.729658079037013e-05, "loss": 0.6129337549209595, "step": 1333, "token_acc": 0.8145251396648044 }, { "epoch": 1.5005624296962878, "grad_norm": 1.8298872709274292, "learning_rate": 9.72905442069489e-05, "loss": 0.669506311416626, "step": 1334, "token_acc": 0.7981651376146789 }, { "epoch": 1.501687289088864, "grad_norm": 1.5530656576156616, "learning_rate": 9.728450107905368e-05, "loss": 0.6966613531112671, "step": 1335, "token_acc": 0.7885714285714286 }, { "epoch": 1.5028121484814398, "grad_norm": 1.8138720989227295, "learning_rate": 9.72784514075208e-05, "loss": 0.8728063702583313, "step": 1336, "token_acc": 0.7391752577319588 }, { "epoch": 1.5039370078740157, "grad_norm": 1.7958743572235107, "learning_rate": 9.727239519318747e-05, "loss": 0.712306559085846, "step": 1337, "token_acc": 0.7846674182638106 }, { "epoch": 1.5050618672665916, "grad_norm": 1.6768786907196045, "learning_rate": 9.72663324368918e-05, "loss": 0.7438738942146301, "step": 1338, "token_acc": 0.7808880308880309 }, { "epoch": 1.5061867266591675, "grad_norm": 1.7522246837615967, "learning_rate": 9.726026313947279e-05, "loss": 0.6229562759399414, "step": 1339, "token_acc": 0.805699481865285 }, { "epoch": 1.5073115860517436, "grad_norm": 1.8854902982711792, "learning_rate": 9.725418730177037e-05, "loss": 0.7127248644828796, "step": 1340, "token_acc": 0.7934336525307798 }, { "epoch": 1.5084364454443193, "grad_norm": 1.8320726156234741, "learning_rate": 9.724810492462537e-05, "loss": 0.8240312933921814, "step": 1341, "token_acc": 0.771539206195547 }, { "epoch": 1.5095613048368954, "grad_norm": 1.5118968486785889, "learning_rate": 9.724201600887953e-05, "loss": 0.6226657629013062, "step": 1342, "token_acc": 0.8028953229398663 }, { "epoch": 1.5106861642294713, "grad_norm": 1.9833155870437622, "learning_rate": 9.723592055537544e-05, "loss": 0.8345725536346436, "step": 1343, "token_acc": 0.7684964200477327 }, { "epoch": 1.5118110236220472, "grad_norm": 1.8500995635986328, "learning_rate": 9.72298185649567e-05, "loss": 0.7686733603477478, "step": 1344, "token_acc": 0.7623862487360971 }, { "epoch": 1.5129358830146231, "grad_norm": 1.8084831237792969, "learning_rate": 9.722371003846775e-05, "loss": 0.7874232530593872, "step": 1345, "token_acc": 0.7660069848661234 }, { "epoch": 1.514060742407199, "grad_norm": 1.8857593536376953, "learning_rate": 9.721759497675391e-05, "loss": 0.8956812024116516, "step": 1346, "token_acc": 0.7504970178926441 }, { "epoch": 1.5151856017997751, "grad_norm": 1.5979782342910767, "learning_rate": 9.721147338066144e-05, "loss": 0.7997124195098877, "step": 1347, "token_acc": 0.75 }, { "epoch": 1.5163104611923508, "grad_norm": 1.6410024166107178, "learning_rate": 9.720534525103753e-05, "loss": 0.563124418258667, "step": 1348, "token_acc": 0.8121761658031088 }, { "epoch": 1.517435320584927, "grad_norm": 1.8595960140228271, "learning_rate": 9.71992105887302e-05, "loss": 0.7897274494171143, "step": 1349, "token_acc": 0.7732484076433122 }, { "epoch": 1.5185601799775028, "grad_norm": 1.7038620710372925, "learning_rate": 9.719306939458845e-05, "loss": 0.6517189145088196, "step": 1350, "token_acc": 0.7893544733861835 }, { "epoch": 1.5196850393700787, "grad_norm": 1.8878889083862305, "learning_rate": 9.718692166946215e-05, "loss": 0.7811403274536133, "step": 1351, "token_acc": 0.7673267326732673 }, { "epoch": 1.5208098987626548, "grad_norm": 1.670458436012268, "learning_rate": 9.718076741420205e-05, "loss": 0.6956831216812134, "step": 1352, "token_acc": 0.801829268292683 }, { "epoch": 1.5219347581552305, "grad_norm": 1.803709864616394, "learning_rate": 9.717460662965986e-05, "loss": 0.7034800052642822, "step": 1353, "token_acc": 0.7713598074608905 }, { "epoch": 1.5230596175478066, "grad_norm": 1.9704458713531494, "learning_rate": 9.716843931668813e-05, "loss": 0.6987758874893188, "step": 1354, "token_acc": 0.7899280575539568 }, { "epoch": 1.5241844769403825, "grad_norm": 1.9194990396499634, "learning_rate": 9.716226547614038e-05, "loss": 0.738502025604248, "step": 1355, "token_acc": 0.7850707850707851 }, { "epoch": 1.5253093363329584, "grad_norm": 2.080815553665161, "learning_rate": 9.715608510887099e-05, "loss": 0.7544739246368408, "step": 1356, "token_acc": 0.7686676427525623 }, { "epoch": 1.5264341957255343, "grad_norm": 1.7875864505767822, "learning_rate": 9.714989821573523e-05, "loss": 0.7000281810760498, "step": 1357, "token_acc": 0.77625 }, { "epoch": 1.5275590551181102, "grad_norm": 1.4154778718948364, "learning_rate": 9.714370479758932e-05, "loss": 0.5367105007171631, "step": 1358, "token_acc": 0.8429833169774289 }, { "epoch": 1.5286839145106863, "grad_norm": 1.6498692035675049, "learning_rate": 9.713750485529035e-05, "loss": 0.7713258266448975, "step": 1359, "token_acc": 0.7862679955703211 }, { "epoch": 1.529808773903262, "grad_norm": 1.8343068361282349, "learning_rate": 9.713129838969632e-05, "loss": 0.6998307704925537, "step": 1360, "token_acc": 0.8002481389578163 }, { "epoch": 1.530933633295838, "grad_norm": 1.8491013050079346, "learning_rate": 9.712508540166614e-05, "loss": 0.7695513963699341, "step": 1361, "token_acc": 0.7724550898203593 }, { "epoch": 1.532058492688414, "grad_norm": 1.6141505241394043, "learning_rate": 9.71188658920596e-05, "loss": 0.658767580986023, "step": 1362, "token_acc": 0.7963302752293578 }, { "epoch": 1.5331833520809899, "grad_norm": 1.6715033054351807, "learning_rate": 9.711263986173745e-05, "loss": 0.8239041566848755, "step": 1363, "token_acc": 0.7642418930762489 }, { "epoch": 1.5343082114735658, "grad_norm": 1.8121368885040283, "learning_rate": 9.710640731156126e-05, "loss": 0.6018253564834595, "step": 1364, "token_acc": 0.8254397834912043 }, { "epoch": 1.5354330708661417, "grad_norm": 2.0022566318511963, "learning_rate": 9.710016824239356e-05, "loss": 0.7930772304534912, "step": 1365, "token_acc": 0.7538461538461538 }, { "epoch": 1.5365579302587178, "grad_norm": 1.621789813041687, "learning_rate": 9.709392265509776e-05, "loss": 0.7104809284210205, "step": 1366, "token_acc": 0.7766355140186916 }, { "epoch": 1.5376827896512935, "grad_norm": 1.9076693058013916, "learning_rate": 9.708767055053818e-05, "loss": 0.6152622699737549, "step": 1367, "token_acc": 0.8299595141700404 }, { "epoch": 1.5388076490438696, "grad_norm": 1.7017995119094849, "learning_rate": 9.708141192958005e-05, "loss": 0.8580192923545837, "step": 1368, "token_acc": 0.7505399568034558 }, { "epoch": 1.5399325084364455, "grad_norm": 1.7878401279449463, "learning_rate": 9.707514679308949e-05, "loss": 0.8375182151794434, "step": 1369, "token_acc": 0.7527308838133069 }, { "epoch": 1.5410573678290214, "grad_norm": 1.7070989608764648, "learning_rate": 9.706887514193352e-05, "loss": 0.8602302074432373, "step": 1370, "token_acc": 0.7440982058545798 }, { "epoch": 1.5421822272215973, "grad_norm": 1.7873244285583496, "learning_rate": 9.706259697698006e-05, "loss": 0.9163176417350769, "step": 1371, "token_acc": 0.7323008849557522 }, { "epoch": 1.5433070866141732, "grad_norm": 1.8640317916870117, "learning_rate": 9.705631229909793e-05, "loss": 0.8907846212387085, "step": 1372, "token_acc": 0.753177966101695 }, { "epoch": 1.5444319460067493, "grad_norm": 1.851341724395752, "learning_rate": 9.705002110915689e-05, "loss": 0.8127912878990173, "step": 1373, "token_acc": 0.7409261576971214 }, { "epoch": 1.545556805399325, "grad_norm": 1.9492478370666504, "learning_rate": 9.704372340802754e-05, "loss": 0.8794480562210083, "step": 1374, "token_acc": 0.7364253393665159 }, { "epoch": 1.546681664791901, "grad_norm": 1.7409145832061768, "learning_rate": 9.703741919658143e-05, "loss": 0.7625657916069031, "step": 1375, "token_acc": 0.7770562770562771 }, { "epoch": 1.547806524184477, "grad_norm": 1.5248419046401978, "learning_rate": 9.703110847569095e-05, "loss": 0.6226625442504883, "step": 1376, "token_acc": 0.8008998875140607 }, { "epoch": 1.5489313835770528, "grad_norm": 1.9184167385101318, "learning_rate": 9.702479124622949e-05, "loss": 0.6635210514068604, "step": 1377, "token_acc": 0.7896678966789668 }, { "epoch": 1.550056242969629, "grad_norm": 1.5083121061325073, "learning_rate": 9.701846750907125e-05, "loss": 0.6907526254653931, "step": 1378, "token_acc": 0.7895238095238095 }, { "epoch": 1.5511811023622046, "grad_norm": 1.9391305446624756, "learning_rate": 9.701213726509137e-05, "loss": 0.8261804580688477, "step": 1379, "token_acc": 0.7537960954446855 }, { "epoch": 1.5523059617547807, "grad_norm": 1.8136626482009888, "learning_rate": 9.700580051516587e-05, "loss": 0.8471862077713013, "step": 1380, "token_acc": 0.7576754385964912 }, { "epoch": 1.5534308211473564, "grad_norm": 1.8484894037246704, "learning_rate": 9.69994572601717e-05, "loss": 0.8043135404586792, "step": 1381, "token_acc": 0.7638724911452184 }, { "epoch": 1.5545556805399325, "grad_norm": 2.1591031551361084, "learning_rate": 9.69931075009867e-05, "loss": 0.6966294646263123, "step": 1382, "token_acc": 0.8032786885245902 }, { "epoch": 1.5556805399325084, "grad_norm": 1.5941929817199707, "learning_rate": 9.69867512384896e-05, "loss": 0.5368881225585938, "step": 1383, "token_acc": 0.8331273176761433 }, { "epoch": 1.5568053993250843, "grad_norm": 1.7610316276550293, "learning_rate": 9.698038847356001e-05, "loss": 0.6590207815170288, "step": 1384, "token_acc": 0.7827547592385219 }, { "epoch": 1.5579302587176604, "grad_norm": 1.7967594861984253, "learning_rate": 9.69740192070785e-05, "loss": 0.7765593528747559, "step": 1385, "token_acc": 0.773838630806846 }, { "epoch": 1.5590551181102361, "grad_norm": 1.5153254270553589, "learning_rate": 9.696764343992647e-05, "loss": 0.7205203175544739, "step": 1386, "token_acc": 0.7950413223140496 }, { "epoch": 1.5601799775028122, "grad_norm": 1.7236665487289429, "learning_rate": 9.696126117298628e-05, "loss": 0.7578774690628052, "step": 1387, "token_acc": 0.7730844793713163 }, { "epoch": 1.5613048368953881, "grad_norm": 1.7608128786087036, "learning_rate": 9.695487240714116e-05, "loss": 0.9285554885864258, "step": 1388, "token_acc": 0.7241071428571428 }, { "epoch": 1.562429696287964, "grad_norm": 1.822867751121521, "learning_rate": 9.694847714327524e-05, "loss": 0.803697943687439, "step": 1389, "token_acc": 0.7703549060542797 }, { "epoch": 1.56355455568054, "grad_norm": 2.0293190479278564, "learning_rate": 9.694207538227355e-05, "loss": 0.8527755737304688, "step": 1390, "token_acc": 0.7324675324675325 }, { "epoch": 1.5646794150731158, "grad_norm": 1.4838424921035767, "learning_rate": 9.693566712502202e-05, "loss": 0.764399528503418, "step": 1391, "token_acc": 0.7841004184100419 }, { "epoch": 1.565804274465692, "grad_norm": 1.8052135705947876, "learning_rate": 9.692925237240747e-05, "loss": 0.7975213527679443, "step": 1392, "token_acc": 0.7525667351129364 }, { "epoch": 1.5669291338582676, "grad_norm": 1.9726526737213135, "learning_rate": 9.692283112531766e-05, "loss": 0.7017114162445068, "step": 1393, "token_acc": 0.7962962962962963 }, { "epoch": 1.5680539932508437, "grad_norm": 1.676390528678894, "learning_rate": 9.691640338464118e-05, "loss": 0.6886359453201294, "step": 1394, "token_acc": 0.7898009950248757 }, { "epoch": 1.5691788526434196, "grad_norm": 1.752204179763794, "learning_rate": 9.690996915126758e-05, "loss": 0.6115972995758057, "step": 1395, "token_acc": 0.8027465667915106 }, { "epoch": 1.5703037120359955, "grad_norm": 1.6857422590255737, "learning_rate": 9.690352842608727e-05, "loss": 0.7994945049285889, "step": 1396, "token_acc": 0.7556818181818182 }, { "epoch": 1.5714285714285714, "grad_norm": 1.7518057823181152, "learning_rate": 9.689708120999159e-05, "loss": 0.7055900692939758, "step": 1397, "token_acc": 0.781940441882805 }, { "epoch": 1.5725534308211473, "grad_norm": 1.7264916896820068, "learning_rate": 9.689062750387275e-05, "loss": 0.8606637716293335, "step": 1398, "token_acc": 0.7524461839530333 }, { "epoch": 1.5736782902137234, "grad_norm": 1.7806628942489624, "learning_rate": 9.688416730862388e-05, "loss": 0.7207500338554382, "step": 1399, "token_acc": 0.768837803320562 }, { "epoch": 1.574803149606299, "grad_norm": 1.8418354988098145, "learning_rate": 9.687770062513897e-05, "loss": 0.7492187023162842, "step": 1400, "token_acc": 0.758578431372549 }, { "epoch": 1.5759280089988752, "grad_norm": 1.581300973892212, "learning_rate": 9.687122745431298e-05, "loss": 0.5796877145767212, "step": 1401, "token_acc": 0.8118811881188119 }, { "epoch": 1.577052868391451, "grad_norm": 1.7891093492507935, "learning_rate": 9.686474779704168e-05, "loss": 0.8412790298461914, "step": 1402, "token_acc": 0.7449306296691569 }, { "epoch": 1.578177727784027, "grad_norm": 2.2768874168395996, "learning_rate": 9.685826165422181e-05, "loss": 0.8059755563735962, "step": 1403, "token_acc": 0.7451612903225806 }, { "epoch": 1.5793025871766029, "grad_norm": 1.6986849308013916, "learning_rate": 9.685176902675097e-05, "loss": 0.7303980588912964, "step": 1404, "token_acc": 0.7863534675615212 }, { "epoch": 1.5804274465691788, "grad_norm": 1.7959972620010376, "learning_rate": 9.684526991552766e-05, "loss": 0.7450390458106995, "step": 1405, "token_acc": 0.7663656884875847 }, { "epoch": 1.581552305961755, "grad_norm": 1.65813410282135, "learning_rate": 9.683876432145129e-05, "loss": 0.8170745968818665, "step": 1406, "token_acc": 0.759090909090909 }, { "epoch": 1.5826771653543306, "grad_norm": 1.7246389389038086, "learning_rate": 9.683225224542215e-05, "loss": 0.8053788542747498, "step": 1407, "token_acc": 0.748 }, { "epoch": 1.5838020247469067, "grad_norm": 1.543239712715149, "learning_rate": 9.682573368834144e-05, "loss": 0.5944617986679077, "step": 1408, "token_acc": 0.8169014084507042 }, { "epoch": 1.5849268841394826, "grad_norm": 1.7629978656768799, "learning_rate": 9.681920865111128e-05, "loss": 0.7268193960189819, "step": 1409, "token_acc": 0.7900990099009901 }, { "epoch": 1.5860517435320585, "grad_norm": 1.7670410871505737, "learning_rate": 9.681267713463464e-05, "loss": 0.7608370780944824, "step": 1410, "token_acc": 0.7767955801104972 }, { "epoch": 1.5871766029246346, "grad_norm": 1.6530665159225464, "learning_rate": 9.680613913981539e-05, "loss": 0.7083959579467773, "step": 1411, "token_acc": 0.7869700103412617 }, { "epoch": 1.5883014623172103, "grad_norm": 1.9113222360610962, "learning_rate": 9.679959466755834e-05, "loss": 0.7734941244125366, "step": 1412, "token_acc": 0.7740784780023782 }, { "epoch": 1.5894263217097864, "grad_norm": 1.9833587408065796, "learning_rate": 9.679304371876916e-05, "loss": 0.7341743111610413, "step": 1413, "token_acc": 0.7884615384615384 }, { "epoch": 1.590551181102362, "grad_norm": 1.839881420135498, "learning_rate": 9.678648629435443e-05, "loss": 0.845933198928833, "step": 1414, "token_acc": 0.7547357926221336 }, { "epoch": 1.5916760404949382, "grad_norm": 2.0898563861846924, "learning_rate": 9.677992239522162e-05, "loss": 0.8031039237976074, "step": 1415, "token_acc": 0.7641083521444695 }, { "epoch": 1.592800899887514, "grad_norm": 1.9246793985366821, "learning_rate": 9.67733520222791e-05, "loss": 0.7634053230285645, "step": 1416, "token_acc": 0.775609756097561 }, { "epoch": 1.59392575928009, "grad_norm": 1.8288131952285767, "learning_rate": 9.676677517643616e-05, "loss": 0.7299681901931763, "step": 1417, "token_acc": 0.7705357142857143 }, { "epoch": 1.595050618672666, "grad_norm": 1.9538449048995972, "learning_rate": 9.676019185860291e-05, "loss": 0.7612862586975098, "step": 1418, "token_acc": 0.781294964028777 }, { "epoch": 1.5961754780652417, "grad_norm": 1.8645164966583252, "learning_rate": 9.675360206969044e-05, "loss": 0.7202759981155396, "step": 1419, "token_acc": 0.7736486486486487 }, { "epoch": 1.5973003374578179, "grad_norm": 1.7318061590194702, "learning_rate": 9.67470058106107e-05, "loss": 0.8507455587387085, "step": 1420, "token_acc": 0.7572254335260116 }, { "epoch": 1.5984251968503937, "grad_norm": 1.7112807035446167, "learning_rate": 9.674040308227653e-05, "loss": 0.6420460939407349, "step": 1421, "token_acc": 0.7926829268292683 }, { "epoch": 1.5995500562429696, "grad_norm": 1.8606221675872803, "learning_rate": 9.673379388560169e-05, "loss": 0.648901104927063, "step": 1422, "token_acc": 0.7958115183246073 }, { "epoch": 1.6006749156355455, "grad_norm": 1.8965367078781128, "learning_rate": 9.672717822150079e-05, "loss": 0.7787694931030273, "step": 1423, "token_acc": 0.7801507537688442 }, { "epoch": 1.6017997750281214, "grad_norm": 1.7736899852752686, "learning_rate": 9.672055609088937e-05, "loss": 0.7128429412841797, "step": 1424, "token_acc": 0.7693169092945129 }, { "epoch": 1.6029246344206975, "grad_norm": 1.7163761854171753, "learning_rate": 9.671392749468386e-05, "loss": 0.8073232173919678, "step": 1425, "token_acc": 0.7669902912621359 }, { "epoch": 1.6040494938132732, "grad_norm": 1.9441415071487427, "learning_rate": 9.67072924338016e-05, "loss": 0.8035223484039307, "step": 1426, "token_acc": 0.7651098901098901 }, { "epoch": 1.6051743532058493, "grad_norm": 2.256533145904541, "learning_rate": 9.670065090916078e-05, "loss": 0.7466013431549072, "step": 1427, "token_acc": 0.762987012987013 }, { "epoch": 1.6062992125984252, "grad_norm": 1.9851672649383545, "learning_rate": 9.669400292168054e-05, "loss": 0.8533090353012085, "step": 1428, "token_acc": 0.7695906432748538 }, { "epoch": 1.6074240719910011, "grad_norm": 2.2155325412750244, "learning_rate": 9.668734847228087e-05, "loss": 0.9080935120582581, "step": 1429, "token_acc": 0.7325428194993412 }, { "epoch": 1.608548931383577, "grad_norm": 1.7181415557861328, "learning_rate": 9.668068756188266e-05, "loss": 0.7983996272087097, "step": 1430, "token_acc": 0.7547892720306514 }, { "epoch": 1.609673790776153, "grad_norm": 1.906917929649353, "learning_rate": 9.667402019140773e-05, "loss": 0.8175126910209656, "step": 1431, "token_acc": 0.7624565469293163 }, { "epoch": 1.610798650168729, "grad_norm": 1.512007474899292, "learning_rate": 9.666734636177874e-05, "loss": 0.8324935436248779, "step": 1432, "token_acc": 0.746141975308642 }, { "epoch": 1.6119235095613047, "grad_norm": 1.9603153467178345, "learning_rate": 9.666066607391929e-05, "loss": 0.7566289901733398, "step": 1433, "token_acc": 0.7777777777777778 }, { "epoch": 1.6130483689538808, "grad_norm": 1.8640258312225342, "learning_rate": 9.665397932875385e-05, "loss": 0.795473039150238, "step": 1434, "token_acc": 0.764021164021164 }, { "epoch": 1.6141732283464567, "grad_norm": 1.653085470199585, "learning_rate": 9.664728612720777e-05, "loss": 0.7605360150337219, "step": 1435, "token_acc": 0.7752161383285303 }, { "epoch": 1.6152980877390326, "grad_norm": 1.8560841083526611, "learning_rate": 9.664058647020735e-05, "loss": 0.9201065301895142, "step": 1436, "token_acc": 0.7286902286902287 }, { "epoch": 1.6164229471316085, "grad_norm": 1.8098540306091309, "learning_rate": 9.663388035867975e-05, "loss": 0.7353401184082031, "step": 1437, "token_acc": 0.7797695262483995 }, { "epoch": 1.6175478065241844, "grad_norm": 1.620861530303955, "learning_rate": 9.662716779355299e-05, "loss": 0.8163566589355469, "step": 1438, "token_acc": 0.7652411282984531 }, { "epoch": 1.6186726659167605, "grad_norm": 1.9525617361068726, "learning_rate": 9.6620448775756e-05, "loss": 0.8036710619926453, "step": 1439, "token_acc": 0.7505422993492408 }, { "epoch": 1.6197975253093362, "grad_norm": 2.0406270027160645, "learning_rate": 9.661372330621866e-05, "loss": 0.7897688150405884, "step": 1440, "token_acc": 0.7621283255086072 }, { "epoch": 1.6209223847019123, "grad_norm": 1.7322843074798584, "learning_rate": 9.660699138587169e-05, "loss": 0.8951771259307861, "step": 1441, "token_acc": 0.7546125461254612 }, { "epoch": 1.6220472440944882, "grad_norm": 1.7582863569259644, "learning_rate": 9.660025301564666e-05, "loss": 0.6833369135856628, "step": 1442, "token_acc": 0.7970822281167109 }, { "epoch": 1.623172103487064, "grad_norm": 1.9112730026245117, "learning_rate": 9.659350819647614e-05, "loss": 0.8051807880401611, "step": 1443, "token_acc": 0.762982689747004 }, { "epoch": 1.6242969628796402, "grad_norm": 1.935504674911499, "learning_rate": 9.658675692929354e-05, "loss": 0.7039178609848022, "step": 1444, "token_acc": 0.7866666666666666 }, { "epoch": 1.6254218222722159, "grad_norm": 1.7041739225387573, "learning_rate": 9.657999921503311e-05, "loss": 0.67329341173172, "step": 1445, "token_acc": 0.793400286944046 }, { "epoch": 1.626546681664792, "grad_norm": 1.9272243976593018, "learning_rate": 9.657323505463007e-05, "loss": 0.6889788508415222, "step": 1446, "token_acc": 0.793400286944046 }, { "epoch": 1.6276715410573677, "grad_norm": 1.7027548551559448, "learning_rate": 9.656646444902051e-05, "loss": 0.8277428150177002, "step": 1447, "token_acc": 0.7667057444314185 }, { "epoch": 1.6287964004499438, "grad_norm": 1.748950481414795, "learning_rate": 9.65596873991414e-05, "loss": 0.8693366050720215, "step": 1448, "token_acc": 0.7557436517533253 }, { "epoch": 1.6299212598425197, "grad_norm": 1.5587151050567627, "learning_rate": 9.65529039059306e-05, "loss": 0.7282986640930176, "step": 1449, "token_acc": 0.7747653806047967 }, { "epoch": 1.6310461192350956, "grad_norm": 1.8327234983444214, "learning_rate": 9.654611397032686e-05, "loss": 0.7343782186508179, "step": 1450, "token_acc": 0.7702552719200888 }, { "epoch": 1.6321709786276717, "grad_norm": 1.7205097675323486, "learning_rate": 9.653931759326984e-05, "loss": 0.6278746724128723, "step": 1451, "token_acc": 0.7840909090909091 }, { "epoch": 1.6332958380202474, "grad_norm": 1.6391233205795288, "learning_rate": 9.653251477570008e-05, "loss": 0.7932133674621582, "step": 1452, "token_acc": 0.779 }, { "epoch": 1.6344206974128235, "grad_norm": 1.8558634519577026, "learning_rate": 9.652570551855904e-05, "loss": 1.0268654823303223, "step": 1453, "token_acc": 0.7164634146341463 }, { "epoch": 1.6355455568053994, "grad_norm": 1.673924207687378, "learning_rate": 9.6518889822789e-05, "loss": 0.8145999908447266, "step": 1454, "token_acc": 0.7689530685920578 }, { "epoch": 1.6366704161979753, "grad_norm": 1.7984392642974854, "learning_rate": 9.651206768933321e-05, "loss": 0.7712310552597046, "step": 1455, "token_acc": 0.7912087912087912 }, { "epoch": 1.6377952755905512, "grad_norm": 1.6140010356903076, "learning_rate": 9.650523911913573e-05, "loss": 0.6238501071929932, "step": 1456, "token_acc": 0.8089480048367593 }, { "epoch": 1.638920134983127, "grad_norm": 1.7844727039337158, "learning_rate": 9.649840411314161e-05, "loss": 0.8968210816383362, "step": 1457, "token_acc": 0.7311608961303462 }, { "epoch": 1.6400449943757032, "grad_norm": 1.767529010772705, "learning_rate": 9.64915626722967e-05, "loss": 0.7058347463607788, "step": 1458, "token_acc": 0.7926208651399491 }, { "epoch": 1.6411698537682788, "grad_norm": 1.6716680526733398, "learning_rate": 9.64847147975478e-05, "loss": 0.7739843726158142, "step": 1459, "token_acc": 0.7694610778443114 }, { "epoch": 1.642294713160855, "grad_norm": 2.0047247409820557, "learning_rate": 9.647786048984257e-05, "loss": 0.8961246013641357, "step": 1460, "token_acc": 0.7468354430379747 }, { "epoch": 1.6434195725534309, "grad_norm": 1.6570442914962769, "learning_rate": 9.647099975012957e-05, "loss": 0.7589145302772522, "step": 1461, "token_acc": 0.7576028622540251 }, { "epoch": 1.6445444319460067, "grad_norm": 2.0136284828186035, "learning_rate": 9.646413257935822e-05, "loss": 0.8781553506851196, "step": 1462, "token_acc": 0.7395411605937922 }, { "epoch": 1.6456692913385826, "grad_norm": 1.5938788652420044, "learning_rate": 9.645725897847891e-05, "loss": 0.82892906665802, "step": 1463, "token_acc": 0.7627416520210897 }, { "epoch": 1.6467941507311585, "grad_norm": 1.6295124292373657, "learning_rate": 9.645037894844282e-05, "loss": 0.7947424650192261, "step": 1464, "token_acc": 0.776824034334764 }, { "epoch": 1.6479190101237347, "grad_norm": 2.193061351776123, "learning_rate": 9.644349249020211e-05, "loss": 0.9107636213302612, "step": 1465, "token_acc": 0.7382645803698435 }, { "epoch": 1.6490438695163103, "grad_norm": 1.5697104930877686, "learning_rate": 9.643659960470977e-05, "loss": 0.7404510974884033, "step": 1466, "token_acc": 0.7624413145539906 }, { "epoch": 1.6501687289088864, "grad_norm": 1.9063115119934082, "learning_rate": 9.642970029291968e-05, "loss": 0.8955010175704956, "step": 1467, "token_acc": 0.7412429378531074 }, { "epoch": 1.6512935883014623, "grad_norm": 1.5212599039077759, "learning_rate": 9.642279455578665e-05, "loss": 0.6793057918548584, "step": 1468, "token_acc": 0.793709528214616 }, { "epoch": 1.6524184476940382, "grad_norm": 1.6556702852249146, "learning_rate": 9.641588239426633e-05, "loss": 0.7024556994438171, "step": 1469, "token_acc": 0.7795874049945711 }, { "epoch": 1.6535433070866141, "grad_norm": 1.761672019958496, "learning_rate": 9.640896380931532e-05, "loss": 0.808295726776123, "step": 1470, "token_acc": 0.765979381443299 }, { "epoch": 1.65466816647919, "grad_norm": 1.739671230316162, "learning_rate": 9.640203880189105e-05, "loss": 0.8118762969970703, "step": 1471, "token_acc": 0.7751423149905123 }, { "epoch": 1.6557930258717661, "grad_norm": 1.5452371835708618, "learning_rate": 9.639510737295187e-05, "loss": 0.6593228578567505, "step": 1472, "token_acc": 0.7971153846153847 }, { "epoch": 1.6569178852643418, "grad_norm": 1.5082138776779175, "learning_rate": 9.6388169523457e-05, "loss": 0.6944828033447266, "step": 1473, "token_acc": 0.7899628252788105 }, { "epoch": 1.658042744656918, "grad_norm": 1.9256149530410767, "learning_rate": 9.638122525436658e-05, "loss": 0.684680700302124, "step": 1474, "token_acc": 0.7697841726618705 }, { "epoch": 1.6591676040494938, "grad_norm": 1.6295816898345947, "learning_rate": 9.637427456664158e-05, "loss": 0.7229764461517334, "step": 1475, "token_acc": 0.8085339168490153 }, { "epoch": 1.6602924634420697, "grad_norm": 2.0572237968444824, "learning_rate": 9.636731746124395e-05, "loss": 0.9412330389022827, "step": 1476, "token_acc": 0.7334878331402086 }, { "epoch": 1.6614173228346458, "grad_norm": 1.8045775890350342, "learning_rate": 9.636035393913643e-05, "loss": 0.7392874360084534, "step": 1477, "token_acc": 0.7877030162412993 }, { "epoch": 1.6625421822272215, "grad_norm": 1.9226405620574951, "learning_rate": 9.635338400128272e-05, "loss": 0.66661536693573, "step": 1478, "token_acc": 0.7917808219178082 }, { "epoch": 1.6636670416197976, "grad_norm": 1.651412844657898, "learning_rate": 9.634640764864736e-05, "loss": 0.7879306077957153, "step": 1479, "token_acc": 0.7718940936863544 }, { "epoch": 1.6647919010123733, "grad_norm": 1.4487844705581665, "learning_rate": 9.63394248821958e-05, "loss": 0.7109265327453613, "step": 1480, "token_acc": 0.7824909747292419 }, { "epoch": 1.6659167604049494, "grad_norm": 1.5773717164993286, "learning_rate": 9.63324357028944e-05, "loss": 0.7123584747314453, "step": 1481, "token_acc": 0.7816205533596838 }, { "epoch": 1.6670416197975253, "grad_norm": 1.6009548902511597, "learning_rate": 9.632544011171036e-05, "loss": 0.6860480308532715, "step": 1482, "token_acc": 0.7805611222444889 }, { "epoch": 1.6681664791901012, "grad_norm": 1.6979395151138306, "learning_rate": 9.631843810961179e-05, "loss": 0.6114160418510437, "step": 1483, "token_acc": 0.8179453836150845 }, { "epoch": 1.6692913385826773, "grad_norm": 1.666253685951233, "learning_rate": 9.63114296975677e-05, "loss": 0.8422726988792419, "step": 1484, "token_acc": 0.7462422634836428 }, { "epoch": 1.670416197975253, "grad_norm": 1.7799205780029297, "learning_rate": 9.630441487654796e-05, "loss": 0.7374563813209534, "step": 1485, "token_acc": 0.7936842105263158 }, { "epoch": 1.671541057367829, "grad_norm": 1.5865483283996582, "learning_rate": 9.629739364752335e-05, "loss": 0.6913084983825684, "step": 1486, "token_acc": 0.7953281423804227 }, { "epoch": 1.672665916760405, "grad_norm": 2.0092852115631104, "learning_rate": 9.629036601146552e-05, "loss": 0.645480751991272, "step": 1487, "token_acc": 0.8039867109634552 }, { "epoch": 1.6737907761529809, "grad_norm": 1.6715439558029175, "learning_rate": 9.628333196934703e-05, "loss": 0.8204668760299683, "step": 1488, "token_acc": 0.7630799605133267 }, { "epoch": 1.6749156355455568, "grad_norm": 1.9615956544876099, "learning_rate": 9.62762915221413e-05, "loss": 0.7010423541069031, "step": 1489, "token_acc": 0.7685643564356436 }, { "epoch": 1.6760404949381327, "grad_norm": 1.7833293676376343, "learning_rate": 9.626924467082266e-05, "loss": 0.8053275346755981, "step": 1490, "token_acc": 0.7649717514124293 }, { "epoch": 1.6771653543307088, "grad_norm": 2.099898338317871, "learning_rate": 9.626219141636631e-05, "loss": 0.641876220703125, "step": 1491, "token_acc": 0.7939778129952456 }, { "epoch": 1.6782902137232845, "grad_norm": 1.9493602514266968, "learning_rate": 9.625513175974833e-05, "loss": 0.5184808373451233, "step": 1492, "token_acc": 0.8392330383480826 }, { "epoch": 1.6794150731158606, "grad_norm": 1.7791684865951538, "learning_rate": 9.62480657019457e-05, "loss": 0.7537258863449097, "step": 1493, "token_acc": 0.7633410672853829 }, { "epoch": 1.6805399325084365, "grad_norm": 1.5763388872146606, "learning_rate": 9.624099324393629e-05, "loss": 0.7862507700920105, "step": 1494, "token_acc": 0.7698492462311558 }, { "epoch": 1.6816647919010124, "grad_norm": 1.8818155527114868, "learning_rate": 9.623391438669883e-05, "loss": 0.715083122253418, "step": 1495, "token_acc": 0.7834645669291339 }, { "epoch": 1.6827896512935883, "grad_norm": 1.9088916778564453, "learning_rate": 9.622682913121297e-05, "loss": 0.7403081655502319, "step": 1496, "token_acc": 0.776685393258427 }, { "epoch": 1.6839145106861642, "grad_norm": 1.5670634508132935, "learning_rate": 9.621973747845922e-05, "loss": 0.9687299728393555, "step": 1497, "token_acc": 0.7262830482115086 }, { "epoch": 1.6850393700787403, "grad_norm": 1.8961052894592285, "learning_rate": 9.621263942941901e-05, "loss": 0.738644003868103, "step": 1498, "token_acc": 0.7833125778331258 }, { "epoch": 1.686164229471316, "grad_norm": 1.7032445669174194, "learning_rate": 9.62055349850746e-05, "loss": 0.5837999582290649, "step": 1499, "token_acc": 0.8246913580246914 }, { "epoch": 1.687289088863892, "grad_norm": 1.823991298675537, "learning_rate": 9.619842414640916e-05, "loss": 0.8125043511390686, "step": 1500, "token_acc": 0.7792682926829269 }, { "epoch": 1.687289088863892, "eval_loss": 0.8873237371444702, "eval_runtime": 31.6384, "eval_samples_per_second": 25.381, "eval_steps_per_second": 3.192, "eval_token_acc": 0.74143718393216, "step": 1500 }, { "epoch": 1.688413948256468, "grad_norm": 1.600449800491333, "learning_rate": 9.619130691440677e-05, "loss": 0.6375556588172913, "step": 1501, "token_acc": 0.7922848664688428 }, { "epoch": 1.6895388076490439, "grad_norm": 1.789552092552185, "learning_rate": 9.618418329005237e-05, "loss": 0.5046806335449219, "step": 1502, "token_acc": 0.8225165562913908 }, { "epoch": 1.6906636670416197, "grad_norm": 1.587049961090088, "learning_rate": 9.617705327433178e-05, "loss": 0.8297234773635864, "step": 1503, "token_acc": 0.7619477006311993 }, { "epoch": 1.6917885264341956, "grad_norm": 1.7454675436019897, "learning_rate": 9.616991686823171e-05, "loss": 0.882544994354248, "step": 1504, "token_acc": 0.7325966850828729 }, { "epoch": 1.6929133858267718, "grad_norm": 1.835809588432312, "learning_rate": 9.616277407273976e-05, "loss": 0.6782231330871582, "step": 1505, "token_acc": 0.7795371498172959 }, { "epoch": 1.6940382452193474, "grad_norm": 1.7508492469787598, "learning_rate": 9.615562488884445e-05, "loss": 0.7568434476852417, "step": 1506, "token_acc": 0.7627118644067796 }, { "epoch": 1.6951631046119235, "grad_norm": 1.8113493919372559, "learning_rate": 9.61484693175351e-05, "loss": 0.7250678539276123, "step": 1507, "token_acc": 0.7800224466891134 }, { "epoch": 1.6962879640044994, "grad_norm": 1.7381081581115723, "learning_rate": 9.614130735980196e-05, "loss": 0.6448031067848206, "step": 1508, "token_acc": 0.8079545454545455 }, { "epoch": 1.6974128233970753, "grad_norm": 1.7468410730361938, "learning_rate": 9.613413901663617e-05, "loss": 0.8022958040237427, "step": 1509, "token_acc": 0.7600806451612904 }, { "epoch": 1.6985376827896514, "grad_norm": 1.6891659498214722, "learning_rate": 9.612696428902976e-05, "loss": 0.7417396903038025, "step": 1510, "token_acc": 0.7815587266739846 }, { "epoch": 1.6996625421822271, "grad_norm": 2.0261213779449463, "learning_rate": 9.611978317797563e-05, "loss": 0.7225087881088257, "step": 1511, "token_acc": 0.7938808373590982 }, { "epoch": 1.7007874015748032, "grad_norm": 1.8429064750671387, "learning_rate": 9.611259568446755e-05, "loss": 0.8613573908805847, "step": 1512, "token_acc": 0.7515605493133583 }, { "epoch": 1.7019122609673791, "grad_norm": 1.7497820854187012, "learning_rate": 9.610540180950021e-05, "loss": 0.7620080709457397, "step": 1513, "token_acc": 0.7823275862068966 }, { "epoch": 1.703037120359955, "grad_norm": 1.893460750579834, "learning_rate": 9.609820155406914e-05, "loss": 0.8893815279006958, "step": 1514, "token_acc": 0.7374599786552828 }, { "epoch": 1.704161979752531, "grad_norm": 1.5621525049209595, "learning_rate": 9.609099491917077e-05, "loss": 0.6098935604095459, "step": 1515, "token_acc": 0.8161389172625128 }, { "epoch": 1.7052868391451068, "grad_norm": 1.8354387283325195, "learning_rate": 9.608378190580246e-05, "loss": 0.6173191666603088, "step": 1516, "token_acc": 0.7798833819241983 }, { "epoch": 1.706411698537683, "grad_norm": 1.903664469718933, "learning_rate": 9.607656251496234e-05, "loss": 0.9374033808708191, "step": 1517, "token_acc": 0.738 }, { "epoch": 1.7075365579302586, "grad_norm": 1.5468684434890747, "learning_rate": 9.606933674764953e-05, "loss": 0.6710147857666016, "step": 1518, "token_acc": 0.781785392245266 }, { "epoch": 1.7086614173228347, "grad_norm": 1.4853596687316895, "learning_rate": 9.6062104604864e-05, "loss": 0.653039813041687, "step": 1519, "token_acc": 0.8073807380738074 }, { "epoch": 1.7097862767154106, "grad_norm": 1.6581809520721436, "learning_rate": 9.605486608760659e-05, "loss": 0.779392659664154, "step": 1520, "token_acc": 0.7642857142857142 }, { "epoch": 1.7109111361079865, "grad_norm": 1.888263463973999, "learning_rate": 9.604762119687901e-05, "loss": 0.6617695093154907, "step": 1521, "token_acc": 0.8002577319587629 }, { "epoch": 1.7120359955005624, "grad_norm": 1.953006386756897, "learning_rate": 9.604036993368389e-05, "loss": 0.8207992911338806, "step": 1522, "token_acc": 0.7538644470868014 }, { "epoch": 1.7131608548931383, "grad_norm": 1.845354437828064, "learning_rate": 9.603311229902473e-05, "loss": 0.828414797782898, "step": 1523, "token_acc": 0.7379619260918253 }, { "epoch": 1.7142857142857144, "grad_norm": 1.8481301069259644, "learning_rate": 9.602584829390588e-05, "loss": 0.9024078845977783, "step": 1524, "token_acc": 0.7428851815505397 }, { "epoch": 1.71541057367829, "grad_norm": 2.002997398376465, "learning_rate": 9.601857791933263e-05, "loss": 0.7095556259155273, "step": 1525, "token_acc": 0.7795620437956204 }, { "epoch": 1.7165354330708662, "grad_norm": 1.6362152099609375, "learning_rate": 9.601130117631108e-05, "loss": 0.6270558834075928, "step": 1526, "token_acc": 0.8180764774044033 }, { "epoch": 1.717660292463442, "grad_norm": 2.1669137477874756, "learning_rate": 9.600401806584827e-05, "loss": 1.0274498462677002, "step": 1527, "token_acc": 0.7348951911220715 }, { "epoch": 1.718785151856018, "grad_norm": 1.844222903251648, "learning_rate": 9.599672858895209e-05, "loss": 0.7232675552368164, "step": 1528, "token_acc": 0.7881773399014779 }, { "epoch": 1.7199100112485939, "grad_norm": 1.4796621799468994, "learning_rate": 9.598943274663133e-05, "loss": 0.7590517997741699, "step": 1529, "token_acc": 0.7822374039282665 }, { "epoch": 1.7210348706411698, "grad_norm": 1.7882061004638672, "learning_rate": 9.598213053989564e-05, "loss": 0.7267932891845703, "step": 1530, "token_acc": 0.774331550802139 }, { "epoch": 1.722159730033746, "grad_norm": 1.7976888418197632, "learning_rate": 9.597482196975558e-05, "loss": 0.8711785674095154, "step": 1531, "token_acc": 0.7607476635514019 }, { "epoch": 1.7232845894263216, "grad_norm": 1.8306421041488647, "learning_rate": 9.596750703722255e-05, "loss": 0.7901278734207153, "step": 1532, "token_acc": 0.7635270541082164 }, { "epoch": 1.7244094488188977, "grad_norm": 1.8687702417373657, "learning_rate": 9.596018574330887e-05, "loss": 0.8606181144714355, "step": 1533, "token_acc": 0.7508690614136733 }, { "epoch": 1.7255343082114736, "grad_norm": 1.7070356607437134, "learning_rate": 9.595285808902772e-05, "loss": 0.6710304617881775, "step": 1534, "token_acc": 0.7787913340935005 }, { "epoch": 1.7266591676040495, "grad_norm": 1.9329514503479004, "learning_rate": 9.594552407539317e-05, "loss": 0.7982407808303833, "step": 1535, "token_acc": 0.7565217391304347 }, { "epoch": 1.7277840269966256, "grad_norm": 1.899498462677002, "learning_rate": 9.593818370342015e-05, "loss": 0.7514640092849731, "step": 1536, "token_acc": 0.7704280155642024 }, { "epoch": 1.7289088863892013, "grad_norm": 1.6738090515136719, "learning_rate": 9.59308369741245e-05, "loss": 0.6440417170524597, "step": 1537, "token_acc": 0.786504424778761 }, { "epoch": 1.7300337457817774, "grad_norm": 1.6603108644485474, "learning_rate": 9.59234838885229e-05, "loss": 0.7451006174087524, "step": 1538, "token_acc": 0.7694770544290288 }, { "epoch": 1.731158605174353, "grad_norm": 1.7812089920043945, "learning_rate": 9.591612444763297e-05, "loss": 0.7075095176696777, "step": 1539, "token_acc": 0.7636566332218506 }, { "epoch": 1.7322834645669292, "grad_norm": 1.4857760667800903, "learning_rate": 9.590875865247314e-05, "loss": 0.7105891704559326, "step": 1540, "token_acc": 0.8009345794392523 }, { "epoch": 1.733408323959505, "grad_norm": 1.6951133012771606, "learning_rate": 9.590138650406278e-05, "loss": 0.7863790392875671, "step": 1541, "token_acc": 0.7657657657657657 }, { "epoch": 1.734533183352081, "grad_norm": 1.6400033235549927, "learning_rate": 9.589400800342208e-05, "loss": 0.7500651478767395, "step": 1542, "token_acc": 0.7746913580246914 }, { "epoch": 1.735658042744657, "grad_norm": 1.9745416641235352, "learning_rate": 9.588662315157216e-05, "loss": 0.8622907996177673, "step": 1543, "token_acc": 0.7439446366782007 }, { "epoch": 1.7367829021372327, "grad_norm": 1.8816924095153809, "learning_rate": 9.587923194953499e-05, "loss": 0.7928323745727539, "step": 1544, "token_acc": 0.7615819209039548 }, { "epoch": 1.7379077615298089, "grad_norm": 1.7088682651519775, "learning_rate": 9.587183439833344e-05, "loss": 0.7955119609832764, "step": 1545, "token_acc": 0.7626752966558792 }, { "epoch": 1.7390326209223848, "grad_norm": 1.7010594606399536, "learning_rate": 9.586443049899125e-05, "loss": 0.7009035348892212, "step": 1546, "token_acc": 0.7798165137614679 }, { "epoch": 1.7401574803149606, "grad_norm": 1.4288691282272339, "learning_rate": 9.585702025253302e-05, "loss": 0.7020480036735535, "step": 1547, "token_acc": 0.7828096118299446 }, { "epoch": 1.7412823397075365, "grad_norm": 1.979737401008606, "learning_rate": 9.584960365998426e-05, "loss": 0.742642343044281, "step": 1548, "token_acc": 0.7686170212765957 }, { "epoch": 1.7424071991001124, "grad_norm": 1.496243953704834, "learning_rate": 9.584218072237132e-05, "loss": 0.7029868364334106, "step": 1549, "token_acc": 0.7787610619469026 }, { "epoch": 1.7435320584926886, "grad_norm": 1.6605095863342285, "learning_rate": 9.583475144072147e-05, "loss": 0.7361111044883728, "step": 1550, "token_acc": 0.767342582710779 }, { "epoch": 1.7446569178852642, "grad_norm": 1.939010739326477, "learning_rate": 9.582731581606284e-05, "loss": 0.7370983362197876, "step": 1551, "token_acc": 0.7824207492795389 }, { "epoch": 1.7457817772778403, "grad_norm": 1.636982798576355, "learning_rate": 9.581987384942442e-05, "loss": 0.7538416981697083, "step": 1552, "token_acc": 0.7872582480091013 }, { "epoch": 1.7469066366704162, "grad_norm": 1.7748944759368896, "learning_rate": 9.581242554183609e-05, "loss": 0.9313595294952393, "step": 1553, "token_acc": 0.7251336898395722 }, { "epoch": 1.7480314960629921, "grad_norm": 1.6529055833816528, "learning_rate": 9.580497089432863e-05, "loss": 0.6088218688964844, "step": 1554, "token_acc": 0.8016627078384798 }, { "epoch": 1.749156355455568, "grad_norm": 1.848143219947815, "learning_rate": 9.579750990793368e-05, "loss": 0.645003080368042, "step": 1555, "token_acc": 0.8072669826224329 }, { "epoch": 1.750281214848144, "grad_norm": 1.905320405960083, "learning_rate": 9.579004258368373e-05, "loss": 0.6620921492576599, "step": 1556, "token_acc": 0.7975308641975308 }, { "epoch": 1.75140607424072, "grad_norm": 1.7954484224319458, "learning_rate": 9.578256892261221e-05, "loss": 0.8412375450134277, "step": 1557, "token_acc": 0.7502623294858342 }, { "epoch": 1.7525309336332957, "grad_norm": 1.6702486276626587, "learning_rate": 9.577508892575335e-05, "loss": 0.5399316549301147, "step": 1558, "token_acc": 0.8359264497878359 }, { "epoch": 1.7536557930258718, "grad_norm": 1.6504881381988525, "learning_rate": 9.576760259414233e-05, "loss": 0.6968065500259399, "step": 1559, "token_acc": 0.7806763285024154 }, { "epoch": 1.7547806524184477, "grad_norm": 1.6426668167114258, "learning_rate": 9.576010992881517e-05, "loss": 0.6551632881164551, "step": 1560, "token_acc": 0.8075772681954138 }, { "epoch": 1.7559055118110236, "grad_norm": 1.5624077320098877, "learning_rate": 9.575261093080874e-05, "loss": 0.5856071710586548, "step": 1561, "token_acc": 0.8162217659137577 }, { "epoch": 1.7570303712035995, "grad_norm": 1.9198764562606812, "learning_rate": 9.574510560116083e-05, "loss": 0.7332339286804199, "step": 1562, "token_acc": 0.7840112201963534 }, { "epoch": 1.7581552305961754, "grad_norm": 1.9135196208953857, "learning_rate": 9.573759394091011e-05, "loss": 0.7578145265579224, "step": 1563, "token_acc": 0.7643467643467643 }, { "epoch": 1.7592800899887515, "grad_norm": 1.9486173391342163, "learning_rate": 9.57300759510961e-05, "loss": 0.8030940294265747, "step": 1564, "token_acc": 0.7626321974148061 }, { "epoch": 1.7604049493813272, "grad_norm": 1.7062480449676514, "learning_rate": 9.57225516327592e-05, "loss": 0.7177442312240601, "step": 1565, "token_acc": 0.782249742002064 }, { "epoch": 1.7615298087739033, "grad_norm": 1.8121774196624756, "learning_rate": 9.571502098694067e-05, "loss": 0.6251269578933716, "step": 1566, "token_acc": 0.8185907046476761 }, { "epoch": 1.7626546681664792, "grad_norm": 1.9938162565231323, "learning_rate": 9.57074840146827e-05, "loss": 0.7694512009620667, "step": 1567, "token_acc": 0.7699004975124378 }, { "epoch": 1.763779527559055, "grad_norm": 1.9126063585281372, "learning_rate": 9.569994071702833e-05, "loss": 0.7114837169647217, "step": 1568, "token_acc": 0.7826603325415677 }, { "epoch": 1.7649043869516312, "grad_norm": 1.5938260555267334, "learning_rate": 9.569239109502142e-05, "loss": 0.797705888748169, "step": 1569, "token_acc": 0.7635009310986964 }, { "epoch": 1.7660292463442069, "grad_norm": 1.9114818572998047, "learning_rate": 9.568483514970677e-05, "loss": 0.7721642255783081, "step": 1570, "token_acc": 0.7750677506775068 }, { "epoch": 1.767154105736783, "grad_norm": 1.3544142246246338, "learning_rate": 9.567727288213005e-05, "loss": 0.6539074182510376, "step": 1571, "token_acc": 0.8054187192118226 }, { "epoch": 1.7682789651293587, "grad_norm": 1.8686999082565308, "learning_rate": 9.56697042933378e-05, "loss": 0.729436993598938, "step": 1572, "token_acc": 0.7752043596730245 }, { "epoch": 1.7694038245219348, "grad_norm": 1.7216452360153198, "learning_rate": 9.566212938437739e-05, "loss": 0.653132438659668, "step": 1573, "token_acc": 0.7965367965367965 }, { "epoch": 1.7705286839145107, "grad_norm": 1.8153408765792847, "learning_rate": 9.565454815629713e-05, "loss": 0.6955718994140625, "step": 1574, "token_acc": 0.7908571428571428 }, { "epoch": 1.7716535433070866, "grad_norm": 1.8733937740325928, "learning_rate": 9.564696061014617e-05, "loss": 0.8509937524795532, "step": 1575, "token_acc": 0.7285012285012284 }, { "epoch": 1.7727784026996627, "grad_norm": 1.7573983669281006, "learning_rate": 9.563936674697453e-05, "loss": 0.6228055953979492, "step": 1576, "token_acc": 0.7982885085574573 }, { "epoch": 1.7739032620922384, "grad_norm": 1.9105315208435059, "learning_rate": 9.563176656783314e-05, "loss": 0.8599588871002197, "step": 1577, "token_acc": 0.7421686746987952 }, { "epoch": 1.7750281214848145, "grad_norm": 1.7165391445159912, "learning_rate": 9.562416007377375e-05, "loss": 0.7134194374084473, "step": 1578, "token_acc": 0.7952646239554317 }, { "epoch": 1.7761529808773904, "grad_norm": 1.8561763763427734, "learning_rate": 9.561654726584903e-05, "loss": 0.6917146444320679, "step": 1579, "token_acc": 0.7809076682316118 }, { "epoch": 1.7772778402699663, "grad_norm": 1.6707212924957275, "learning_rate": 9.560892814511251e-05, "loss": 0.9421406388282776, "step": 1580, "token_acc": 0.7206551410373067 }, { "epoch": 1.7784026996625422, "grad_norm": 1.7500338554382324, "learning_rate": 9.560130271261856e-05, "loss": 0.7798848152160645, "step": 1581, "token_acc": 0.7675736961451247 }, { "epoch": 1.779527559055118, "grad_norm": 1.7368364334106445, "learning_rate": 9.559367096942248e-05, "loss": 0.7192870378494263, "step": 1582, "token_acc": 0.8072139303482587 }, { "epoch": 1.7806524184476942, "grad_norm": 1.7294416427612305, "learning_rate": 9.558603291658042e-05, "loss": 0.7768459320068359, "step": 1583, "token_acc": 0.7802313354363828 }, { "epoch": 1.7817772778402698, "grad_norm": 2.0249297618865967, "learning_rate": 9.557838855514938e-05, "loss": 0.9283678531646729, "step": 1584, "token_acc": 0.7354430379746836 }, { "epoch": 1.782902137232846, "grad_norm": 1.6896170377731323, "learning_rate": 9.557073788618726e-05, "loss": 0.7880818843841553, "step": 1585, "token_acc": 0.7745535714285714 }, { "epoch": 1.7840269966254219, "grad_norm": 1.6808385848999023, "learning_rate": 9.556308091075284e-05, "loss": 0.7966892123222351, "step": 1586, "token_acc": 0.773220747889023 }, { "epoch": 1.7851518560179978, "grad_norm": 1.4618715047836304, "learning_rate": 9.555541762990572e-05, "loss": 0.5925179719924927, "step": 1587, "token_acc": 0.825 }, { "epoch": 1.7862767154105736, "grad_norm": 1.7030813694000244, "learning_rate": 9.554774804470646e-05, "loss": 0.7964495420455933, "step": 1588, "token_acc": 0.7446808510638298 }, { "epoch": 1.7874015748031495, "grad_norm": 1.7467448711395264, "learning_rate": 9.554007215621641e-05, "loss": 0.8202598094940186, "step": 1589, "token_acc": 0.7661498708010336 }, { "epoch": 1.7885264341957257, "grad_norm": 1.6272717714309692, "learning_rate": 9.553238996549785e-05, "loss": 0.6487069725990295, "step": 1590, "token_acc": 0.8067331670822943 }, { "epoch": 1.7896512935883013, "grad_norm": 1.7959299087524414, "learning_rate": 9.552470147361388e-05, "loss": 0.7145756483078003, "step": 1591, "token_acc": 0.7863013698630137 }, { "epoch": 1.7907761529808774, "grad_norm": 1.764815330505371, "learning_rate": 9.551700668162853e-05, "loss": 0.742464542388916, "step": 1592, "token_acc": 0.7590361445783133 }, { "epoch": 1.7919010123734533, "grad_norm": 1.4768366813659668, "learning_rate": 9.550930559060665e-05, "loss": 0.6398870348930359, "step": 1593, "token_acc": 0.791814946619217 }, { "epoch": 1.7930258717660292, "grad_norm": 2.018599510192871, "learning_rate": 9.550159820161397e-05, "loss": 0.8908665180206299, "step": 1594, "token_acc": 0.7441860465116279 }, { "epoch": 1.7941507311586051, "grad_norm": 1.8237091302871704, "learning_rate": 9.549388451571714e-05, "loss": 0.8317153453826904, "step": 1595, "token_acc": 0.7452830188679245 }, { "epoch": 1.795275590551181, "grad_norm": 1.9809300899505615, "learning_rate": 9.548616453398365e-05, "loss": 0.6680772304534912, "step": 1596, "token_acc": 0.78330658105939 }, { "epoch": 1.7964004499437571, "grad_norm": 1.6622661352157593, "learning_rate": 9.547843825748182e-05, "loss": 0.7621951103210449, "step": 1597, "token_acc": 0.7728155339805826 }, { "epoch": 1.7975253093363328, "grad_norm": 1.3489172458648682, "learning_rate": 9.547070568728089e-05, "loss": 0.5502655506134033, "step": 1598, "token_acc": 0.8277119416590701 }, { "epoch": 1.798650168728909, "grad_norm": 1.512636423110962, "learning_rate": 9.546296682445099e-05, "loss": 0.6995335817337036, "step": 1599, "token_acc": 0.7882241215574549 }, { "epoch": 1.7997750281214848, "grad_norm": 1.842943787574768, "learning_rate": 9.545522167006304e-05, "loss": 0.851963222026825, "step": 1600, "token_acc": 0.7533482142857143 }, { "epoch": 1.8008998875140607, "grad_norm": 1.9030086994171143, "learning_rate": 9.544747022518893e-05, "loss": 0.8254432678222656, "step": 1601, "token_acc": 0.7511210762331838 }, { "epoch": 1.8020247469066368, "grad_norm": 1.6795299053192139, "learning_rate": 9.543971249090134e-05, "loss": 0.8391542434692383, "step": 1602, "token_acc": 0.7442273534635879 }, { "epoch": 1.8031496062992125, "grad_norm": 1.8167961835861206, "learning_rate": 9.543194846827388e-05, "loss": 0.7253971099853516, "step": 1603, "token_acc": 0.7713950762016413 }, { "epoch": 1.8042744656917886, "grad_norm": 1.7982884645462036, "learning_rate": 9.542417815838098e-05, "loss": 0.8119543790817261, "step": 1604, "token_acc": 0.7616209773539928 }, { "epoch": 1.8053993250843643, "grad_norm": 1.6700142621994019, "learning_rate": 9.541640156229796e-05, "loss": 0.7900311946868896, "step": 1605, "token_acc": 0.756 }, { "epoch": 1.8065241844769404, "grad_norm": 1.8900045156478882, "learning_rate": 9.540861868110103e-05, "loss": 0.6861236095428467, "step": 1606, "token_acc": 0.7837514934289128 }, { "epoch": 1.8076490438695163, "grad_norm": 2.0353925228118896, "learning_rate": 9.540082951586724e-05, "loss": 0.7463712692260742, "step": 1607, "token_acc": 0.7758152173913043 }, { "epoch": 1.8087739032620922, "grad_norm": 1.7760707139968872, "learning_rate": 9.539303406767453e-05, "loss": 0.6579590439796448, "step": 1608, "token_acc": 0.7901408450704225 }, { "epoch": 1.8098987626546683, "grad_norm": 1.8579721450805664, "learning_rate": 9.538523233760168e-05, "loss": 0.9732891321182251, "step": 1609, "token_acc": 0.7260556127703398 }, { "epoch": 1.811023622047244, "grad_norm": 1.891345500946045, "learning_rate": 9.53774243267284e-05, "loss": 0.752349853515625, "step": 1610, "token_acc": 0.7696078431372549 }, { "epoch": 1.81214848143982, "grad_norm": 1.6309236288070679, "learning_rate": 9.536961003613519e-05, "loss": 0.6381040215492249, "step": 1611, "token_acc": 0.797085201793722 }, { "epoch": 1.813273340832396, "grad_norm": 1.7484692335128784, "learning_rate": 9.536178946690348e-05, "loss": 0.7677179574966431, "step": 1612, "token_acc": 0.7690631808278867 }, { "epoch": 1.814398200224972, "grad_norm": 1.7928972244262695, "learning_rate": 9.535396262011556e-05, "loss": 0.5867965817451477, "step": 1613, "token_acc": 0.8263473053892215 }, { "epoch": 1.8155230596175478, "grad_norm": 2.2256033420562744, "learning_rate": 9.534612949685456e-05, "loss": 0.730548620223999, "step": 1614, "token_acc": 0.8010849909584087 }, { "epoch": 1.8166479190101237, "grad_norm": 1.8365795612335205, "learning_rate": 9.533829009820448e-05, "loss": 0.872665524482727, "step": 1615, "token_acc": 0.7694736842105263 }, { "epoch": 1.8177727784026998, "grad_norm": 1.7398866415023804, "learning_rate": 9.533044442525023e-05, "loss": 0.7748540043830872, "step": 1616, "token_acc": 0.7508813160987075 }, { "epoch": 1.8188976377952755, "grad_norm": 1.567742943763733, "learning_rate": 9.532259247907755e-05, "loss": 0.6563994288444519, "step": 1617, "token_acc": 0.7926008968609866 }, { "epoch": 1.8200224971878516, "grad_norm": 1.7246602773666382, "learning_rate": 9.531473426077306e-05, "loss": 0.6943480968475342, "step": 1618, "token_acc": 0.7891414141414141 }, { "epoch": 1.8211473565804275, "grad_norm": 1.5593489408493042, "learning_rate": 9.530686977142426e-05, "loss": 0.7680904865264893, "step": 1619, "token_acc": 0.7733847637415622 }, { "epoch": 1.8222722159730034, "grad_norm": 1.7727504968643188, "learning_rate": 9.52989990121195e-05, "loss": 0.9369572401046753, "step": 1620, "token_acc": 0.7302564102564103 }, { "epoch": 1.8233970753655793, "grad_norm": 1.703758955001831, "learning_rate": 9.529112198394799e-05, "loss": 0.6251256465911865, "step": 1621, "token_acc": 0.8026960784313726 }, { "epoch": 1.8245219347581552, "grad_norm": 1.5798896551132202, "learning_rate": 9.528323868799984e-05, "loss": 0.8689987659454346, "step": 1622, "token_acc": 0.7409747292418772 }, { "epoch": 1.8256467941507313, "grad_norm": 1.5997227430343628, "learning_rate": 9.5275349125366e-05, "loss": 0.641250729560852, "step": 1623, "token_acc": 0.7963206307490145 }, { "epoch": 1.826771653543307, "grad_norm": 1.8895246982574463, "learning_rate": 9.526745329713829e-05, "loss": 0.7472430467605591, "step": 1624, "token_acc": 0.7897727272727273 }, { "epoch": 1.827896512935883, "grad_norm": 1.807613730430603, "learning_rate": 9.52595512044094e-05, "loss": 0.8152951598167419, "step": 1625, "token_acc": 0.7473275024295433 }, { "epoch": 1.829021372328459, "grad_norm": 1.888350009918213, "learning_rate": 9.52516428482729e-05, "loss": 0.7948753833770752, "step": 1626, "token_acc": 0.7719806763285024 }, { "epoch": 1.8301462317210349, "grad_norm": 1.844931721687317, "learning_rate": 9.524372822982324e-05, "loss": 1.0173770189285278, "step": 1627, "token_acc": 0.7273673257023934 }, { "epoch": 1.8312710911136107, "grad_norm": 1.7653535604476929, "learning_rate": 9.523580735015567e-05, "loss": 0.881141185760498, "step": 1628, "token_acc": 0.7571569595261599 }, { "epoch": 1.8323959505061866, "grad_norm": 1.9928029775619507, "learning_rate": 9.522788021036637e-05, "loss": 0.9171479940414429, "step": 1629, "token_acc": 0.7406451612903225 }, { "epoch": 1.8335208098987628, "grad_norm": 1.5548876523971558, "learning_rate": 9.521994681155235e-05, "loss": 0.7928760647773743, "step": 1630, "token_acc": 0.7676969092721835 }, { "epoch": 1.8346456692913384, "grad_norm": 1.6917634010314941, "learning_rate": 9.521200715481153e-05, "loss": 0.8116762638092041, "step": 1631, "token_acc": 0.7497584541062802 }, { "epoch": 1.8357705286839145, "grad_norm": 1.5637943744659424, "learning_rate": 9.520406124124266e-05, "loss": 0.8345361351966858, "step": 1632, "token_acc": 0.752542372881356 }, { "epoch": 1.8368953880764904, "grad_norm": 1.9676722288131714, "learning_rate": 9.519610907194537e-05, "loss": 0.814874529838562, "step": 1633, "token_acc": 0.7463863337713534 }, { "epoch": 1.8380202474690663, "grad_norm": 1.8168468475341797, "learning_rate": 9.518815064802011e-05, "loss": 0.858005940914154, "step": 1634, "token_acc": 0.7534591194968554 }, { "epoch": 1.8391451068616425, "grad_norm": 1.5247902870178223, "learning_rate": 9.518018597056828e-05, "loss": 0.8203526735305786, "step": 1635, "token_acc": 0.7638376383763837 }, { "epoch": 1.8402699662542181, "grad_norm": 1.5556524991989136, "learning_rate": 9.51722150406921e-05, "loss": 0.8890230655670166, "step": 1636, "token_acc": 0.7502175805047868 }, { "epoch": 1.8413948256467942, "grad_norm": 1.5665075778961182, "learning_rate": 9.516423785949462e-05, "loss": 0.7260273694992065, "step": 1637, "token_acc": 0.7887640449438202 }, { "epoch": 1.84251968503937, "grad_norm": 1.8595765829086304, "learning_rate": 9.515625442807981e-05, "loss": 0.9335322380065918, "step": 1638, "token_acc": 0.7301401869158879 }, { "epoch": 1.843644544431946, "grad_norm": 1.5354253053665161, "learning_rate": 9.51482647475525e-05, "loss": 0.6131775379180908, "step": 1639, "token_acc": 0.80897583429229 }, { "epoch": 1.844769403824522, "grad_norm": 1.2580610513687134, "learning_rate": 9.514026881901837e-05, "loss": 0.5952597856521606, "step": 1640, "token_acc": 0.8195292066259808 }, { "epoch": 1.8458942632170978, "grad_norm": 1.713594913482666, "learning_rate": 9.513226664358392e-05, "loss": 0.9038985967636108, "step": 1641, "token_acc": 0.732484076433121 }, { "epoch": 1.847019122609674, "grad_norm": 1.693753719329834, "learning_rate": 9.512425822235662e-05, "loss": 0.7468829154968262, "step": 1642, "token_acc": 0.7808056872037915 }, { "epoch": 1.8481439820022496, "grad_norm": 1.5695420503616333, "learning_rate": 9.511624355644472e-05, "loss": 0.7838007211685181, "step": 1643, "token_acc": 0.7790821771611526 }, { "epoch": 1.8492688413948257, "grad_norm": 1.5286344289779663, "learning_rate": 9.510822264695736e-05, "loss": 0.7074995040893555, "step": 1644, "token_acc": 0.7716627634660421 }, { "epoch": 1.8503937007874016, "grad_norm": 1.4468286037445068, "learning_rate": 9.510019549500454e-05, "loss": 0.7783020734786987, "step": 1645, "token_acc": 0.7655462184873949 }, { "epoch": 1.8515185601799775, "grad_norm": 1.4983961582183838, "learning_rate": 9.509216210169713e-05, "loss": 0.7212342023849487, "step": 1646, "token_acc": 0.7857142857142857 }, { "epoch": 1.8526434195725534, "grad_norm": 1.8572725057601929, "learning_rate": 9.508412246814686e-05, "loss": 0.938509464263916, "step": 1647, "token_acc": 0.7371188222923238 }, { "epoch": 1.8537682789651293, "grad_norm": 1.6905418634414673, "learning_rate": 9.507607659546631e-05, "loss": 0.7024407982826233, "step": 1648, "token_acc": 0.7896907216494845 }, { "epoch": 1.8548931383577054, "grad_norm": 1.4162760972976685, "learning_rate": 9.506802448476895e-05, "loss": 0.7816365957260132, "step": 1649, "token_acc": 0.7607361963190185 }, { "epoch": 1.856017997750281, "grad_norm": 1.7512391805648804, "learning_rate": 9.505996613716913e-05, "loss": 0.7803499698638916, "step": 1650, "token_acc": 0.7796442687747036 }, { "epoch": 1.8571428571428572, "grad_norm": 1.7646300792694092, "learning_rate": 9.505190155378197e-05, "loss": 0.7131819725036621, "step": 1651, "token_acc": 0.7845433255269321 }, { "epoch": 1.858267716535433, "grad_norm": 1.936454176902771, "learning_rate": 9.504383073572356e-05, "loss": 0.8629712462425232, "step": 1652, "token_acc": 0.7485101311084624 }, { "epoch": 1.859392575928009, "grad_norm": 2.195711851119995, "learning_rate": 9.50357536841108e-05, "loss": 0.6954888105392456, "step": 1653, "token_acc": 0.7684391080617495 }, { "epoch": 1.860517435320585, "grad_norm": 1.790337085723877, "learning_rate": 9.502767040006147e-05, "loss": 0.7129502892494202, "step": 1654, "token_acc": 0.7879194630872484 }, { "epoch": 1.8616422947131608, "grad_norm": 1.68825101852417, "learning_rate": 9.501958088469418e-05, "loss": 0.5654075145721436, "step": 1655, "token_acc": 0.8222523744911805 }, { "epoch": 1.862767154105737, "grad_norm": 1.65273118019104, "learning_rate": 9.501148513912847e-05, "loss": 0.8639925122261047, "step": 1656, "token_acc": 0.7471852610030706 }, { "epoch": 1.8638920134983126, "grad_norm": 1.8670122623443604, "learning_rate": 9.500338316448465e-05, "loss": 0.8307267427444458, "step": 1657, "token_acc": 0.766704416761042 }, { "epoch": 1.8650168728908887, "grad_norm": 1.9441728591918945, "learning_rate": 9.499527496188398e-05, "loss": 0.7455260753631592, "step": 1658, "token_acc": 0.7769679300291545 }, { "epoch": 1.8661417322834646, "grad_norm": 1.8634874820709229, "learning_rate": 9.498716053244853e-05, "loss": 0.7301028370857239, "step": 1659, "token_acc": 0.7827225130890052 }, { "epoch": 1.8672665916760405, "grad_norm": 2.198274612426758, "learning_rate": 9.497903987730124e-05, "loss": 0.8691632151603699, "step": 1660, "token_acc": 0.7585733882030178 }, { "epoch": 1.8683914510686164, "grad_norm": 1.927148461341858, "learning_rate": 9.49709129975659e-05, "loss": 0.8382291197776794, "step": 1661, "token_acc": 0.758576874205845 }, { "epoch": 1.8695163104611923, "grad_norm": 1.5998426675796509, "learning_rate": 9.496277989436722e-05, "loss": 0.6500102281570435, "step": 1662, "token_acc": 0.811534500514933 }, { "epoch": 1.8706411698537684, "grad_norm": 1.751198410987854, "learning_rate": 9.49546405688307e-05, "loss": 0.6638067960739136, "step": 1663, "token_acc": 0.7939698492462312 }, { "epoch": 1.871766029246344, "grad_norm": 1.9019297361373901, "learning_rate": 9.494649502208273e-05, "loss": 0.7248831987380981, "step": 1664, "token_acc": 0.8047619047619048 }, { "epoch": 1.8728908886389202, "grad_norm": 1.7740023136138916, "learning_rate": 9.493834325525058e-05, "loss": 0.8718546628952026, "step": 1665, "token_acc": 0.744973544973545 }, { "epoch": 1.874015748031496, "grad_norm": 1.5582115650177002, "learning_rate": 9.493018526946233e-05, "loss": 0.7902712821960449, "step": 1666, "token_acc": 0.7703213610586012 }, { "epoch": 1.875140607424072, "grad_norm": 1.844992995262146, "learning_rate": 9.492202106584698e-05, "loss": 0.8774718642234802, "step": 1667, "token_acc": 0.7414529914529915 }, { "epoch": 1.876265466816648, "grad_norm": 1.9036647081375122, "learning_rate": 9.491385064553435e-05, "loss": 0.8359167575836182, "step": 1668, "token_acc": 0.7547892720306514 }, { "epoch": 1.8773903262092237, "grad_norm": 1.6645971536636353, "learning_rate": 9.490567400965515e-05, "loss": 0.689802885055542, "step": 1669, "token_acc": 0.7792553191489362 }, { "epoch": 1.8785151856017999, "grad_norm": 1.5381426811218262, "learning_rate": 9.489749115934091e-05, "loss": 0.7008439302444458, "step": 1670, "token_acc": 0.7840531561461794 }, { "epoch": 1.8796400449943758, "grad_norm": 1.6963223218917847, "learning_rate": 9.488930209572407e-05, "loss": 0.8024357557296753, "step": 1671, "token_acc": 0.7648428405122235 }, { "epoch": 1.8807649043869517, "grad_norm": 1.857999324798584, "learning_rate": 9.488110681993787e-05, "loss": 0.8815010786056519, "step": 1672, "token_acc": 0.7471554993678887 }, { "epoch": 1.8818897637795275, "grad_norm": 1.6551004648208618, "learning_rate": 9.487290533311647e-05, "loss": 0.8235596418380737, "step": 1673, "token_acc": 0.75365141187926 }, { "epoch": 1.8830146231721034, "grad_norm": 1.8978996276855469, "learning_rate": 9.486469763639485e-05, "loss": 0.8616473078727722, "step": 1674, "token_acc": 0.738933030646992 }, { "epoch": 1.8841394825646796, "grad_norm": 1.7987024784088135, "learning_rate": 9.485648373090887e-05, "loss": 0.7110544443130493, "step": 1675, "token_acc": 0.8028534370946823 }, { "epoch": 1.8852643419572552, "grad_norm": 1.7784208059310913, "learning_rate": 9.484826361779525e-05, "loss": 0.794674277305603, "step": 1676, "token_acc": 0.7538644470868014 }, { "epoch": 1.8863892013498313, "grad_norm": 1.7361981868743896, "learning_rate": 9.484003729819153e-05, "loss": 0.7926557064056396, "step": 1677, "token_acc": 0.753125 }, { "epoch": 1.8875140607424072, "grad_norm": 1.877676010131836, "learning_rate": 9.483180477323616e-05, "loss": 0.7564446926116943, "step": 1678, "token_acc": 0.7469879518072289 }, { "epoch": 1.8886389201349831, "grad_norm": 1.5682835578918457, "learning_rate": 9.482356604406841e-05, "loss": 0.8291887640953064, "step": 1679, "token_acc": 0.7559055118110236 }, { "epoch": 1.889763779527559, "grad_norm": 1.7418917417526245, "learning_rate": 9.481532111182847e-05, "loss": 0.6500436067581177, "step": 1680, "token_acc": 0.7946666666666666 }, { "epoch": 1.890888638920135, "grad_norm": 1.632265567779541, "learning_rate": 9.48070699776573e-05, "loss": 0.5346728563308716, "step": 1681, "token_acc": 0.8262910798122066 }, { "epoch": 1.892013498312711, "grad_norm": 1.521627426147461, "learning_rate": 9.479881264269678e-05, "loss": 0.6661152839660645, "step": 1682, "token_acc": 0.7876370887337986 }, { "epoch": 1.8931383577052867, "grad_norm": 1.725780963897705, "learning_rate": 9.479054910808962e-05, "loss": 0.7809461355209351, "step": 1683, "token_acc": 0.7634803921568627 }, { "epoch": 1.8942632170978628, "grad_norm": 1.8791929483413696, "learning_rate": 9.478227937497944e-05, "loss": 0.76300048828125, "step": 1684, "token_acc": 0.7566844919786097 }, { "epoch": 1.8953880764904387, "grad_norm": 1.7051489353179932, "learning_rate": 9.477400344451061e-05, "loss": 0.8847031593322754, "step": 1685, "token_acc": 0.7357894736842105 }, { "epoch": 1.8965129358830146, "grad_norm": 1.809470295906067, "learning_rate": 9.476572131782848e-05, "loss": 0.7713286876678467, "step": 1686, "token_acc": 0.7737789203084833 }, { "epoch": 1.8976377952755905, "grad_norm": 1.296880841255188, "learning_rate": 9.475743299607919e-05, "loss": 0.5889440774917603, "step": 1687, "token_acc": 0.8282208588957055 }, { "epoch": 1.8987626546681664, "grad_norm": 1.559215784072876, "learning_rate": 9.474913848040974e-05, "loss": 0.5326864123344421, "step": 1688, "token_acc": 0.8254189944134078 }, { "epoch": 1.8998875140607425, "grad_norm": 1.752658486366272, "learning_rate": 9.474083777196799e-05, "loss": 0.7979961633682251, "step": 1689, "token_acc": 0.7586666666666667 }, { "epoch": 1.9010123734533182, "grad_norm": 1.511483907699585, "learning_rate": 9.473253087190269e-05, "loss": 0.7473036646842957, "step": 1690, "token_acc": 0.7670250896057348 }, { "epoch": 1.9021372328458943, "grad_norm": 1.661537528038025, "learning_rate": 9.472421778136337e-05, "loss": 0.635658860206604, "step": 1691, "token_acc": 0.7964721845318861 }, { "epoch": 1.9032620922384702, "grad_norm": 1.956296443939209, "learning_rate": 9.471589850150053e-05, "loss": 0.8303481340408325, "step": 1692, "token_acc": 0.7479452054794521 }, { "epoch": 1.904386951631046, "grad_norm": 1.8215175867080688, "learning_rate": 9.470757303346543e-05, "loss": 0.756301760673523, "step": 1693, "token_acc": 0.7463768115942029 }, { "epoch": 1.905511811023622, "grad_norm": 1.6346588134765625, "learning_rate": 9.469924137841023e-05, "loss": 0.7389130592346191, "step": 1694, "token_acc": 0.7700650759219089 }, { "epoch": 1.9066366704161979, "grad_norm": 1.4656789302825928, "learning_rate": 9.469090353748793e-05, "loss": 0.749258279800415, "step": 1695, "token_acc": 0.7844254510921178 }, { "epoch": 1.907761529808774, "grad_norm": 2.1797289848327637, "learning_rate": 9.468255951185239e-05, "loss": 0.7483690977096558, "step": 1696, "token_acc": 0.752755905511811 }, { "epoch": 1.9088863892013497, "grad_norm": 1.7010329961776733, "learning_rate": 9.467420930265833e-05, "loss": 0.8153647780418396, "step": 1697, "token_acc": 0.7619631901840491 }, { "epoch": 1.9100112485939258, "grad_norm": 1.800873875617981, "learning_rate": 9.466585291106133e-05, "loss": 0.5670873522758484, "step": 1698, "token_acc": 0.8186813186813187 }, { "epoch": 1.9111361079865017, "grad_norm": 1.7959355115890503, "learning_rate": 9.465749033821782e-05, "loss": 0.6930376291275024, "step": 1699, "token_acc": 0.7971938775510204 }, { "epoch": 1.9122609673790776, "grad_norm": 1.6354743242263794, "learning_rate": 9.464912158528509e-05, "loss": 0.9852370619773865, "step": 1700, "token_acc": 0.7220630372492837 }, { "epoch": 1.9133858267716537, "grad_norm": 1.7560288906097412, "learning_rate": 9.464074665342126e-05, "loss": 0.6613855361938477, "step": 1701, "token_acc": 0.796373779637378 }, { "epoch": 1.9145106861642294, "grad_norm": 1.5348281860351562, "learning_rate": 9.463236554378534e-05, "loss": 0.7480504512786865, "step": 1702, "token_acc": 0.7760279965004374 }, { "epoch": 1.9156355455568055, "grad_norm": 1.8103102445602417, "learning_rate": 9.462397825753717e-05, "loss": 0.8110336065292358, "step": 1703, "token_acc": 0.7556346381969158 }, { "epoch": 1.9167604049493814, "grad_norm": 2.289855718612671, "learning_rate": 9.461558479583748e-05, "loss": 0.9045394659042358, "step": 1704, "token_acc": 0.7214170692431562 }, { "epoch": 1.9178852643419573, "grad_norm": 1.530943751335144, "learning_rate": 9.460718515984779e-05, "loss": 0.54144686460495, "step": 1705, "token_acc": 0.8357664233576643 }, { "epoch": 1.9190101237345332, "grad_norm": 1.7506492137908936, "learning_rate": 9.459877935073056e-05, "loss": 0.7865628004074097, "step": 1706, "token_acc": 0.7708082026537998 }, { "epoch": 1.920134983127109, "grad_norm": 1.7628432512283325, "learning_rate": 9.459036736964901e-05, "loss": 0.7145853638648987, "step": 1707, "token_acc": 0.787012987012987 }, { "epoch": 1.9212598425196852, "grad_norm": 1.7049124240875244, "learning_rate": 9.45819492177673e-05, "loss": 0.8622397184371948, "step": 1708, "token_acc": 0.7595190380761523 }, { "epoch": 1.9223847019122609, "grad_norm": 1.8974320888519287, "learning_rate": 9.45735248962504e-05, "loss": 0.7438486814498901, "step": 1709, "token_acc": 0.7831858407079646 }, { "epoch": 1.923509561304837, "grad_norm": 1.6934027671813965, "learning_rate": 9.456509440626414e-05, "loss": 0.7542759776115417, "step": 1710, "token_acc": 0.7708757637474541 }, { "epoch": 1.9246344206974129, "grad_norm": 1.5411159992218018, "learning_rate": 9.455665774897518e-05, "loss": 0.6811352372169495, "step": 1711, "token_acc": 0.8154327424400417 }, { "epoch": 1.9257592800899888, "grad_norm": 1.8635481595993042, "learning_rate": 9.454821492555109e-05, "loss": 0.6680900454521179, "step": 1712, "token_acc": 0.7908396946564885 }, { "epoch": 1.9268841394825647, "grad_norm": 1.9179677963256836, "learning_rate": 9.453976593716022e-05, "loss": 0.7261338829994202, "step": 1713, "token_acc": 0.7773333333333333 }, { "epoch": 1.9280089988751405, "grad_norm": 1.7538634538650513, "learning_rate": 9.453131078497189e-05, "loss": 0.6874141097068787, "step": 1714, "token_acc": 0.7818791946308725 }, { "epoch": 1.9291338582677167, "grad_norm": 1.770220398902893, "learning_rate": 9.452284947015611e-05, "loss": 0.8487533926963806, "step": 1715, "token_acc": 0.7346938775510204 }, { "epoch": 1.9302587176602923, "grad_norm": 1.8199132680892944, "learning_rate": 9.451438199388387e-05, "loss": 0.7698516249656677, "step": 1716, "token_acc": 0.7742402315484804 }, { "epoch": 1.9313835770528684, "grad_norm": 1.7349438667297363, "learning_rate": 9.450590835732698e-05, "loss": 0.761167049407959, "step": 1717, "token_acc": 0.7843866171003717 }, { "epoch": 1.9325084364454443, "grad_norm": 2.1771140098571777, "learning_rate": 9.449742856165809e-05, "loss": 0.8959881067276001, "step": 1718, "token_acc": 0.7448979591836735 }, { "epoch": 1.9336332958380202, "grad_norm": 1.3454042673110962, "learning_rate": 9.44889426080507e-05, "loss": 0.45067155361175537, "step": 1719, "token_acc": 0.8531073446327684 }, { "epoch": 1.9347581552305961, "grad_norm": 1.4642702341079712, "learning_rate": 9.448045049767916e-05, "loss": 0.5547469258308411, "step": 1720, "token_acc": 0.8157129000969933 }, { "epoch": 1.935883014623172, "grad_norm": 1.75333833694458, "learning_rate": 9.447195223171871e-05, "loss": 0.8574875593185425, "step": 1721, "token_acc": 0.7470525187566989 }, { "epoch": 1.9370078740157481, "grad_norm": 1.8932527303695679, "learning_rate": 9.446344781134539e-05, "loss": 0.9092689752578735, "step": 1722, "token_acc": 0.7487113402061856 }, { "epoch": 1.9381327334083238, "grad_norm": 1.6574608087539673, "learning_rate": 9.445493723773612e-05, "loss": 0.7494881749153137, "step": 1723, "token_acc": 0.7719869706840391 }, { "epoch": 1.9392575928009, "grad_norm": 1.687831163406372, "learning_rate": 9.444642051206867e-05, "loss": 0.7582780718803406, "step": 1724, "token_acc": 0.7693236714975845 }, { "epoch": 1.9403824521934758, "grad_norm": 1.490044116973877, "learning_rate": 9.443789763552165e-05, "loss": 0.8106259107589722, "step": 1725, "token_acc": 0.7685113016367888 }, { "epoch": 1.9415073115860517, "grad_norm": 1.5663930177688599, "learning_rate": 9.442936860927455e-05, "loss": 0.6313512325286865, "step": 1726, "token_acc": 0.8075601374570447 }, { "epoch": 1.9426321709786278, "grad_norm": 1.6084442138671875, "learning_rate": 9.442083343450767e-05, "loss": 0.6729409694671631, "step": 1727, "token_acc": 0.8026859504132231 }, { "epoch": 1.9437570303712035, "grad_norm": 1.5574522018432617, "learning_rate": 9.44122921124022e-05, "loss": 0.6922145485877991, "step": 1728, "token_acc": 0.8048220436280138 }, { "epoch": 1.9448818897637796, "grad_norm": 1.9340596199035645, "learning_rate": 9.440374464414013e-05, "loss": 0.8094472885131836, "step": 1729, "token_acc": 0.7576642335766424 }, { "epoch": 1.9460067491563553, "grad_norm": 2.331212043762207, "learning_rate": 9.439519103090435e-05, "loss": 0.6960645318031311, "step": 1730, "token_acc": 0.7834179357021996 }, { "epoch": 1.9471316085489314, "grad_norm": 1.7614645957946777, "learning_rate": 9.43866312738786e-05, "loss": 0.7475752830505371, "step": 1731, "token_acc": 0.7667804323094426 }, { "epoch": 1.9482564679415073, "grad_norm": 1.6217504739761353, "learning_rate": 9.437806537424743e-05, "loss": 0.6783014535903931, "step": 1732, "token_acc": 0.8043478260869565 }, { "epoch": 1.9493813273340832, "grad_norm": 2.043607711791992, "learning_rate": 9.436949333319628e-05, "loss": 0.7577610015869141, "step": 1733, "token_acc": 0.7732696897374701 }, { "epoch": 1.9505061867266593, "grad_norm": 1.8288496732711792, "learning_rate": 9.436091515191142e-05, "loss": 0.8039014339447021, "step": 1734, "token_acc": 0.7701793721973094 }, { "epoch": 1.951631046119235, "grad_norm": 1.833728313446045, "learning_rate": 9.435233083157995e-05, "loss": 0.7411763668060303, "step": 1735, "token_acc": 0.788 }, { "epoch": 1.952755905511811, "grad_norm": 1.7523689270019531, "learning_rate": 9.434374037338986e-05, "loss": 0.7435811758041382, "step": 1736, "token_acc": 0.7827130852340937 }, { "epoch": 1.953880764904387, "grad_norm": 1.7244371175765991, "learning_rate": 9.433514377852999e-05, "loss": 0.7238813638687134, "step": 1737, "token_acc": 0.7867564534231201 }, { "epoch": 1.955005624296963, "grad_norm": 1.8315660953521729, "learning_rate": 9.432654104818998e-05, "loss": 0.6673648953437805, "step": 1738, "token_acc": 0.7825421133231241 }, { "epoch": 1.9561304836895388, "grad_norm": 2.174734592437744, "learning_rate": 9.431793218356035e-05, "loss": 0.8763272762298584, "step": 1739, "token_acc": 0.7433290978398983 }, { "epoch": 1.9572553430821147, "grad_norm": 1.8126311302185059, "learning_rate": 9.430931718583251e-05, "loss": 0.7756106853485107, "step": 1740, "token_acc": 0.7746319365798414 }, { "epoch": 1.9583802024746908, "grad_norm": 1.575263500213623, "learning_rate": 9.430069605619863e-05, "loss": 0.7957792282104492, "step": 1741, "token_acc": 0.7514078841512469 }, { "epoch": 1.9595050618672665, "grad_norm": 1.7252517938613892, "learning_rate": 9.42920687958518e-05, "loss": 0.7775722146034241, "step": 1742, "token_acc": 0.7788671023965141 }, { "epoch": 1.9606299212598426, "grad_norm": 1.6339845657348633, "learning_rate": 9.428343540598594e-05, "loss": 0.7290801405906677, "step": 1743, "token_acc": 0.7940573770491803 }, { "epoch": 1.9617547806524185, "grad_norm": 1.6852025985717773, "learning_rate": 9.42747958877958e-05, "loss": 0.7462608814239502, "step": 1744, "token_acc": 0.7857142857142857 }, { "epoch": 1.9628796400449944, "grad_norm": 1.9958484172821045, "learning_rate": 9.426615024247698e-05, "loss": 0.594057023525238, "step": 1745, "token_acc": 0.8262195121951219 }, { "epoch": 1.9640044994375703, "grad_norm": 1.9564162492752075, "learning_rate": 9.425749847122595e-05, "loss": 0.8252936601638794, "step": 1746, "token_acc": 0.7534818941504178 }, { "epoch": 1.9651293588301462, "grad_norm": 1.5756064653396606, "learning_rate": 9.424884057524005e-05, "loss": 0.6970826387405396, "step": 1747, "token_acc": 0.7943037974683544 }, { "epoch": 1.9662542182227223, "grad_norm": 1.7236286401748657, "learning_rate": 9.42401765557174e-05, "loss": 0.7378159761428833, "step": 1748, "token_acc": 0.7777777777777778 }, { "epoch": 1.967379077615298, "grad_norm": 1.6261590719223022, "learning_rate": 9.423150641385699e-05, "loss": 0.7851699590682983, "step": 1749, "token_acc": 0.7689075630252101 }, { "epoch": 1.968503937007874, "grad_norm": 1.6708732843399048, "learning_rate": 9.422283015085869e-05, "loss": 0.841571569442749, "step": 1750, "token_acc": 0.747104247104247 }, { "epoch": 1.96962879640045, "grad_norm": 1.7676076889038086, "learning_rate": 9.42141477679232e-05, "loss": 0.8947119116783142, "step": 1751, "token_acc": 0.741701244813278 }, { "epoch": 1.9707536557930259, "grad_norm": 1.522920846939087, "learning_rate": 9.420545926625206e-05, "loss": 0.6645550727844238, "step": 1752, "token_acc": 0.7977099236641222 }, { "epoch": 1.9718785151856018, "grad_norm": 1.8115935325622559, "learning_rate": 9.419676464704764e-05, "loss": 0.7540801167488098, "step": 1753, "token_acc": 0.7620111731843575 }, { "epoch": 1.9730033745781776, "grad_norm": 2.128474473953247, "learning_rate": 9.418806391151321e-05, "loss": 0.7654923796653748, "step": 1754, "token_acc": 0.7606060606060606 }, { "epoch": 1.9741282339707538, "grad_norm": 1.7499016523361206, "learning_rate": 9.417935706085282e-05, "loss": 0.7347002029418945, "step": 1755, "token_acc": 0.7753705815279361 }, { "epoch": 1.9752530933633294, "grad_norm": 1.8632292747497559, "learning_rate": 9.417064409627141e-05, "loss": 0.7770131230354309, "step": 1756, "token_acc": 0.7639484978540773 }, { "epoch": 1.9763779527559056, "grad_norm": 1.8077771663665771, "learning_rate": 9.416192501897478e-05, "loss": 0.6661826372146606, "step": 1757, "token_acc": 0.8007915567282322 }, { "epoch": 1.9775028121484814, "grad_norm": 1.615902066230774, "learning_rate": 9.415319983016952e-05, "loss": 0.6819292306900024, "step": 1758, "token_acc": 0.7875586854460094 }, { "epoch": 1.9786276715410573, "grad_norm": 1.871978759765625, "learning_rate": 9.414446853106311e-05, "loss": 0.8485561013221741, "step": 1759, "token_acc": 0.7708609271523179 }, { "epoch": 1.9797525309336335, "grad_norm": 1.8398737907409668, "learning_rate": 9.413573112286385e-05, "loss": 0.8827280402183533, "step": 1760, "token_acc": 0.7448770491803278 }, { "epoch": 1.9808773903262091, "grad_norm": 1.9690579175949097, "learning_rate": 9.412698760678091e-05, "loss": 0.7351076602935791, "step": 1761, "token_acc": 0.7615062761506276 }, { "epoch": 1.9820022497187852, "grad_norm": 1.8016310930252075, "learning_rate": 9.41182379840243e-05, "loss": 0.6049321889877319, "step": 1762, "token_acc": 0.8046153846153846 }, { "epoch": 1.983127109111361, "grad_norm": 1.7863157987594604, "learning_rate": 9.410948225580486e-05, "loss": 0.7700032591819763, "step": 1763, "token_acc": 0.7566964285714286 }, { "epoch": 1.984251968503937, "grad_norm": 1.9508986473083496, "learning_rate": 9.410072042333428e-05, "loss": 0.8398230075836182, "step": 1764, "token_acc": 0.7629911280101395 }, { "epoch": 1.985376827896513, "grad_norm": 1.4609932899475098, "learning_rate": 9.40919524878251e-05, "loss": 0.5954247117042542, "step": 1765, "token_acc": 0.8202020202020202 }, { "epoch": 1.9865016872890888, "grad_norm": 1.646357774734497, "learning_rate": 9.40831784504907e-05, "loss": 0.8968372941017151, "step": 1766, "token_acc": 0.7313807531380753 }, { "epoch": 1.987626546681665, "grad_norm": 2.0092577934265137, "learning_rate": 9.407439831254532e-05, "loss": 0.7474929094314575, "step": 1767, "token_acc": 0.7508650519031141 }, { "epoch": 1.9887514060742406, "grad_norm": 1.314387321472168, "learning_rate": 9.406561207520401e-05, "loss": 0.5247405171394348, "step": 1768, "token_acc": 0.8416030534351145 }, { "epoch": 1.9898762654668167, "grad_norm": 1.994791865348816, "learning_rate": 9.40568197396827e-05, "loss": 0.7608156800270081, "step": 1769, "token_acc": 0.7727930535455861 }, { "epoch": 1.9910011248593926, "grad_norm": 1.7910115718841553, "learning_rate": 9.404802130719816e-05, "loss": 0.822160542011261, "step": 1770, "token_acc": 0.7496740547588006 }, { "epoch": 1.9921259842519685, "grad_norm": 1.5440673828125, "learning_rate": 9.403921677896798e-05, "loss": 0.684856116771698, "step": 1771, "token_acc": 0.7910112359550562 }, { "epoch": 1.9932508436445444, "grad_norm": 1.658937931060791, "learning_rate": 9.40304061562106e-05, "loss": 0.7293343544006348, "step": 1772, "token_acc": 0.7834951456310679 }, { "epoch": 1.9943757030371203, "grad_norm": 1.7766475677490234, "learning_rate": 9.402158944014532e-05, "loss": 0.8108954429626465, "step": 1773, "token_acc": 0.7526627218934911 }, { "epoch": 1.9955005624296964, "grad_norm": 1.7852859497070312, "learning_rate": 9.401276663199226e-05, "loss": 0.6225953102111816, "step": 1774, "token_acc": 0.8042857142857143 }, { "epoch": 1.996625421822272, "grad_norm": 1.6275174617767334, "learning_rate": 9.400393773297242e-05, "loss": 0.7315754890441895, "step": 1775, "token_acc": 0.777542372881356 }, { "epoch": 1.9977502812148482, "grad_norm": 1.8765822649002075, "learning_rate": 9.399510274430759e-05, "loss": 0.6753972768783569, "step": 1776, "token_acc": 0.7983870967741935 }, { "epoch": 1.998875140607424, "grad_norm": 1.6872707605361938, "learning_rate": 9.398626166722046e-05, "loss": 0.7224234938621521, "step": 1777, "token_acc": 0.7837259100642399 }, { "epoch": 2.0, "grad_norm": 1.5951610803604126, "learning_rate": 9.397741450293452e-05, "loss": 0.6941643953323364, "step": 1778, "token_acc": 0.7708947885939036 }, { "epoch": 2.001124859392576, "grad_norm": 1.4529222249984741, "learning_rate": 9.396856125267413e-05, "loss": 0.5729993581771851, "step": 1779, "token_acc": 0.8264738598442715 }, { "epoch": 2.002249718785152, "grad_norm": 1.320229172706604, "learning_rate": 9.395970191766445e-05, "loss": 0.48828279972076416, "step": 1780, "token_acc": 0.8482039397450754 }, { "epoch": 2.003374578177728, "grad_norm": 1.5366593599319458, "learning_rate": 9.395083649913154e-05, "loss": 0.564244270324707, "step": 1781, "token_acc": 0.8359281437125748 }, { "epoch": 2.0044994375703036, "grad_norm": 1.428917646408081, "learning_rate": 9.394196499830225e-05, "loss": 0.5672052502632141, "step": 1782, "token_acc": 0.8319088319088319 }, { "epoch": 2.0056242969628797, "grad_norm": 1.2427958250045776, "learning_rate": 9.393308741640433e-05, "loss": 0.34031832218170166, "step": 1783, "token_acc": 0.886685552407932 }, { "epoch": 2.0067491563554554, "grad_norm": 1.5740464925765991, "learning_rate": 9.39242037546663e-05, "loss": 0.606536328792572, "step": 1784, "token_acc": 0.8187134502923976 }, { "epoch": 2.0078740157480315, "grad_norm": 1.467134952545166, "learning_rate": 9.391531401431758e-05, "loss": 0.6324443817138672, "step": 1785, "token_acc": 0.8028933092224232 }, { "epoch": 2.0089988751406076, "grad_norm": 1.6861579418182373, "learning_rate": 9.39064181965884e-05, "loss": 0.5644378662109375, "step": 1786, "token_acc": 0.8182912154031288 }, { "epoch": 2.0101237345331833, "grad_norm": 1.4811404943466187, "learning_rate": 9.389751630270984e-05, "loss": 0.5523676872253418, "step": 1787, "token_acc": 0.8276157804459692 }, { "epoch": 2.0112485939257594, "grad_norm": 1.5089551210403442, "learning_rate": 9.388860833391382e-05, "loss": 0.4588226079940796, "step": 1788, "token_acc": 0.8419354838709677 }, { "epoch": 2.012373453318335, "grad_norm": 1.7989730834960938, "learning_rate": 9.38796942914331e-05, "loss": 0.549130380153656, "step": 1789, "token_acc": 0.8260292164674635 }, { "epoch": 2.013498312710911, "grad_norm": 1.4946119785308838, "learning_rate": 9.38707741765013e-05, "loss": 0.520561695098877, "step": 1790, "token_acc": 0.8321492007104796 }, { "epoch": 2.014623172103487, "grad_norm": 1.9276400804519653, "learning_rate": 9.386184799035285e-05, "loss": 0.5950493216514587, "step": 1791, "token_acc": 0.8006952491309386 }, { "epoch": 2.015748031496063, "grad_norm": 1.8098806142807007, "learning_rate": 9.385291573422301e-05, "loss": 0.5263256430625916, "step": 1792, "token_acc": 0.8463768115942029 }, { "epoch": 2.016872890888639, "grad_norm": 1.9555548429489136, "learning_rate": 9.384397740934793e-05, "loss": 0.4567889869213104, "step": 1793, "token_acc": 0.8409090909090909 }, { "epoch": 2.0179977502812148, "grad_norm": 1.8558663129806519, "learning_rate": 9.38350330169646e-05, "loss": 0.5451276302337646, "step": 1794, "token_acc": 0.8294117647058824 }, { "epoch": 2.019122609673791, "grad_norm": 1.7886114120483398, "learning_rate": 9.382608255831075e-05, "loss": 0.4431186318397522, "step": 1795, "token_acc": 0.8459937565036421 }, { "epoch": 2.0202474690663665, "grad_norm": 2.338808298110962, "learning_rate": 9.381712603462508e-05, "loss": 0.49367907643318176, "step": 1796, "token_acc": 0.8436482084690554 }, { "epoch": 2.0213723284589427, "grad_norm": 2.089036226272583, "learning_rate": 9.380816344714705e-05, "loss": 0.6674789786338806, "step": 1797, "token_acc": 0.8168224299065421 }, { "epoch": 2.0224971878515188, "grad_norm": 2.0154521465301514, "learning_rate": 9.379919479711698e-05, "loss": 0.5342321395874023, "step": 1798, "token_acc": 0.8289623717217788 }, { "epoch": 2.0236220472440944, "grad_norm": 2.297985315322876, "learning_rate": 9.379022008577603e-05, "loss": 0.5679925680160522, "step": 1799, "token_acc": 0.8362068965517241 }, { "epoch": 2.0247469066366706, "grad_norm": 2.073805809020996, "learning_rate": 9.37812393143662e-05, "loss": 0.584022045135498, "step": 1800, "token_acc": 0.8238747553816047 }, { "epoch": 2.0247469066366706, "eval_loss": 0.9598243832588196, "eval_runtime": 31.6839, "eval_samples_per_second": 25.344, "eval_steps_per_second": 3.188, "eval_token_acc": 0.7400238500099375, "step": 1800 }, { "epoch": 2.0258717660292462, "grad_norm": 2.0080225467681885, "learning_rate": 9.377225248413033e-05, "loss": 0.5035592317581177, "step": 1801, "token_acc": 0.8337595907928389 }, { "epoch": 2.0269966254218224, "grad_norm": 1.8987828493118286, "learning_rate": 9.376325959631209e-05, "loss": 0.6288524866104126, "step": 1802, "token_acc": 0.8017391304347826 }, { "epoch": 2.028121484814398, "grad_norm": 2.005178451538086, "learning_rate": 9.3754260652156e-05, "loss": 0.6622236967086792, "step": 1803, "token_acc": 0.8139931740614335 }, { "epoch": 2.029246344206974, "grad_norm": 2.1127383708953857, "learning_rate": 9.374525565290739e-05, "loss": 0.5812780857086182, "step": 1804, "token_acc": 0.8140096618357487 }, { "epoch": 2.0303712035995503, "grad_norm": 2.3743793964385986, "learning_rate": 9.373624459981249e-05, "loss": 0.54072505235672, "step": 1805, "token_acc": 0.8387553041018387 }, { "epoch": 2.031496062992126, "grad_norm": 2.212735176086426, "learning_rate": 9.372722749411829e-05, "loss": 0.468961238861084, "step": 1806, "token_acc": 0.8416149068322981 }, { "epoch": 2.032620922384702, "grad_norm": 1.8793033361434937, "learning_rate": 9.371820433707268e-05, "loss": 0.4455225169658661, "step": 1807, "token_acc": 0.8670634920634921 }, { "epoch": 2.0337457817772777, "grad_norm": 2.215373992919922, "learning_rate": 9.370917512992434e-05, "loss": 0.5943837761878967, "step": 1808, "token_acc": 0.8243688254665203 }, { "epoch": 2.034870641169854, "grad_norm": 2.1041860580444336, "learning_rate": 9.370013987392282e-05, "loss": 0.6006908416748047, "step": 1809, "token_acc": 0.8157894736842105 }, { "epoch": 2.0359955005624295, "grad_norm": 1.938887596130371, "learning_rate": 9.36910985703185e-05, "loss": 0.5448857545852661, "step": 1810, "token_acc": 0.8398230088495575 }, { "epoch": 2.0371203599550056, "grad_norm": 2.165172815322876, "learning_rate": 9.368205122036259e-05, "loss": 0.5714267492294312, "step": 1811, "token_acc": 0.8253275109170306 }, { "epoch": 2.0382452193475817, "grad_norm": 2.088820219039917, "learning_rate": 9.367299782530714e-05, "loss": 0.5905463099479675, "step": 1812, "token_acc": 0.8331479421579533 }, { "epoch": 2.0393700787401574, "grad_norm": 1.8692917823791504, "learning_rate": 9.366393838640505e-05, "loss": 0.537200927734375, "step": 1813, "token_acc": 0.8225957049486461 }, { "epoch": 2.0404949381327335, "grad_norm": 1.8953375816345215, "learning_rate": 9.365487290491002e-05, "loss": 0.5720368027687073, "step": 1814, "token_acc": 0.8320355951056729 }, { "epoch": 2.041619797525309, "grad_norm": 1.6364414691925049, "learning_rate": 9.364580138207663e-05, "loss": 0.3696390986442566, "step": 1815, "token_acc": 0.8675742574257426 }, { "epoch": 2.0427446569178853, "grad_norm": 1.6489354372024536, "learning_rate": 9.363672381916027e-05, "loss": 0.44099152088165283, "step": 1816, "token_acc": 0.8507317073170731 }, { "epoch": 2.043869516310461, "grad_norm": 2.0194814205169678, "learning_rate": 9.362764021741714e-05, "loss": 0.5875439643859863, "step": 1817, "token_acc": 0.8187995469988675 }, { "epoch": 2.044994375703037, "grad_norm": 2.560995101928711, "learning_rate": 9.361855057810437e-05, "loss": 0.5137221813201904, "step": 1818, "token_acc": 0.8383311603650587 }, { "epoch": 2.046119235095613, "grad_norm": 1.9255480766296387, "learning_rate": 9.360945490247981e-05, "loss": 0.4563831686973572, "step": 1819, "token_acc": 0.8467532467532467 }, { "epoch": 2.047244094488189, "grad_norm": 1.528159737586975, "learning_rate": 9.360035319180222e-05, "loss": 0.35568204522132874, "step": 1820, "token_acc": 0.8888888888888888 }, { "epoch": 2.048368953880765, "grad_norm": 2.007321357727051, "learning_rate": 9.35912454473312e-05, "loss": 0.6009509563446045, "step": 1821, "token_acc": 0.7944664031620553 }, { "epoch": 2.0494938132733407, "grad_norm": 1.9727510213851929, "learning_rate": 9.358213167032711e-05, "loss": 0.5430801510810852, "step": 1822, "token_acc": 0.816931216931217 }, { "epoch": 2.050618672665917, "grad_norm": 2.146092176437378, "learning_rate": 9.357301186205118e-05, "loss": 0.49481984972953796, "step": 1823, "token_acc": 0.8583450210378681 }, { "epoch": 2.0517435320584925, "grad_norm": 1.929977297782898, "learning_rate": 9.356388602376556e-05, "loss": 0.4513944387435913, "step": 1824, "token_acc": 0.8583333333333333 }, { "epoch": 2.0528683914510686, "grad_norm": 1.6132285594940186, "learning_rate": 9.355475415673311e-05, "loss": 0.4931938052177429, "step": 1825, "token_acc": 0.845771144278607 }, { "epoch": 2.0539932508436447, "grad_norm": 2.0297393798828125, "learning_rate": 9.354561626221758e-05, "loss": 0.45182302594184875, "step": 1826, "token_acc": 0.851063829787234 }, { "epoch": 2.0551181102362204, "grad_norm": 2.1618494987487793, "learning_rate": 9.353647234148357e-05, "loss": 0.38132697343826294, "step": 1827, "token_acc": 0.8698752228163993 }, { "epoch": 2.0562429696287965, "grad_norm": 1.9798177480697632, "learning_rate": 9.352732239579648e-05, "loss": 0.49974358081817627, "step": 1828, "token_acc": 0.8391534391534392 }, { "epoch": 2.057367829021372, "grad_norm": 2.103803873062134, "learning_rate": 9.351816642642256e-05, "loss": 0.4903677701950073, "step": 1829, "token_acc": 0.8509895227008148 }, { "epoch": 2.0584926884139483, "grad_norm": 1.9696425199508667, "learning_rate": 9.350900443462891e-05, "loss": 0.6162346601486206, "step": 1830, "token_acc": 0.817258883248731 }, { "epoch": 2.0596175478065244, "grad_norm": 1.9219297170639038, "learning_rate": 9.34998364216834e-05, "loss": 0.7106702327728271, "step": 1831, "token_acc": 0.7918622848200313 }, { "epoch": 2.0607424071991, "grad_norm": 1.8479409217834473, "learning_rate": 9.349066238885484e-05, "loss": 0.4023032486438751, "step": 1832, "token_acc": 0.8493150684931506 }, { "epoch": 2.061867266591676, "grad_norm": 1.8464622497558594, "learning_rate": 9.348148233741276e-05, "loss": 0.4270519018173218, "step": 1833, "token_acc": 0.8653658536585366 }, { "epoch": 2.062992125984252, "grad_norm": 1.999743938446045, "learning_rate": 9.347229626862761e-05, "loss": 0.39500898122787476, "step": 1834, "token_acc": 0.8835616438356164 }, { "epoch": 2.064116985376828, "grad_norm": 2.3011107444763184, "learning_rate": 9.346310418377063e-05, "loss": 0.5377318859100342, "step": 1835, "token_acc": 0.8419753086419753 }, { "epoch": 2.0652418447694036, "grad_norm": 1.9269235134124756, "learning_rate": 9.345390608411388e-05, "loss": 0.35719960927963257, "step": 1836, "token_acc": 0.8906048906048906 }, { "epoch": 2.0663667041619798, "grad_norm": 1.7989351749420166, "learning_rate": 9.34447019709303e-05, "loss": 0.4615640938282013, "step": 1837, "token_acc": 0.8500986193293886 }, { "epoch": 2.067491563554556, "grad_norm": 2.76644229888916, "learning_rate": 9.343549184549359e-05, "loss": 0.5467649102210999, "step": 1838, "token_acc": 0.8175182481751825 }, { "epoch": 2.0686164229471316, "grad_norm": 1.8797574043273926, "learning_rate": 9.342627570907838e-05, "loss": 0.43320900201797485, "step": 1839, "token_acc": 0.8742791234140715 }, { "epoch": 2.0697412823397077, "grad_norm": 1.9696621894836426, "learning_rate": 9.341705356296006e-05, "loss": 0.6058914661407471, "step": 1840, "token_acc": 0.8262108262108262 }, { "epoch": 2.0708661417322833, "grad_norm": 2.282411575317383, "learning_rate": 9.340782540841486e-05, "loss": 0.5845771431922913, "step": 1841, "token_acc": 0.8248175182481752 }, { "epoch": 2.0719910011248595, "grad_norm": 2.039775848388672, "learning_rate": 9.339859124671985e-05, "loss": 0.5987712740898132, "step": 1842, "token_acc": 0.817658349328215 }, { "epoch": 2.073115860517435, "grad_norm": 2.47556209564209, "learning_rate": 9.338935107915297e-05, "loss": 0.5853884220123291, "step": 1843, "token_acc": 0.8114143920595533 }, { "epoch": 2.0742407199100112, "grad_norm": 2.2046005725860596, "learning_rate": 9.33801049069929e-05, "loss": 0.43951481580734253, "step": 1844, "token_acc": 0.8608695652173913 }, { "epoch": 2.0753655793025874, "grad_norm": 2.3352725505828857, "learning_rate": 9.337085273151924e-05, "loss": 0.5199693441390991, "step": 1845, "token_acc": 0.8275862068965517 }, { "epoch": 2.076490438695163, "grad_norm": 2.2160580158233643, "learning_rate": 9.336159455401236e-05, "loss": 0.5349748730659485, "step": 1846, "token_acc": 0.8339350180505415 }, { "epoch": 2.077615298087739, "grad_norm": 1.9741121530532837, "learning_rate": 9.335233037575351e-05, "loss": 0.5549187660217285, "step": 1847, "token_acc": 0.8339731285988484 }, { "epoch": 2.078740157480315, "grad_norm": 2.1727938652038574, "learning_rate": 9.334306019802475e-05, "loss": 0.6482890844345093, "step": 1848, "token_acc": 0.8059548254620124 }, { "epoch": 2.079865016872891, "grad_norm": 2.255614995956421, "learning_rate": 9.333378402210895e-05, "loss": 0.5628150701522827, "step": 1849, "token_acc": 0.8307210031347962 }, { "epoch": 2.0809898762654666, "grad_norm": 2.1833908557891846, "learning_rate": 9.332450184928984e-05, "loss": 0.5705844163894653, "step": 1850, "token_acc": 0.8293241695303551 }, { "epoch": 2.0821147356580427, "grad_norm": 2.490352153778076, "learning_rate": 9.331521368085197e-05, "loss": 0.6584864854812622, "step": 1851, "token_acc": 0.8082901554404145 }, { "epoch": 2.083239595050619, "grad_norm": 1.9390225410461426, "learning_rate": 9.330591951808068e-05, "loss": 0.5798105001449585, "step": 1852, "token_acc": 0.8136363636363636 }, { "epoch": 2.0843644544431945, "grad_norm": 2.073866128921509, "learning_rate": 9.329661936226222e-05, "loss": 0.6313127279281616, "step": 1853, "token_acc": 0.8051282051282052 }, { "epoch": 2.0854893138357706, "grad_norm": 2.038283348083496, "learning_rate": 9.328731321468363e-05, "loss": 0.6087839603424072, "step": 1854, "token_acc": 0.8072976054732041 }, { "epoch": 2.0866141732283463, "grad_norm": 2.267965793609619, "learning_rate": 9.327800107663273e-05, "loss": 0.6907522678375244, "step": 1855, "token_acc": 0.8104325699745547 }, { "epoch": 2.0877390326209224, "grad_norm": 1.9894694089889526, "learning_rate": 9.326868294939826e-05, "loss": 0.5680023431777954, "step": 1856, "token_acc": 0.8358050847457628 }, { "epoch": 2.0888638920134985, "grad_norm": 2.007499933242798, "learning_rate": 9.325935883426971e-05, "loss": 0.5809674263000488, "step": 1857, "token_acc": 0.8124341412012644 }, { "epoch": 2.089988751406074, "grad_norm": 2.435991048812866, "learning_rate": 9.325002873253746e-05, "loss": 0.5655883550643921, "step": 1858, "token_acc": 0.8177777777777778 }, { "epoch": 2.0911136107986503, "grad_norm": 2.3087265491485596, "learning_rate": 9.324069264549266e-05, "loss": 0.5642764568328857, "step": 1859, "token_acc": 0.8281879194630872 }, { "epoch": 2.092238470191226, "grad_norm": 1.9329090118408203, "learning_rate": 9.323135057442733e-05, "loss": 0.49625784158706665, "step": 1860, "token_acc": 0.8425287356321839 }, { "epoch": 2.093363329583802, "grad_norm": 2.0574023723602295, "learning_rate": 9.322200252063432e-05, "loss": 0.4339604675769806, "step": 1861, "token_acc": 0.8657894736842106 }, { "epoch": 2.094488188976378, "grad_norm": 1.8923953771591187, "learning_rate": 9.321264848540729e-05, "loss": 0.5162262916564941, "step": 1862, "token_acc": 0.8294314381270903 }, { "epoch": 2.095613048368954, "grad_norm": 2.019705295562744, "learning_rate": 9.320328847004072e-05, "loss": 0.43255865573883057, "step": 1863, "token_acc": 0.850632911392405 }, { "epoch": 2.09673790776153, "grad_norm": 2.290052652359009, "learning_rate": 9.319392247582992e-05, "loss": 0.6026336550712585, "step": 1864, "token_acc": 0.810062893081761 }, { "epoch": 2.0978627671541057, "grad_norm": 2.0776734352111816, "learning_rate": 9.318455050407106e-05, "loss": 0.49202626943588257, "step": 1865, "token_acc": 0.8504566210045662 }, { "epoch": 2.098987626546682, "grad_norm": 1.8327233791351318, "learning_rate": 9.317517255606113e-05, "loss": 0.49580320715904236, "step": 1866, "token_acc": 0.8494288681204569 }, { "epoch": 2.1001124859392575, "grad_norm": 2.229893684387207, "learning_rate": 9.316578863309789e-05, "loss": 0.5720314979553223, "step": 1867, "token_acc": 0.831353919239905 }, { "epoch": 2.1012373453318336, "grad_norm": 2.3044612407684326, "learning_rate": 9.315639873648e-05, "loss": 0.5979510545730591, "step": 1868, "token_acc": 0.8155784650630011 }, { "epoch": 2.1023622047244093, "grad_norm": 2.124204158782959, "learning_rate": 9.31470028675069e-05, "loss": 0.5949258804321289, "step": 1869, "token_acc": 0.8115631691648822 }, { "epoch": 2.1034870641169854, "grad_norm": 2.4849390983581543, "learning_rate": 9.313760102747886e-05, "loss": 0.489757776260376, "step": 1870, "token_acc": 0.8389355742296919 }, { "epoch": 2.1046119235095615, "grad_norm": 1.7600513696670532, "learning_rate": 9.312819321769702e-05, "loss": 0.4166303873062134, "step": 1871, "token_acc": 0.8635907723169508 }, { "epoch": 2.105736782902137, "grad_norm": 2.0465142726898193, "learning_rate": 9.311877943946327e-05, "loss": 0.46542567014694214, "step": 1872, "token_acc": 0.8419753086419753 }, { "epoch": 2.1068616422947133, "grad_norm": 2.0923233032226562, "learning_rate": 9.310935969408042e-05, "loss": 0.5101466178894043, "step": 1873, "token_acc": 0.8346111719605696 }, { "epoch": 2.107986501687289, "grad_norm": 1.939997673034668, "learning_rate": 9.309993398285203e-05, "loss": 0.5358009338378906, "step": 1874, "token_acc": 0.8289473684210527 }, { "epoch": 2.109111361079865, "grad_norm": 2.296457052230835, "learning_rate": 9.30905023070825e-05, "loss": 0.5482941269874573, "step": 1875, "token_acc": 0.8148148148148148 }, { "epoch": 2.1102362204724407, "grad_norm": 2.022308111190796, "learning_rate": 9.308106466807709e-05, "loss": 0.595206618309021, "step": 1876, "token_acc": 0.8189493433395872 }, { "epoch": 2.111361079865017, "grad_norm": 2.2380013465881348, "learning_rate": 9.307162106714185e-05, "loss": 0.3469415009021759, "step": 1877, "token_acc": 0.88 }, { "epoch": 2.112485939257593, "grad_norm": 2.4180166721343994, "learning_rate": 9.306217150558367e-05, "loss": 0.5436742305755615, "step": 1878, "token_acc": 0.8149882903981265 }, { "epoch": 2.1136107986501687, "grad_norm": 2.195091724395752, "learning_rate": 9.305271598471026e-05, "loss": 0.500542163848877, "step": 1879, "token_acc": 0.8436317780580076 }, { "epoch": 2.1147356580427448, "grad_norm": 2.288172960281372, "learning_rate": 9.304325450583016e-05, "loss": 0.5977599024772644, "step": 1880, "token_acc": 0.8190045248868778 }, { "epoch": 2.1158605174353204, "grad_norm": 2.188896417617798, "learning_rate": 9.303378707025272e-05, "loss": 0.5068740844726562, "step": 1881, "token_acc": 0.8552971576227391 }, { "epoch": 2.1169853768278966, "grad_norm": 2.13777494430542, "learning_rate": 9.302431367928813e-05, "loss": 0.5607219338417053, "step": 1882, "token_acc": 0.8274793388429752 }, { "epoch": 2.1181102362204722, "grad_norm": 2.4947640895843506, "learning_rate": 9.301483433424742e-05, "loss": 0.45497071743011475, "step": 1883, "token_acc": 0.8421787709497207 }, { "epoch": 2.1192350956130483, "grad_norm": 1.9898544549942017, "learning_rate": 9.300534903644239e-05, "loss": 0.5237184166908264, "step": 1884, "token_acc": 0.8467670504871567 }, { "epoch": 2.1203599550056245, "grad_norm": 2.0019962787628174, "learning_rate": 9.299585778718572e-05, "loss": 0.477891206741333, "step": 1885, "token_acc": 0.8390191897654584 }, { "epoch": 2.1214848143982, "grad_norm": 2.0398201942443848, "learning_rate": 9.298636058779089e-05, "loss": 0.4255814850330353, "step": 1886, "token_acc": 0.8688760806916427 }, { "epoch": 2.1226096737907763, "grad_norm": 2.476125717163086, "learning_rate": 9.297685743957221e-05, "loss": 0.5074987411499023, "step": 1887, "token_acc": 0.8416547788873039 }, { "epoch": 2.123734533183352, "grad_norm": 1.9036121368408203, "learning_rate": 9.296734834384477e-05, "loss": 0.5759075880050659, "step": 1888, "token_acc": 0.8218151540383014 }, { "epoch": 2.124859392575928, "grad_norm": 2.2912051677703857, "learning_rate": 9.295783330192459e-05, "loss": 0.5521501302719116, "step": 1889, "token_acc": 0.8331288343558282 }, { "epoch": 2.1259842519685037, "grad_norm": 1.8894997835159302, "learning_rate": 9.294831231512835e-05, "loss": 0.5245054960250854, "step": 1890, "token_acc": 0.8375 }, { "epoch": 2.12710911136108, "grad_norm": 1.8031961917877197, "learning_rate": 9.293878538477375e-05, "loss": 0.5533608198165894, "step": 1891, "token_acc": 0.8225957049486461 }, { "epoch": 2.128233970753656, "grad_norm": 2.4241721630096436, "learning_rate": 9.292925251217913e-05, "loss": 0.5073611736297607, "step": 1892, "token_acc": 0.8333333333333334 }, { "epoch": 2.1293588301462316, "grad_norm": 1.9025150537490845, "learning_rate": 9.291971369866376e-05, "loss": 0.4912732243537903, "step": 1893, "token_acc": 0.8368087035358114 }, { "epoch": 2.1304836895388077, "grad_norm": 2.2372052669525146, "learning_rate": 9.29101689455477e-05, "loss": 0.6180934309959412, "step": 1894, "token_acc": 0.8051224944320713 }, { "epoch": 2.1316085489313834, "grad_norm": 2.117095947265625, "learning_rate": 9.290061825415185e-05, "loss": 0.5755094289779663, "step": 1895, "token_acc": 0.8118811881188119 }, { "epoch": 2.1327334083239595, "grad_norm": 2.322204351425171, "learning_rate": 9.28910616257979e-05, "loss": 0.4940233826637268, "step": 1896, "token_acc": 0.8294679399727148 }, { "epoch": 2.1338582677165356, "grad_norm": 2.115326404571533, "learning_rate": 9.288149906180838e-05, "loss": 0.6205376386642456, "step": 1897, "token_acc": 0.8147368421052632 }, { "epoch": 2.1349831271091113, "grad_norm": 1.9599562883377075, "learning_rate": 9.287193056350665e-05, "loss": 0.5449007749557495, "step": 1898, "token_acc": 0.821505376344086 }, { "epoch": 2.1361079865016874, "grad_norm": 2.263927459716797, "learning_rate": 9.286235613221688e-05, "loss": 0.5308307409286499, "step": 1899, "token_acc": 0.8381430363864492 }, { "epoch": 2.137232845894263, "grad_norm": 2.244786024093628, "learning_rate": 9.285277576926404e-05, "loss": 0.6108083128929138, "step": 1900, "token_acc": 0.8067685589519651 }, { "epoch": 2.138357705286839, "grad_norm": 1.8759313821792603, "learning_rate": 9.284318947597397e-05, "loss": 0.47218793630599976, "step": 1901, "token_acc": 0.8549356223175966 }, { "epoch": 2.139482564679415, "grad_norm": 1.9036582708358765, "learning_rate": 9.28335972536733e-05, "loss": 0.608550488948822, "step": 1902, "token_acc": 0.820017559262511 }, { "epoch": 2.140607424071991, "grad_norm": 1.9939334392547607, "learning_rate": 9.282399910368947e-05, "loss": 0.4002740979194641, "step": 1903, "token_acc": 0.8748403575989783 }, { "epoch": 2.141732283464567, "grad_norm": 1.9554425477981567, "learning_rate": 9.281439502735078e-05, "loss": 0.44533196091651917, "step": 1904, "token_acc": 0.8614993646759848 }, { "epoch": 2.142857142857143, "grad_norm": 1.7588645219802856, "learning_rate": 9.28047850259863e-05, "loss": 0.43421122431755066, "step": 1905, "token_acc": 0.866923818707811 }, { "epoch": 2.143982002249719, "grad_norm": 2.0675697326660156, "learning_rate": 9.279516910092595e-05, "loss": 0.49433180689811707, "step": 1906, "token_acc": 0.8366093366093366 }, { "epoch": 2.1451068616422946, "grad_norm": 2.214768886566162, "learning_rate": 9.278554725350048e-05, "loss": 0.569645881652832, "step": 1907, "token_acc": 0.8147684605757196 }, { "epoch": 2.1462317210348707, "grad_norm": 1.7874375581741333, "learning_rate": 9.277591948504143e-05, "loss": 0.5801575183868408, "step": 1908, "token_acc": 0.8145800316957211 }, { "epoch": 2.1473565804274464, "grad_norm": 2.0303118228912354, "learning_rate": 9.276628579688117e-05, "loss": 0.42944127321243286, "step": 1909, "token_acc": 0.8569570871261378 }, { "epoch": 2.1484814398200225, "grad_norm": 2.110133171081543, "learning_rate": 9.275664619035291e-05, "loss": 0.522466778755188, "step": 1910, "token_acc": 0.835075493612079 }, { "epoch": 2.1496062992125986, "grad_norm": 1.9725877046585083, "learning_rate": 9.274700066679065e-05, "loss": 0.4544048011302948, "step": 1911, "token_acc": 0.8569794050343249 }, { "epoch": 2.1507311586051743, "grad_norm": 1.9160178899765015, "learning_rate": 9.273734922752921e-05, "loss": 0.4564964771270752, "step": 1912, "token_acc": 0.8503184713375797 }, { "epoch": 2.1518560179977504, "grad_norm": 2.2793495655059814, "learning_rate": 9.272769187390427e-05, "loss": 0.5321696996688843, "step": 1913, "token_acc": 0.8320839580209896 }, { "epoch": 2.152980877390326, "grad_norm": 2.215991973876953, "learning_rate": 9.271802860725228e-05, "loss": 0.5672453045845032, "step": 1914, "token_acc": 0.8193916349809885 }, { "epoch": 2.154105736782902, "grad_norm": 2.1247527599334717, "learning_rate": 9.270835942891052e-05, "loss": 0.5877945423126221, "step": 1915, "token_acc": 0.8006912442396313 }, { "epoch": 2.1552305961754783, "grad_norm": 1.7920665740966797, "learning_rate": 9.269868434021712e-05, "loss": 0.5266413688659668, "step": 1916, "token_acc": 0.828649138712602 }, { "epoch": 2.156355455568054, "grad_norm": 2.2115824222564697, "learning_rate": 9.268900334251097e-05, "loss": 0.4921196699142456, "step": 1917, "token_acc": 0.8362573099415205 }, { "epoch": 2.15748031496063, "grad_norm": 2.1077704429626465, "learning_rate": 9.267931643713183e-05, "loss": 0.42040133476257324, "step": 1918, "token_acc": 0.8745476477683957 }, { "epoch": 2.1586051743532058, "grad_norm": 2.1594812870025635, "learning_rate": 9.266962362542024e-05, "loss": 0.49352598190307617, "step": 1919, "token_acc": 0.8420413122721749 }, { "epoch": 2.159730033745782, "grad_norm": 1.938503623008728, "learning_rate": 9.26599249087176e-05, "loss": 0.5844432711601257, "step": 1920, "token_acc": 0.8204613841524574 }, { "epoch": 2.1608548931383575, "grad_norm": 2.039954662322998, "learning_rate": 9.265022028836607e-05, "loss": 0.5787448883056641, "step": 1921, "token_acc": 0.8267543859649122 }, { "epoch": 2.1619797525309337, "grad_norm": 2.226809024810791, "learning_rate": 9.26405097657087e-05, "loss": 0.49222439527511597, "step": 1922, "token_acc": 0.8435462842242504 }, { "epoch": 2.16310461192351, "grad_norm": 2.199228048324585, "learning_rate": 9.263079334208929e-05, "loss": 0.5039257407188416, "step": 1923, "token_acc": 0.820627802690583 }, { "epoch": 2.1642294713160855, "grad_norm": 2.2552011013031006, "learning_rate": 9.262107101885247e-05, "loss": 0.48725077509880066, "step": 1924, "token_acc": 0.8411458333333334 }, { "epoch": 2.1653543307086616, "grad_norm": 2.0218029022216797, "learning_rate": 9.261134279734373e-05, "loss": 0.5448145866394043, "step": 1925, "token_acc": 0.822992700729927 }, { "epoch": 2.1664791901012372, "grad_norm": 2.0978784561157227, "learning_rate": 9.260160867890932e-05, "loss": 0.5250219702720642, "step": 1926, "token_acc": 0.8456299659477866 }, { "epoch": 2.1676040494938134, "grad_norm": 2.1939492225646973, "learning_rate": 9.259186866489636e-05, "loss": 0.5041978359222412, "step": 1927, "token_acc": 0.8455598455598455 }, { "epoch": 2.168728908886389, "grad_norm": 2.1055140495300293, "learning_rate": 9.258212275665272e-05, "loss": 0.4086991846561432, "step": 1928, "token_acc": 0.8801897983392646 }, { "epoch": 2.169853768278965, "grad_norm": 2.0539889335632324, "learning_rate": 9.257237095552717e-05, "loss": 0.47205787897109985, "step": 1929, "token_acc": 0.8428290766208252 }, { "epoch": 2.1709786276715413, "grad_norm": 2.273279905319214, "learning_rate": 9.25626132628692e-05, "loss": 0.6634788513183594, "step": 1930, "token_acc": 0.8049065420560748 }, { "epoch": 2.172103487064117, "grad_norm": 1.9759501218795776, "learning_rate": 9.255284968002918e-05, "loss": 0.4370823800563812, "step": 1931, "token_acc": 0.8505617977528089 }, { "epoch": 2.173228346456693, "grad_norm": 2.2008864879608154, "learning_rate": 9.25430802083583e-05, "loss": 0.6473489999771118, "step": 1932, "token_acc": 0.814629258517034 }, { "epoch": 2.1743532058492687, "grad_norm": 2.5692496299743652, "learning_rate": 9.253330484920853e-05, "loss": 0.5050379037857056, "step": 1933, "token_acc": 0.8381924198250729 }, { "epoch": 2.175478065241845, "grad_norm": 2.171311378479004, "learning_rate": 9.252352360393265e-05, "loss": 0.5775960683822632, "step": 1934, "token_acc": 0.8190578158458244 }, { "epoch": 2.1766029246344205, "grad_norm": 2.058746576309204, "learning_rate": 9.25137364738843e-05, "loss": 0.46797823905944824, "step": 1935, "token_acc": 0.843680709534368 }, { "epoch": 2.1777277840269966, "grad_norm": 2.478203773498535, "learning_rate": 9.25039434604179e-05, "loss": 0.780511200428009, "step": 1936, "token_acc": 0.7778855480116392 }, { "epoch": 2.1788526434195727, "grad_norm": 2.1595582962036133, "learning_rate": 9.249414456488868e-05, "loss": 0.5650418996810913, "step": 1937, "token_acc": 0.822246455834242 }, { "epoch": 2.1799775028121484, "grad_norm": 2.1102468967437744, "learning_rate": 9.24843397886527e-05, "loss": 0.46184664964675903, "step": 1938, "token_acc": 0.8571428571428571 }, { "epoch": 2.1811023622047245, "grad_norm": 2.4462311267852783, "learning_rate": 9.247452913306685e-05, "loss": 0.654560387134552, "step": 1939, "token_acc": 0.786096256684492 }, { "epoch": 2.1822272215973, "grad_norm": 2.268317222595215, "learning_rate": 9.246471259948879e-05, "loss": 0.5516202449798584, "step": 1940, "token_acc": 0.8301886792452831 }, { "epoch": 2.1833520809898763, "grad_norm": 2.0578832626342773, "learning_rate": 9.245489018927702e-05, "loss": 0.522646963596344, "step": 1941, "token_acc": 0.8540723981900452 }, { "epoch": 2.184476940382452, "grad_norm": 1.9850953817367554, "learning_rate": 9.244506190379088e-05, "loss": 0.5487270355224609, "step": 1942, "token_acc": 0.8380952380952381 }, { "epoch": 2.185601799775028, "grad_norm": 1.6331645250320435, "learning_rate": 9.243522774439044e-05, "loss": 0.32967114448547363, "step": 1943, "token_acc": 0.895609756097561 }, { "epoch": 2.1867266591676042, "grad_norm": 2.137171506881714, "learning_rate": 9.242538771243667e-05, "loss": 0.6591641306877136, "step": 1944, "token_acc": 0.7910328262610088 }, { "epoch": 2.18785151856018, "grad_norm": 1.6504747867584229, "learning_rate": 9.241554180929131e-05, "loss": 0.44978001713752747, "step": 1945, "token_acc": 0.8648208469055375 }, { "epoch": 2.188976377952756, "grad_norm": 2.366786003112793, "learning_rate": 9.240569003631693e-05, "loss": 0.5071741938591003, "step": 1946, "token_acc": 0.8402203856749312 }, { "epoch": 2.1901012373453317, "grad_norm": 2.2032907009124756, "learning_rate": 9.239583239487689e-05, "loss": 0.4892174005508423, "step": 1947, "token_acc": 0.8455598455598455 }, { "epoch": 2.191226096737908, "grad_norm": 2.724303960800171, "learning_rate": 9.238596888633538e-05, "loss": 0.5054645538330078, "step": 1948, "token_acc": 0.8472468916518651 }, { "epoch": 2.1923509561304835, "grad_norm": 2.0475449562072754, "learning_rate": 9.237609951205742e-05, "loss": 0.5576097965240479, "step": 1949, "token_acc": 0.8344965104685942 }, { "epoch": 2.1934758155230596, "grad_norm": 2.1709086894989014, "learning_rate": 9.236622427340878e-05, "loss": 0.5090921521186829, "step": 1950, "token_acc": 0.8279445727482679 }, { "epoch": 2.1946006749156357, "grad_norm": 2.154961347579956, "learning_rate": 9.23563431717561e-05, "loss": 0.46981415152549744, "step": 1951, "token_acc": 0.8335419274092616 }, { "epoch": 2.1957255343082114, "grad_norm": 2.3405086994171143, "learning_rate": 9.23464562084668e-05, "loss": 0.5516712665557861, "step": 1952, "token_acc": 0.8281081081081081 }, { "epoch": 2.1968503937007875, "grad_norm": 2.4016754627227783, "learning_rate": 9.233656338490916e-05, "loss": 0.5629606246948242, "step": 1953, "token_acc": 0.8385481852315394 }, { "epoch": 2.197975253093363, "grad_norm": 1.9778411388397217, "learning_rate": 9.232666470245219e-05, "loss": 0.5278655886650085, "step": 1954, "token_acc": 0.8385608856088561 }, { "epoch": 2.1991001124859393, "grad_norm": 2.130246877670288, "learning_rate": 9.23167601624658e-05, "loss": 0.5079561471939087, "step": 1955, "token_acc": 0.8362831858407079 }, { "epoch": 2.200224971878515, "grad_norm": 2.4204769134521484, "learning_rate": 9.230684976632063e-05, "loss": 0.7041918039321899, "step": 1956, "token_acc": 0.7789855072463768 }, { "epoch": 2.201349831271091, "grad_norm": 2.0973663330078125, "learning_rate": 9.229693351538814e-05, "loss": 0.7065743207931519, "step": 1957, "token_acc": 0.7848484848484848 }, { "epoch": 2.202474690663667, "grad_norm": 1.937951683998108, "learning_rate": 9.228701141104071e-05, "loss": 0.5413216948509216, "step": 1958, "token_acc": 0.83399209486166 }, { "epoch": 2.203599550056243, "grad_norm": 2.1124978065490723, "learning_rate": 9.227708345465138e-05, "loss": 0.5403833985328674, "step": 1959, "token_acc": 0.825635103926097 }, { "epoch": 2.204724409448819, "grad_norm": 1.8102686405181885, "learning_rate": 9.22671496475941e-05, "loss": 0.4897904396057129, "step": 1960, "token_acc": 0.8472949389179756 }, { "epoch": 2.2058492688413947, "grad_norm": 2.487037420272827, "learning_rate": 9.225720999124356e-05, "loss": 0.5279403924942017, "step": 1961, "token_acc": 0.8198198198198198 }, { "epoch": 2.2069741282339708, "grad_norm": 1.966010570526123, "learning_rate": 9.224726448697534e-05, "loss": 0.5231963396072388, "step": 1962, "token_acc": 0.8186274509803921 }, { "epoch": 2.208098987626547, "grad_norm": 1.9872127771377563, "learning_rate": 9.223731313616575e-05, "loss": 0.36529213190078735, "step": 1963, "token_acc": 0.8553191489361702 }, { "epoch": 2.2092238470191226, "grad_norm": 2.1304168701171875, "learning_rate": 9.222735594019196e-05, "loss": 0.5845769643783569, "step": 1964, "token_acc": 0.8315282791817088 }, { "epoch": 2.2103487064116987, "grad_norm": 1.9569486379623413, "learning_rate": 9.221739290043193e-05, "loss": 0.605562686920166, "step": 1965, "token_acc": 0.8147113594040968 }, { "epoch": 2.2114735658042743, "grad_norm": 2.1893041133880615, "learning_rate": 9.220742401826444e-05, "loss": 0.6836233735084534, "step": 1966, "token_acc": 0.7905859750240154 }, { "epoch": 2.2125984251968505, "grad_norm": 2.205469846725464, "learning_rate": 9.219744929506904e-05, "loss": 0.3845362663269043, "step": 1967, "token_acc": 0.8635703918722787 }, { "epoch": 2.213723284589426, "grad_norm": 2.141770362854004, "learning_rate": 9.218746873222615e-05, "loss": 0.5302398204803467, "step": 1968, "token_acc": 0.8387096774193549 }, { "epoch": 2.2148481439820022, "grad_norm": 2.1049487590789795, "learning_rate": 9.217748233111695e-05, "loss": 0.40376442670822144, "step": 1969, "token_acc": 0.8757575757575757 }, { "epoch": 2.2159730033745784, "grad_norm": 2.0128936767578125, "learning_rate": 9.216749009312345e-05, "loss": 0.5190580487251282, "step": 1970, "token_acc": 0.8265550239234449 }, { "epoch": 2.217097862767154, "grad_norm": 2.3275063037872314, "learning_rate": 9.215749201962847e-05, "loss": 0.5766487717628479, "step": 1971, "token_acc": 0.835195530726257 }, { "epoch": 2.21822272215973, "grad_norm": 2.091076612472534, "learning_rate": 9.21474881120156e-05, "loss": 0.5355849862098694, "step": 1972, "token_acc": 0.8339529120198265 }, { "epoch": 2.219347581552306, "grad_norm": 2.335930347442627, "learning_rate": 9.213747837166933e-05, "loss": 0.5388170480728149, "step": 1973, "token_acc": 0.8197424892703863 }, { "epoch": 2.220472440944882, "grad_norm": 2.2031686305999756, "learning_rate": 9.212746279997482e-05, "loss": 0.5530931949615479, "step": 1974, "token_acc": 0.8176605504587156 }, { "epoch": 2.2215973003374576, "grad_norm": 2.110511064529419, "learning_rate": 9.211744139831815e-05, "loss": 0.4667683243751526, "step": 1975, "token_acc": 0.8563458856345886 }, { "epoch": 2.2227221597300337, "grad_norm": 1.9179279804229736, "learning_rate": 9.210741416808617e-05, "loss": 0.45497652888298035, "step": 1976, "token_acc": 0.858902575587906 }, { "epoch": 2.22384701912261, "grad_norm": 1.7830930948257446, "learning_rate": 9.209738111066653e-05, "loss": 0.5664898157119751, "step": 1977, "token_acc": 0.8252911813643927 }, { "epoch": 2.2249718785151855, "grad_norm": 2.1848793029785156, "learning_rate": 9.208734222744768e-05, "loss": 0.5184774398803711, "step": 1978, "token_acc": 0.8369829683698297 }, { "epoch": 2.2260967379077616, "grad_norm": 2.2923765182495117, "learning_rate": 9.20772975198189e-05, "loss": 0.5371230840682983, "step": 1979, "token_acc": 0.81875 }, { "epoch": 2.2272215973003373, "grad_norm": 2.143824577331543, "learning_rate": 9.206724698917026e-05, "loss": 0.6076188087463379, "step": 1980, "token_acc": 0.8104089219330854 }, { "epoch": 2.2283464566929134, "grad_norm": 2.229579448699951, "learning_rate": 9.205719063689261e-05, "loss": 0.5328189134597778, "step": 1981, "token_acc": 0.8292383292383292 }, { "epoch": 2.2294713160854895, "grad_norm": 1.9595012664794922, "learning_rate": 9.204712846437771e-05, "loss": 0.49025166034698486, "step": 1982, "token_acc": 0.8558052434456929 }, { "epoch": 2.230596175478065, "grad_norm": 2.5397250652313232, "learning_rate": 9.203706047301798e-05, "loss": 0.5622285604476929, "step": 1983, "token_acc": 0.8230593607305936 }, { "epoch": 2.2317210348706413, "grad_norm": 2.1149322986602783, "learning_rate": 9.202698666420675e-05, "loss": 0.5749080181121826, "step": 1984, "token_acc": 0.8264033264033264 }, { "epoch": 2.232845894263217, "grad_norm": 2.0327188968658447, "learning_rate": 9.20169070393381e-05, "loss": 0.5681825876235962, "step": 1985, "token_acc": 0.8187311178247734 }, { "epoch": 2.233970753655793, "grad_norm": 2.773364543914795, "learning_rate": 9.200682159980695e-05, "loss": 0.6057336926460266, "step": 1986, "token_acc": 0.8186968838526912 }, { "epoch": 2.235095613048369, "grad_norm": 2.191132068634033, "learning_rate": 9.1996730347009e-05, "loss": 0.5151230096817017, "step": 1987, "token_acc": 0.838974358974359 }, { "epoch": 2.236220472440945, "grad_norm": 2.272357225418091, "learning_rate": 9.198663328234079e-05, "loss": 0.5357152819633484, "step": 1988, "token_acc": 0.8354002254791432 }, { "epoch": 2.237345331833521, "grad_norm": 2.210137128829956, "learning_rate": 9.19765304071996e-05, "loss": 0.5973916053771973, "step": 1989, "token_acc": 0.8178807947019867 }, { "epoch": 2.2384701912260967, "grad_norm": 2.2492432594299316, "learning_rate": 9.196642172298357e-05, "loss": 0.5176612138748169, "step": 1990, "token_acc": 0.8461538461538461 }, { "epoch": 2.239595050618673, "grad_norm": 2.4862489700317383, "learning_rate": 9.195630723109163e-05, "loss": 0.5914602279663086, "step": 1991, "token_acc": 0.8210526315789474 }, { "epoch": 2.2407199100112485, "grad_norm": 2.199280261993408, "learning_rate": 9.194618693292352e-05, "loss": 0.5910581350326538, "step": 1992, "token_acc": 0.8248248248248248 }, { "epoch": 2.2418447694038246, "grad_norm": 2.169420003890991, "learning_rate": 9.193606082987977e-05, "loss": 0.5098851323127747, "step": 1993, "token_acc": 0.8472222222222222 }, { "epoch": 2.2429696287964003, "grad_norm": 2.1427624225616455, "learning_rate": 9.192592892336169e-05, "loss": 0.5958712100982666, "step": 1994, "token_acc": 0.8186372745490982 }, { "epoch": 2.2440944881889764, "grad_norm": 1.8318966627120972, "learning_rate": 9.191579121477148e-05, "loss": 0.43084201216697693, "step": 1995, "token_acc": 0.8483786152497809 }, { "epoch": 2.2452193475815525, "grad_norm": 2.0366930961608887, "learning_rate": 9.190564770551201e-05, "loss": 0.6412807106971741, "step": 1996, "token_acc": 0.8208440999138673 }, { "epoch": 2.246344206974128, "grad_norm": 2.4779510498046875, "learning_rate": 9.189549839698708e-05, "loss": 0.6267911195755005, "step": 1997, "token_acc": 0.8098086124401914 }, { "epoch": 2.2474690663667043, "grad_norm": 1.7844778299331665, "learning_rate": 9.18853432906012e-05, "loss": 0.5279480218887329, "step": 1998, "token_acc": 0.8472596585804133 }, { "epoch": 2.24859392575928, "grad_norm": 2.285064220428467, "learning_rate": 9.187518238775979e-05, "loss": 0.5045962333679199, "step": 1999, "token_acc": 0.8455392809587217 }, { "epoch": 2.249718785151856, "grad_norm": 2.108365535736084, "learning_rate": 9.186501568986892e-05, "loss": 0.5561957955360413, "step": 2000, "token_acc": 0.8414507772020725 }, { "epoch": 2.2508436445444318, "grad_norm": 2.209249973297119, "learning_rate": 9.18548431983356e-05, "loss": 0.636541485786438, "step": 2001, "token_acc": 0.812004530011325 }, { "epoch": 2.251968503937008, "grad_norm": 2.208528518676758, "learning_rate": 9.184466491456756e-05, "loss": 0.6516560316085815, "step": 2002, "token_acc": 0.8102926337033299 }, { "epoch": 2.253093363329584, "grad_norm": 1.9858371019363403, "learning_rate": 9.183448083997336e-05, "loss": 0.4952147603034973, "step": 2003, "token_acc": 0.8413878562577447 }, { "epoch": 2.2542182227221597, "grad_norm": 2.0569448471069336, "learning_rate": 9.18242909759624e-05, "loss": 0.5567777752876282, "step": 2004, "token_acc": 0.8112773302646721 }, { "epoch": 2.2553430821147358, "grad_norm": 2.1383700370788574, "learning_rate": 9.181409532394478e-05, "loss": 0.6681684255599976, "step": 2005, "token_acc": 0.8013307984790875 }, { "epoch": 2.2564679415073114, "grad_norm": 1.9417698383331299, "learning_rate": 9.18038938853315e-05, "loss": 0.6823487281799316, "step": 2006, "token_acc": 0.8123938879456706 }, { "epoch": 2.2575928008998876, "grad_norm": 2.158169984817505, "learning_rate": 9.179368666153432e-05, "loss": 0.6143476963043213, "step": 2007, "token_acc": 0.8199419167473379 }, { "epoch": 2.2587176602924632, "grad_norm": 2.0998904705047607, "learning_rate": 9.17834736539658e-05, "loss": 0.5264329314231873, "step": 2008, "token_acc": 0.826797385620915 }, { "epoch": 2.2598425196850394, "grad_norm": 2.250575065612793, "learning_rate": 9.17732548640393e-05, "loss": 0.6269342303276062, "step": 2009, "token_acc": 0.799781181619256 }, { "epoch": 2.2609673790776155, "grad_norm": 2.0822737216949463, "learning_rate": 9.1763030293169e-05, "loss": 0.4739494323730469, "step": 2010, "token_acc": 0.8330871491875923 }, { "epoch": 2.262092238470191, "grad_norm": 1.4855577945709229, "learning_rate": 9.175279994276985e-05, "loss": 0.3178425431251526, "step": 2011, "token_acc": 0.8901623686723973 }, { "epoch": 2.2632170978627673, "grad_norm": 2.302767753601074, "learning_rate": 9.174256381425759e-05, "loss": 0.5340207815170288, "step": 2012, "token_acc": 0.844106463878327 }, { "epoch": 2.264341957255343, "grad_norm": 2.3313257694244385, "learning_rate": 9.173232190904884e-05, "loss": 0.6241610050201416, "step": 2013, "token_acc": 0.8219026548672567 }, { "epoch": 2.265466816647919, "grad_norm": 2.42889142036438, "learning_rate": 9.172207422856092e-05, "loss": 0.4112135171890259, "step": 2014, "token_acc": 0.8601626016260162 }, { "epoch": 2.2665916760404947, "grad_norm": 2.4184868335723877, "learning_rate": 9.171182077421201e-05, "loss": 0.6116935014724731, "step": 2015, "token_acc": 0.8182897862232779 }, { "epoch": 2.267716535433071, "grad_norm": 2.3200623989105225, "learning_rate": 9.170156154742107e-05, "loss": 0.5196659564971924, "step": 2016, "token_acc": 0.832779623477298 }, { "epoch": 2.268841394825647, "grad_norm": 2.272160530090332, "learning_rate": 9.169129654960785e-05, "loss": 0.5575538277626038, "step": 2017, "token_acc": 0.8224101479915433 }, { "epoch": 2.2699662542182226, "grad_norm": 2.28751277923584, "learning_rate": 9.168102578219293e-05, "loss": 0.6783844828605652, "step": 2018, "token_acc": 0.8005725190839694 }, { "epoch": 2.2710911136107987, "grad_norm": 1.9599204063415527, "learning_rate": 9.167074924659765e-05, "loss": 0.6036825180053711, "step": 2019, "token_acc": 0.8089068825910931 }, { "epoch": 2.2722159730033744, "grad_norm": 2.1375837326049805, "learning_rate": 9.166046694424418e-05, "loss": 0.5011723637580872, "step": 2020, "token_acc": 0.8478513356562137 }, { "epoch": 2.2733408323959505, "grad_norm": 2.3871777057647705, "learning_rate": 9.165017887655545e-05, "loss": 0.5376843214035034, "step": 2021, "token_acc": 0.8525179856115108 }, { "epoch": 2.274465691788526, "grad_norm": 2.42301344871521, "learning_rate": 9.163988504495522e-05, "loss": 0.6327304244041443, "step": 2022, "token_acc": 0.8072429906542056 }, { "epoch": 2.2755905511811023, "grad_norm": 2.300898790359497, "learning_rate": 9.162958545086806e-05, "loss": 0.6078317165374756, "step": 2023, "token_acc": 0.8137931034482758 }, { "epoch": 2.2767154105736784, "grad_norm": 2.3233327865600586, "learning_rate": 9.16192800957193e-05, "loss": 0.4769720733165741, "step": 2024, "token_acc": 0.8380952380952381 }, { "epoch": 2.277840269966254, "grad_norm": 2.347501039505005, "learning_rate": 9.160896898093509e-05, "loss": 0.4877682328224182, "step": 2025, "token_acc": 0.8454810495626822 }, { "epoch": 2.27896512935883, "grad_norm": 1.9764972925186157, "learning_rate": 9.159865210794237e-05, "loss": 0.6037557125091553, "step": 2026, "token_acc": 0.8151571164510166 }, { "epoch": 2.280089988751406, "grad_norm": 2.5516202449798584, "learning_rate": 9.158832947816886e-05, "loss": 0.7387592196464539, "step": 2027, "token_acc": 0.7803970223325062 }, { "epoch": 2.281214848143982, "grad_norm": 2.1653060913085938, "learning_rate": 9.157800109304312e-05, "loss": 0.528439462184906, "step": 2028, "token_acc": 0.8222748815165877 }, { "epoch": 2.2823397075365577, "grad_norm": 2.349520683288574, "learning_rate": 9.156766695399447e-05, "loss": 0.5334176421165466, "step": 2029, "token_acc": 0.8289473684210527 }, { "epoch": 2.283464566929134, "grad_norm": 2.0846846103668213, "learning_rate": 9.155732706245304e-05, "loss": 0.6526938080787659, "step": 2030, "token_acc": 0.8055555555555556 }, { "epoch": 2.28458942632171, "grad_norm": 2.1969070434570312, "learning_rate": 9.154698141984975e-05, "loss": 0.6086790561676025, "step": 2031, "token_acc": 0.8076923076923077 }, { "epoch": 2.2857142857142856, "grad_norm": 1.8545622825622559, "learning_rate": 9.153663002761633e-05, "loss": 0.3856395483016968, "step": 2032, "token_acc": 0.8683853459972863 }, { "epoch": 2.2868391451068617, "grad_norm": 2.0736873149871826, "learning_rate": 9.152627288718527e-05, "loss": 0.5880653858184814, "step": 2033, "token_acc": 0.8123044838373306 }, { "epoch": 2.287964004499438, "grad_norm": 1.989347219467163, "learning_rate": 9.15159099999899e-05, "loss": 0.40056920051574707, "step": 2034, "token_acc": 0.8545232273838631 }, { "epoch": 2.2890888638920135, "grad_norm": 1.9042811393737793, "learning_rate": 9.150554136746434e-05, "loss": 0.48940128087997437, "step": 2035, "token_acc": 0.8536324786324786 }, { "epoch": 2.2902137232845896, "grad_norm": 2.2749171257019043, "learning_rate": 9.149516699104347e-05, "loss": 0.503082811832428, "step": 2036, "token_acc": 0.844804318488529 }, { "epoch": 2.2913385826771653, "grad_norm": 2.7111687660217285, "learning_rate": 9.148478687216298e-05, "loss": 0.5998929142951965, "step": 2037, "token_acc": 0.8192161820480405 }, { "epoch": 2.2924634420697414, "grad_norm": 2.8762316703796387, "learning_rate": 9.147440101225938e-05, "loss": 0.6179706454277039, "step": 2038, "token_acc": 0.8176855895196506 }, { "epoch": 2.293588301462317, "grad_norm": 2.259354829788208, "learning_rate": 9.146400941276994e-05, "loss": 0.5329117774963379, "step": 2039, "token_acc": 0.8266953713670614 }, { "epoch": 2.294713160854893, "grad_norm": 2.216895818710327, "learning_rate": 9.145361207513274e-05, "loss": 0.5688716173171997, "step": 2040, "token_acc": 0.8245823389021479 }, { "epoch": 2.2958380202474693, "grad_norm": 2.196540594100952, "learning_rate": 9.144320900078667e-05, "loss": 0.6024664044380188, "step": 2041, "token_acc": 0.8202764976958525 }, { "epoch": 2.296962879640045, "grad_norm": 2.351867914199829, "learning_rate": 9.143280019117137e-05, "loss": 0.4805533289909363, "step": 2042, "token_acc": 0.8376811594202899 }, { "epoch": 2.298087739032621, "grad_norm": 2.3108019828796387, "learning_rate": 9.142238564772734e-05, "loss": 0.5870442390441895, "step": 2043, "token_acc": 0.8171926006528836 }, { "epoch": 2.2992125984251968, "grad_norm": 1.998576283454895, "learning_rate": 9.141196537189578e-05, "loss": 0.47317156195640564, "step": 2044, "token_acc": 0.8454842219804135 }, { "epoch": 2.300337457817773, "grad_norm": 2.27121901512146, "learning_rate": 9.14015393651188e-05, "loss": 0.6217599511146545, "step": 2045, "token_acc": 0.804930332261522 }, { "epoch": 2.3014623172103486, "grad_norm": 1.7856941223144531, "learning_rate": 9.139110762883918e-05, "loss": 0.4123930335044861, "step": 2046, "token_acc": 0.8665987780040734 }, { "epoch": 2.3025871766029247, "grad_norm": 2.364893913269043, "learning_rate": 9.13806701645006e-05, "loss": 0.4421628713607788, "step": 2047, "token_acc": 0.8576158940397351 }, { "epoch": 2.303712035995501, "grad_norm": 1.74433434009552, "learning_rate": 9.137022697354747e-05, "loss": 0.5761936902999878, "step": 2048, "token_acc": 0.8208494208494208 }, { "epoch": 2.3048368953880765, "grad_norm": 2.2700185775756836, "learning_rate": 9.1359778057425e-05, "loss": 0.5439042448997498, "step": 2049, "token_acc": 0.8226190476190476 }, { "epoch": 2.3059617547806526, "grad_norm": 2.0269012451171875, "learning_rate": 9.134932341757923e-05, "loss": 0.44994163513183594, "step": 2050, "token_acc": 0.8571428571428571 }, { "epoch": 2.3070866141732282, "grad_norm": 2.3286898136138916, "learning_rate": 9.133886305545692e-05, "loss": 0.6040429472923279, "step": 2051, "token_acc": 0.8207547169811321 }, { "epoch": 2.3082114735658044, "grad_norm": 2.325045108795166, "learning_rate": 9.132839697250569e-05, "loss": 0.44460543990135193, "step": 2052, "token_acc": 0.8588390501319261 }, { "epoch": 2.30933633295838, "grad_norm": 2.370772123336792, "learning_rate": 9.131792517017393e-05, "loss": 0.720882773399353, "step": 2053, "token_acc": 0.7685631629701061 }, { "epoch": 2.310461192350956, "grad_norm": 2.191025495529175, "learning_rate": 9.130744764991082e-05, "loss": 0.6310579776763916, "step": 2054, "token_acc": 0.800792864222002 }, { "epoch": 2.3115860517435323, "grad_norm": 1.8084627389907837, "learning_rate": 9.129696441316633e-05, "loss": 0.4372439980506897, "step": 2055, "token_acc": 0.8483316481294236 }, { "epoch": 2.312710911136108, "grad_norm": 1.9796794652938843, "learning_rate": 9.12864754613912e-05, "loss": 0.4447760581970215, "step": 2056, "token_acc": 0.85612968591692 }, { "epoch": 2.313835770528684, "grad_norm": 2.2007060050964355, "learning_rate": 9.1275980796037e-05, "loss": 0.6206055879592896, "step": 2057, "token_acc": 0.8179848320693391 }, { "epoch": 2.3149606299212597, "grad_norm": 2.0827555656433105, "learning_rate": 9.126548041855607e-05, "loss": 0.5350469350814819, "step": 2058, "token_acc": 0.833879781420765 }, { "epoch": 2.316085489313836, "grad_norm": 2.299593448638916, "learning_rate": 9.125497433040153e-05, "loss": 0.635506808757782, "step": 2059, "token_acc": 0.797566371681416 }, { "epoch": 2.3172103487064115, "grad_norm": 2.1898770332336426, "learning_rate": 9.124446253302734e-05, "loss": 0.6974754333496094, "step": 2060, "token_acc": 0.7956989247311828 }, { "epoch": 2.3183352080989876, "grad_norm": 2.2286558151245117, "learning_rate": 9.123394502788816e-05, "loss": 0.7643535733222961, "step": 2061, "token_acc": 0.7809439002671416 }, { "epoch": 2.3194600674915637, "grad_norm": 2.05330753326416, "learning_rate": 9.122342181643954e-05, "loss": 0.5325409173965454, "step": 2062, "token_acc": 0.8355029585798817 }, { "epoch": 2.3205849268841394, "grad_norm": 2.1986045837402344, "learning_rate": 9.121289290013775e-05, "loss": 0.5664424896240234, "step": 2063, "token_acc": 0.8329383886255924 }, { "epoch": 2.3217097862767155, "grad_norm": 1.9212571382522583, "learning_rate": 9.120235828043989e-05, "loss": 0.48559415340423584, "step": 2064, "token_acc": 0.8635907723169508 }, { "epoch": 2.322834645669291, "grad_norm": 1.6474344730377197, "learning_rate": 9.11918179588038e-05, "loss": 0.38627636432647705, "step": 2065, "token_acc": 0.8536139066788655 }, { "epoch": 2.3239595050618673, "grad_norm": 2.185544013977051, "learning_rate": 9.118127193668815e-05, "loss": 0.5084322094917297, "step": 2066, "token_acc": 0.8390243902439024 }, { "epoch": 2.325084364454443, "grad_norm": 2.4179110527038574, "learning_rate": 9.117072021555242e-05, "loss": 0.6548905968666077, "step": 2067, "token_acc": 0.800747198007472 }, { "epoch": 2.326209223847019, "grad_norm": 2.0806076526641846, "learning_rate": 9.116016279685682e-05, "loss": 0.4322526454925537, "step": 2068, "token_acc": 0.8637566137566137 }, { "epoch": 2.3273340832395952, "grad_norm": 2.1051533222198486, "learning_rate": 9.114959968206238e-05, "loss": 0.4726610779762268, "step": 2069, "token_acc": 0.8510869565217392 }, { "epoch": 2.328458942632171, "grad_norm": 2.279479742050171, "learning_rate": 9.113903087263093e-05, "loss": 0.5211130380630493, "step": 2070, "token_acc": 0.847041847041847 }, { "epoch": 2.329583802024747, "grad_norm": 2.262763023376465, "learning_rate": 9.112845637002505e-05, "loss": 0.48100990056991577, "step": 2071, "token_acc": 0.8367626886145405 }, { "epoch": 2.3307086614173227, "grad_norm": 2.481142282485962, "learning_rate": 9.111787617570815e-05, "loss": 0.5786476731300354, "step": 2072, "token_acc": 0.822360248447205 }, { "epoch": 2.331833520809899, "grad_norm": 2.5791571140289307, "learning_rate": 9.11072902911444e-05, "loss": 0.5470713376998901, "step": 2073, "token_acc": 0.8163841807909604 }, { "epoch": 2.3329583802024745, "grad_norm": 2.479328155517578, "learning_rate": 9.109669871779876e-05, "loss": 0.41489845514297485, "step": 2074, "token_acc": 0.850909090909091 }, { "epoch": 2.3340832395950506, "grad_norm": 2.04787015914917, "learning_rate": 9.108610145713699e-05, "loss": 0.3701897859573364, "step": 2075, "token_acc": 0.8735019973368842 }, { "epoch": 2.3352080989876267, "grad_norm": 2.1525282859802246, "learning_rate": 9.107549851062565e-05, "loss": 0.5278218984603882, "step": 2076, "token_acc": 0.8343881856540084 }, { "epoch": 2.3363329583802024, "grad_norm": 2.5059454441070557, "learning_rate": 9.106488987973203e-05, "loss": 0.6156703233718872, "step": 2077, "token_acc": 0.7990255785627284 }, { "epoch": 2.3374578177727785, "grad_norm": 2.2589147090911865, "learning_rate": 9.105427556592427e-05, "loss": 0.5160384774208069, "step": 2078, "token_acc": 0.8363136176066025 }, { "epoch": 2.338582677165354, "grad_norm": 2.101593255996704, "learning_rate": 9.104365557067128e-05, "loss": 0.6002730131149292, "step": 2079, "token_acc": 0.8039383561643836 }, { "epoch": 2.3397075365579303, "grad_norm": 1.8929715156555176, "learning_rate": 9.103302989544271e-05, "loss": 0.5475412607192993, "step": 2080, "token_acc": 0.8349753694581281 }, { "epoch": 2.340832395950506, "grad_norm": 2.3253610134124756, "learning_rate": 9.102239854170906e-05, "loss": 0.5826221704483032, "step": 2081, "token_acc": 0.8329238329238329 }, { "epoch": 2.341957255343082, "grad_norm": 1.6275783777236938, "learning_rate": 9.101176151094157e-05, "loss": 0.3346983790397644, "step": 2082, "token_acc": 0.8991416309012875 }, { "epoch": 2.343082114735658, "grad_norm": 1.9113969802856445, "learning_rate": 9.100111880461231e-05, "loss": 0.5595675706863403, "step": 2083, "token_acc": 0.8271739130434783 }, { "epoch": 2.344206974128234, "grad_norm": 1.9991494417190552, "learning_rate": 9.099047042419409e-05, "loss": 0.5051677227020264, "step": 2084, "token_acc": 0.8381742738589212 }, { "epoch": 2.34533183352081, "grad_norm": 2.068610429763794, "learning_rate": 9.097981637116053e-05, "loss": 0.6114462614059448, "step": 2085, "token_acc": 0.8091603053435115 }, { "epoch": 2.3464566929133857, "grad_norm": 2.4664878845214844, "learning_rate": 9.096915664698602e-05, "loss": 0.6472258567810059, "step": 2086, "token_acc": 0.7949561403508771 }, { "epoch": 2.3475815523059618, "grad_norm": 2.544793128967285, "learning_rate": 9.095849125314577e-05, "loss": 0.5688963532447815, "step": 2087, "token_acc": 0.8126272912423625 }, { "epoch": 2.3487064116985374, "grad_norm": 2.10718035697937, "learning_rate": 9.094782019111572e-05, "loss": 0.5362620949745178, "step": 2088, "token_acc": 0.8242142025611175 }, { "epoch": 2.3498312710911136, "grad_norm": 1.95978844165802, "learning_rate": 9.093714346237265e-05, "loss": 0.37988466024398804, "step": 2089, "token_acc": 0.8735224586288416 }, { "epoch": 2.3509561304836897, "grad_norm": 1.9974157810211182, "learning_rate": 9.092646106839409e-05, "loss": 0.5776438117027283, "step": 2090, "token_acc": 0.8139963167587477 }, { "epoch": 2.3520809898762653, "grad_norm": 2.499546527862549, "learning_rate": 9.091577301065834e-05, "loss": 0.647847056388855, "step": 2091, "token_acc": 0.8101851851851852 }, { "epoch": 2.3532058492688415, "grad_norm": 2.4835171699523926, "learning_rate": 9.090507929064454e-05, "loss": 0.5057550668716431, "step": 2092, "token_acc": 0.8385772913816689 }, { "epoch": 2.354330708661417, "grad_norm": 2.131324529647827, "learning_rate": 9.089437990983254e-05, "loss": 0.6578232049942017, "step": 2093, "token_acc": 0.7880041365046536 }, { "epoch": 2.3554555680539933, "grad_norm": 2.282881498336792, "learning_rate": 9.088367486970305e-05, "loss": 0.6067376732826233, "step": 2094, "token_acc": 0.7972636815920398 }, { "epoch": 2.3565804274465694, "grad_norm": 2.037916660308838, "learning_rate": 9.087296417173752e-05, "loss": 0.6096198558807373, "step": 2095, "token_acc": 0.8204613841524574 }, { "epoch": 2.357705286839145, "grad_norm": 2.5391745567321777, "learning_rate": 9.086224781741817e-05, "loss": 0.6301350593566895, "step": 2096, "token_acc": 0.7983978638184246 }, { "epoch": 2.358830146231721, "grad_norm": 2.450629472732544, "learning_rate": 9.085152580822803e-05, "loss": 0.5644147396087646, "step": 2097, "token_acc": 0.8361111111111111 }, { "epoch": 2.359955005624297, "grad_norm": 2.617091417312622, "learning_rate": 9.084079814565091e-05, "loss": 0.5534859895706177, "step": 2098, "token_acc": 0.8234398782343988 }, { "epoch": 2.361079865016873, "grad_norm": 2.3960683345794678, "learning_rate": 9.083006483117138e-05, "loss": 0.5314704179763794, "step": 2099, "token_acc": 0.8278236914600551 }, { "epoch": 2.362204724409449, "grad_norm": 2.4264883995056152, "learning_rate": 9.081932586627484e-05, "loss": 0.5082154273986816, "step": 2100, "token_acc": 0.8389955686853766 }, { "epoch": 2.362204724409449, "eval_loss": 0.9530988931655884, "eval_runtime": 31.6719, "eval_samples_per_second": 25.354, "eval_steps_per_second": 3.189, "eval_token_acc": 0.73821301592209, "step": 2100 }, { "epoch": 2.3633295838020247, "grad_norm": 2.238553047180176, "learning_rate": 9.080858125244741e-05, "loss": 0.49215978384017944, "step": 2101, "token_acc": 0.8385922330097088 }, { "epoch": 2.364454443194601, "grad_norm": 2.3955459594726562, "learning_rate": 9.079783099117604e-05, "loss": 0.5862983465194702, "step": 2102, "token_acc": 0.8241610738255034 }, { "epoch": 2.3655793025871765, "grad_norm": 2.3399806022644043, "learning_rate": 9.078707508394844e-05, "loss": 0.6780966520309448, "step": 2103, "token_acc": 0.8 }, { "epoch": 2.3667041619797526, "grad_norm": 2.2456791400909424, "learning_rate": 9.07763135322531e-05, "loss": 0.5827903747558594, "step": 2104, "token_acc": 0.8004484304932735 }, { "epoch": 2.3678290213723283, "grad_norm": 1.9494736194610596, "learning_rate": 9.07655463375793e-05, "loss": 0.4727908968925476, "step": 2105, "token_acc": 0.8610792192881745 }, { "epoch": 2.3689538807649044, "grad_norm": 2.43011736869812, "learning_rate": 9.07547735014171e-05, "loss": 0.6078325510025024, "step": 2106, "token_acc": 0.7962184873949579 }, { "epoch": 2.3700787401574805, "grad_norm": 2.351958990097046, "learning_rate": 9.074399502525735e-05, "loss": 0.5000800490379333, "step": 2107, "token_acc": 0.8403614457831325 }, { "epoch": 2.371203599550056, "grad_norm": 2.0947463512420654, "learning_rate": 9.073321091059163e-05, "loss": 0.611703097820282, "step": 2108, "token_acc": 0.8129194630872483 }, { "epoch": 2.3723284589426323, "grad_norm": 2.12644362449646, "learning_rate": 9.072242115891237e-05, "loss": 0.40011313557624817, "step": 2109, "token_acc": 0.8762626262626263 }, { "epoch": 2.373453318335208, "grad_norm": 2.0433566570281982, "learning_rate": 9.071162577171275e-05, "loss": 0.4517280161380768, "step": 2110, "token_acc": 0.8510416666666667 }, { "epoch": 2.374578177727784, "grad_norm": 2.2240984439849854, "learning_rate": 9.070082475048672e-05, "loss": 0.6016672253608704, "step": 2111, "token_acc": 0.8102345415778252 }, { "epoch": 2.37570303712036, "grad_norm": 2.0595052242279053, "learning_rate": 9.069001809672902e-05, "loss": 0.42386287450790405, "step": 2112, "token_acc": 0.8615384615384616 }, { "epoch": 2.376827896512936, "grad_norm": 2.2159008979797363, "learning_rate": 9.067920581193517e-05, "loss": 0.49067971110343933, "step": 2113, "token_acc": 0.8450184501845018 }, { "epoch": 2.377952755905512, "grad_norm": 1.960582971572876, "learning_rate": 9.066838789760144e-05, "loss": 0.45288512110710144, "step": 2114, "token_acc": 0.8641732283464567 }, { "epoch": 2.3790776152980877, "grad_norm": 2.043515205383301, "learning_rate": 9.065756435522497e-05, "loss": 0.5030186176300049, "step": 2115, "token_acc": 0.8504672897196262 }, { "epoch": 2.380202474690664, "grad_norm": 2.1490564346313477, "learning_rate": 9.064673518630356e-05, "loss": 0.5279697775840759, "step": 2116, "token_acc": 0.8314977973568282 }, { "epoch": 2.3813273340832395, "grad_norm": 2.192131996154785, "learning_rate": 9.063590039233585e-05, "loss": 0.5554949641227722, "step": 2117, "token_acc": 0.8197115384615384 }, { "epoch": 2.3824521934758156, "grad_norm": 1.8743603229522705, "learning_rate": 9.062505997482127e-05, "loss": 0.35933810472488403, "step": 2118, "token_acc": 0.8890339425587467 }, { "epoch": 2.3835770528683913, "grad_norm": 2.007718086242676, "learning_rate": 9.061421393526002e-05, "loss": 0.4812774360179901, "step": 2119, "token_acc": 0.8482532751091703 }, { "epoch": 2.3847019122609674, "grad_norm": 2.0683658123016357, "learning_rate": 9.060336227515303e-05, "loss": 0.5522487759590149, "step": 2120, "token_acc": 0.8142011834319527 }, { "epoch": 2.3858267716535435, "grad_norm": 2.0522501468658447, "learning_rate": 9.059250499600208e-05, "loss": 0.4765737056732178, "step": 2121, "token_acc": 0.8438864628820961 }, { "epoch": 2.386951631046119, "grad_norm": 2.031580686569214, "learning_rate": 9.058164209930968e-05, "loss": 0.4346432089805603, "step": 2122, "token_acc": 0.8574126534466477 }, { "epoch": 2.3880764904386953, "grad_norm": 2.6475577354431152, "learning_rate": 9.057077358657915e-05, "loss": 0.4415957033634186, "step": 2123, "token_acc": 0.8438661710037175 }, { "epoch": 2.389201349831271, "grad_norm": 1.9857639074325562, "learning_rate": 9.055989945931454e-05, "loss": 0.41238659620285034, "step": 2124, "token_acc": 0.8724279835390947 }, { "epoch": 2.390326209223847, "grad_norm": 2.359131097793579, "learning_rate": 9.054901971902072e-05, "loss": 0.5255745649337769, "step": 2125, "token_acc": 0.836986301369863 }, { "epoch": 2.3914510686164228, "grad_norm": 1.7255487442016602, "learning_rate": 9.053813436720334e-05, "loss": 0.3844480514526367, "step": 2126, "token_acc": 0.8831168831168831 }, { "epoch": 2.392575928008999, "grad_norm": 1.8901687860488892, "learning_rate": 9.052724340536876e-05, "loss": 0.5187153816223145, "step": 2127, "token_acc": 0.8388312912346843 }, { "epoch": 2.393700787401575, "grad_norm": 2.099384307861328, "learning_rate": 9.05163468350242e-05, "loss": 0.5846071243286133, "step": 2128, "token_acc": 0.8276190476190476 }, { "epoch": 2.3948256467941507, "grad_norm": 2.0500831604003906, "learning_rate": 9.050544465767762e-05, "loss": 0.4716148376464844, "step": 2129, "token_acc": 0.821301775147929 }, { "epoch": 2.395950506186727, "grad_norm": 2.2685751914978027, "learning_rate": 9.049453687483778e-05, "loss": 0.4802341163158417, "step": 2130, "token_acc": 0.8371777476255088 }, { "epoch": 2.3970753655793025, "grad_norm": 2.0590476989746094, "learning_rate": 9.048362348801414e-05, "loss": 0.52900630235672, "step": 2131, "token_acc": 0.8308051341890315 }, { "epoch": 2.3982002249718786, "grad_norm": 2.0787692070007324, "learning_rate": 9.047270449871704e-05, "loss": 0.489881694316864, "step": 2132, "token_acc": 0.8274687854710556 }, { "epoch": 2.3993250843644542, "grad_norm": 2.36312198638916, "learning_rate": 9.04617799084575e-05, "loss": 0.591330885887146, "step": 2133, "token_acc": 0.8188235294117647 }, { "epoch": 2.4004499437570304, "grad_norm": 2.0515918731689453, "learning_rate": 9.045084971874738e-05, "loss": 0.4586670398712158, "step": 2134, "token_acc": 0.8502994011976048 }, { "epoch": 2.4015748031496065, "grad_norm": 1.7525684833526611, "learning_rate": 9.043991393109929e-05, "loss": 0.35746949911117554, "step": 2135, "token_acc": 0.8944020356234097 }, { "epoch": 2.402699662542182, "grad_norm": 2.121788740158081, "learning_rate": 9.042897254702664e-05, "loss": 0.45427873730659485, "step": 2136, "token_acc": 0.8554360812425329 }, { "epoch": 2.4038245219347583, "grad_norm": 2.359095573425293, "learning_rate": 9.041802556804357e-05, "loss": 0.5966765284538269, "step": 2137, "token_acc": 0.8222523744911805 }, { "epoch": 2.404949381327334, "grad_norm": 2.4311680793762207, "learning_rate": 9.040707299566502e-05, "loss": 0.692163348197937, "step": 2138, "token_acc": 0.7734939759036145 }, { "epoch": 2.40607424071991, "grad_norm": 2.0887293815612793, "learning_rate": 9.03961148314067e-05, "loss": 0.4398709535598755, "step": 2139, "token_acc": 0.857630979498861 }, { "epoch": 2.4071991001124857, "grad_norm": 2.0746121406555176, "learning_rate": 9.038515107678511e-05, "loss": 0.7343307733535767, "step": 2140, "token_acc": 0.7666083916083916 }, { "epoch": 2.408323959505062, "grad_norm": 2.101240634918213, "learning_rate": 9.037418173331751e-05, "loss": 0.4585784673690796, "step": 2141, "token_acc": 0.8465473145780051 }, { "epoch": 2.409448818897638, "grad_norm": 2.3045248985290527, "learning_rate": 9.03632068025219e-05, "loss": 0.5621634721755981, "step": 2142, "token_acc": 0.819557625145518 }, { "epoch": 2.4105736782902136, "grad_norm": 1.962667465209961, "learning_rate": 9.035222628591712e-05, "loss": 0.46481940150260925, "step": 2143, "token_acc": 0.8558456299659478 }, { "epoch": 2.4116985376827897, "grad_norm": 1.864326000213623, "learning_rate": 9.034124018502273e-05, "loss": 0.6502777338027954, "step": 2144, "token_acc": 0.8091190108191654 }, { "epoch": 2.4128233970753654, "grad_norm": 2.3184144496917725, "learning_rate": 9.03302485013591e-05, "loss": 0.5397981405258179, "step": 2145, "token_acc": 0.8362944162436549 }, { "epoch": 2.4139482564679415, "grad_norm": 2.0622899532318115, "learning_rate": 9.031925123644734e-05, "loss": 0.6873006820678711, "step": 2146, "token_acc": 0.8005390835579514 }, { "epoch": 2.415073115860517, "grad_norm": 1.7777502536773682, "learning_rate": 9.030824839180934e-05, "loss": 0.5721862316131592, "step": 2147, "token_acc": 0.8200280112044818 }, { "epoch": 2.4161979752530933, "grad_norm": 2.1199963092803955, "learning_rate": 9.029723996896777e-05, "loss": 0.5540411472320557, "step": 2148, "token_acc": 0.8295711060948081 }, { "epoch": 2.4173228346456694, "grad_norm": 1.9109784364700317, "learning_rate": 9.028622596944609e-05, "loss": 0.5811963081359863, "step": 2149, "token_acc": 0.8263254113345521 }, { "epoch": 2.418447694038245, "grad_norm": 2.098231077194214, "learning_rate": 9.02752063947685e-05, "loss": 0.5456002354621887, "step": 2150, "token_acc": 0.8164493480441324 }, { "epoch": 2.4195725534308212, "grad_norm": 1.7295154333114624, "learning_rate": 9.026418124645998e-05, "loss": 0.39732062816619873, "step": 2151, "token_acc": 0.87122969837587 }, { "epoch": 2.420697412823397, "grad_norm": 2.301957845687866, "learning_rate": 9.025315052604629e-05, "loss": 0.5966585874557495, "step": 2152, "token_acc": 0.8280898876404494 }, { "epoch": 2.421822272215973, "grad_norm": 2.4056270122528076, "learning_rate": 9.024211423505396e-05, "loss": 0.6428985595703125, "step": 2153, "token_acc": 0.8038116591928252 }, { "epoch": 2.4229471316085487, "grad_norm": 2.0823869705200195, "learning_rate": 9.023107237501028e-05, "loss": 0.5243728756904602, "step": 2154, "token_acc": 0.8371859296482412 }, { "epoch": 2.424071991001125, "grad_norm": 2.0733275413513184, "learning_rate": 9.022002494744332e-05, "loss": 0.5555471181869507, "step": 2155, "token_acc": 0.8270833333333333 }, { "epoch": 2.425196850393701, "grad_norm": 2.6883630752563477, "learning_rate": 9.020897195388192e-05, "loss": 0.5741782784461975, "step": 2156, "token_acc": 0.8233576642335766 }, { "epoch": 2.4263217097862766, "grad_norm": 1.94225013256073, "learning_rate": 9.019791339585569e-05, "loss": 0.6021937727928162, "step": 2157, "token_acc": 0.7943336831059811 }, { "epoch": 2.4274465691788527, "grad_norm": 2.0167970657348633, "learning_rate": 9.0186849274895e-05, "loss": 0.5417763590812683, "step": 2158, "token_acc": 0.8285144566301097 }, { "epoch": 2.4285714285714284, "grad_norm": 2.3120133876800537, "learning_rate": 9.017577959253102e-05, "loss": 0.4372285008430481, "step": 2159, "token_acc": 0.8626609442060086 }, { "epoch": 2.4296962879640045, "grad_norm": 2.5655417442321777, "learning_rate": 9.016470435029564e-05, "loss": 0.5865622162818909, "step": 2160, "token_acc": 0.8012820512820513 }, { "epoch": 2.4308211473565806, "grad_norm": 2.029392957687378, "learning_rate": 9.015362354972157e-05, "loss": 0.5276869535446167, "step": 2161, "token_acc": 0.8261316872427984 }, { "epoch": 2.4319460067491563, "grad_norm": 2.1333539485931396, "learning_rate": 9.014253719234226e-05, "loss": 0.582217812538147, "step": 2162, "token_acc": 0.8217054263565892 }, { "epoch": 2.4330708661417324, "grad_norm": 2.028465747833252, "learning_rate": 9.013144527969192e-05, "loss": 0.5871524214744568, "step": 2163, "token_acc": 0.8266166822867854 }, { "epoch": 2.434195725534308, "grad_norm": 1.8183387517929077, "learning_rate": 9.012034781330558e-05, "loss": 0.6891286373138428, "step": 2164, "token_acc": 0.7911439114391144 }, { "epoch": 2.435320584926884, "grad_norm": 2.2594046592712402, "learning_rate": 9.010924479471897e-05, "loss": 0.5703680515289307, "step": 2165, "token_acc": 0.8214765100671141 }, { "epoch": 2.4364454443194603, "grad_norm": 2.324141502380371, "learning_rate": 9.009813622546863e-05, "loss": 0.6015094518661499, "step": 2166, "token_acc": 0.820577164366374 }, { "epoch": 2.437570303712036, "grad_norm": 2.1335089206695557, "learning_rate": 9.008702210709188e-05, "loss": 0.6087887287139893, "step": 2167, "token_acc": 0.8115942028985508 }, { "epoch": 2.438695163104612, "grad_norm": 2.025693893432617, "learning_rate": 9.007590244112676e-05, "loss": 0.5454702377319336, "step": 2168, "token_acc": 0.8269230769230769 }, { "epoch": 2.4398200224971878, "grad_norm": 1.9149949550628662, "learning_rate": 9.006477722911211e-05, "loss": 0.6061416864395142, "step": 2169, "token_acc": 0.8120967741935484 }, { "epoch": 2.440944881889764, "grad_norm": 2.4803714752197266, "learning_rate": 9.005364647258755e-05, "loss": 0.4955855906009674, "step": 2170, "token_acc": 0.8459016393442623 }, { "epoch": 2.4420697412823396, "grad_norm": 2.2255547046661377, "learning_rate": 9.004251017309344e-05, "loss": 0.6101786494255066, "step": 2171, "token_acc": 0.8112407211028632 }, { "epoch": 2.4431946006749157, "grad_norm": 2.4368999004364014, "learning_rate": 9.003136833217091e-05, "loss": 0.5622411966323853, "step": 2172, "token_acc": 0.8185654008438819 }, { "epoch": 2.444319460067492, "grad_norm": 2.0048842430114746, "learning_rate": 9.002022095136187e-05, "loss": 0.464627206325531, "step": 2173, "token_acc": 0.8632855567805954 }, { "epoch": 2.4454443194600675, "grad_norm": 2.0170974731445312, "learning_rate": 9.0009068032209e-05, "loss": 0.3716835677623749, "step": 2174, "token_acc": 0.8813559322033898 }, { "epoch": 2.4465691788526436, "grad_norm": 1.7867763042449951, "learning_rate": 8.999790957625573e-05, "loss": 0.37666794657707214, "step": 2175, "token_acc": 0.872848948374761 }, { "epoch": 2.4476940382452193, "grad_norm": 1.6906224489212036, "learning_rate": 8.998674558504626e-05, "loss": 0.36458146572113037, "step": 2176, "token_acc": 0.8843873517786561 }, { "epoch": 2.4488188976377954, "grad_norm": 2.2450578212738037, "learning_rate": 8.997557606012556e-05, "loss": 0.5701820850372314, "step": 2177, "token_acc": 0.8156028368794326 }, { "epoch": 2.449943757030371, "grad_norm": 1.8268944025039673, "learning_rate": 8.996440100303938e-05, "loss": 0.40541401505470276, "step": 2178, "token_acc": 0.8626834381551363 }, { "epoch": 2.451068616422947, "grad_norm": 2.4915926456451416, "learning_rate": 8.995322041533419e-05, "loss": 0.4890587627887726, "step": 2179, "token_acc": 0.8299595141700404 }, { "epoch": 2.4521934758155233, "grad_norm": 2.354952812194824, "learning_rate": 8.994203429855728e-05, "loss": 0.5869815349578857, "step": 2180, "token_acc": 0.8307692307692308 }, { "epoch": 2.453318335208099, "grad_norm": 2.259944200515747, "learning_rate": 8.993084265425669e-05, "loss": 0.5955942869186401, "step": 2181, "token_acc": 0.8186274509803921 }, { "epoch": 2.454443194600675, "grad_norm": 2.416679859161377, "learning_rate": 8.99196454839812e-05, "loss": 0.5787041187286377, "step": 2182, "token_acc": 0.8188494492044064 }, { "epoch": 2.4555680539932507, "grad_norm": 2.545804023742676, "learning_rate": 8.990844278928037e-05, "loss": 0.6119397878646851, "step": 2183, "token_acc": 0.8020527859237536 }, { "epoch": 2.456692913385827, "grad_norm": 2.0352492332458496, "learning_rate": 8.989723457170452e-05, "loss": 0.4763561487197876, "step": 2184, "token_acc": 0.8347996089931574 }, { "epoch": 2.4578177727784025, "grad_norm": 2.3495383262634277, "learning_rate": 8.988602083280476e-05, "loss": 0.4939883351325989, "step": 2185, "token_acc": 0.8450184501845018 }, { "epoch": 2.4589426321709786, "grad_norm": 2.453570604324341, "learning_rate": 8.987480157413293e-05, "loss": 0.6512126326560974, "step": 2186, "token_acc": 0.8009367681498829 }, { "epoch": 2.4600674915635548, "grad_norm": 1.8602850437164307, "learning_rate": 8.986357679724166e-05, "loss": 0.6084855794906616, "step": 2187, "token_acc": 0.808885754583921 }, { "epoch": 2.4611923509561304, "grad_norm": 2.129620313644409, "learning_rate": 8.98523465036843e-05, "loss": 0.5477997064590454, "step": 2188, "token_acc": 0.8274058577405857 }, { "epoch": 2.4623172103487065, "grad_norm": 2.0999560356140137, "learning_rate": 8.984111069501504e-05, "loss": 0.4647306799888611, "step": 2189, "token_acc": 0.8469945355191257 }, { "epoch": 2.463442069741282, "grad_norm": 2.151183843612671, "learning_rate": 8.982986937278876e-05, "loss": 0.6078482866287231, "step": 2190, "token_acc": 0.8050682261208577 }, { "epoch": 2.4645669291338583, "grad_norm": 2.523965835571289, "learning_rate": 8.981862253856115e-05, "loss": 0.7040865421295166, "step": 2191, "token_acc": 0.7905824039653035 }, { "epoch": 2.465691788526434, "grad_norm": 2.3030173778533936, "learning_rate": 8.980737019388862e-05, "loss": 0.5374072790145874, "step": 2192, "token_acc": 0.8293706293706293 }, { "epoch": 2.46681664791901, "grad_norm": 2.1421008110046387, "learning_rate": 8.979611234032837e-05, "loss": 0.6147312521934509, "step": 2193, "token_acc": 0.8127962085308057 }, { "epoch": 2.4679415073115862, "grad_norm": 2.2688748836517334, "learning_rate": 8.978484897943838e-05, "loss": 0.6242407560348511, "step": 2194, "token_acc": 0.8115577889447236 }, { "epoch": 2.469066366704162, "grad_norm": 2.8052563667297363, "learning_rate": 8.977358011277736e-05, "loss": 0.6678999066352844, "step": 2195, "token_acc": 0.795131845841785 }, { "epoch": 2.470191226096738, "grad_norm": 2.3097620010375977, "learning_rate": 8.976230574190479e-05, "loss": 0.6110339164733887, "step": 2196, "token_acc": 0.8045563549160671 }, { "epoch": 2.4713160854893137, "grad_norm": 2.0675065517425537, "learning_rate": 8.975102586838091e-05, "loss": 0.5773439407348633, "step": 2197, "token_acc": 0.8198952879581152 }, { "epoch": 2.47244094488189, "grad_norm": 1.8007458448410034, "learning_rate": 8.973974049376674e-05, "loss": 0.4741649627685547, "step": 2198, "token_acc": 0.8571428571428571 }, { "epoch": 2.4735658042744655, "grad_norm": 1.891829490661621, "learning_rate": 8.972844961962404e-05, "loss": 0.5557007789611816, "step": 2199, "token_acc": 0.8330308529945554 }, { "epoch": 2.4746906636670416, "grad_norm": 2.4702274799346924, "learning_rate": 8.971715324751533e-05, "loss": 0.5693678855895996, "step": 2200, "token_acc": 0.8145780051150895 }, { "epoch": 2.4758155230596177, "grad_norm": 1.999772071838379, "learning_rate": 8.970585137900391e-05, "loss": 0.5418729186058044, "step": 2201, "token_acc": 0.8138528138528138 }, { "epoch": 2.4769403824521934, "grad_norm": 2.370666980743408, "learning_rate": 8.969454401565385e-05, "loss": 0.5553703308105469, "step": 2202, "token_acc": 0.8094594594594594 }, { "epoch": 2.4780652418447695, "grad_norm": 2.1048712730407715, "learning_rate": 8.968323115902992e-05, "loss": 0.5494575500488281, "step": 2203, "token_acc": 0.8344370860927153 }, { "epoch": 2.479190101237345, "grad_norm": 2.0831964015960693, "learning_rate": 8.967191281069772e-05, "loss": 0.57783442735672, "step": 2204, "token_acc": 0.8148984198645598 }, { "epoch": 2.4803149606299213, "grad_norm": 2.03066349029541, "learning_rate": 8.966058897222356e-05, "loss": 0.5698750019073486, "step": 2205, "token_acc": 0.8320610687022901 }, { "epoch": 2.481439820022497, "grad_norm": 1.788527011871338, "learning_rate": 8.964925964517454e-05, "loss": 0.41955602169036865, "step": 2206, "token_acc": 0.8623435722411832 }, { "epoch": 2.482564679415073, "grad_norm": 2.5586893558502197, "learning_rate": 8.963792483111853e-05, "loss": 0.5625073313713074, "step": 2207, "token_acc": 0.815668202764977 }, { "epoch": 2.483689538807649, "grad_norm": 2.0632050037384033, "learning_rate": 8.962658453162411e-05, "loss": 0.7405001521110535, "step": 2208, "token_acc": 0.7762938230383973 }, { "epoch": 2.484814398200225, "grad_norm": 2.3916232585906982, "learning_rate": 8.961523874826065e-05, "loss": 0.5138804912567139, "step": 2209, "token_acc": 0.8227272727272728 }, { "epoch": 2.485939257592801, "grad_norm": 2.1652073860168457, "learning_rate": 8.960388748259832e-05, "loss": 0.5413231253623962, "step": 2210, "token_acc": 0.8414043583535109 }, { "epoch": 2.4870641169853767, "grad_norm": 2.450326681137085, "learning_rate": 8.959253073620794e-05, "loss": 0.6875571012496948, "step": 2211, "token_acc": 0.7924720244150559 }, { "epoch": 2.4881889763779528, "grad_norm": 2.1616721153259277, "learning_rate": 8.95811685106612e-05, "loss": 0.49639642238616943, "step": 2212, "token_acc": 0.8425806451612903 }, { "epoch": 2.4893138357705284, "grad_norm": 2.3184757232666016, "learning_rate": 8.956980080753049e-05, "loss": 0.5133970975875854, "step": 2213, "token_acc": 0.8361344537815126 }, { "epoch": 2.4904386951631046, "grad_norm": 2.491267442703247, "learning_rate": 8.955842762838897e-05, "loss": 0.5099117159843445, "step": 2214, "token_acc": 0.837037037037037 }, { "epoch": 2.4915635545556807, "grad_norm": 2.1189935207366943, "learning_rate": 8.954704897481056e-05, "loss": 0.5240675210952759, "step": 2215, "token_acc": 0.834056399132321 }, { "epoch": 2.4926884139482564, "grad_norm": 1.9854788780212402, "learning_rate": 8.953566484836995e-05, "loss": 0.4292532801628113, "step": 2216, "token_acc": 0.86625 }, { "epoch": 2.4938132733408325, "grad_norm": 2.3678207397460938, "learning_rate": 8.952427525064256e-05, "loss": 0.5348440408706665, "step": 2217, "token_acc": 0.8091334894613583 }, { "epoch": 2.494938132733408, "grad_norm": 2.190084457397461, "learning_rate": 8.951288018320457e-05, "loss": 0.5319751501083374, "step": 2218, "token_acc": 0.8211845102505695 }, { "epoch": 2.4960629921259843, "grad_norm": 2.1799023151397705, "learning_rate": 8.950147964763294e-05, "loss": 0.7197425365447998, "step": 2219, "token_acc": 0.7850877192982456 }, { "epoch": 2.49718785151856, "grad_norm": 1.8469575643539429, "learning_rate": 8.949007364550538e-05, "loss": 0.5214238166809082, "step": 2220, "token_acc": 0.8512773722627737 }, { "epoch": 2.498312710911136, "grad_norm": 2.037092685699463, "learning_rate": 8.947866217840034e-05, "loss": 0.6242448091506958, "step": 2221, "token_acc": 0.8086802480070859 }, { "epoch": 2.499437570303712, "grad_norm": 2.3754823207855225, "learning_rate": 8.946724524789705e-05, "loss": 0.5611311197280884, "step": 2222, "token_acc": 0.8160621761658031 }, { "epoch": 2.500562429696288, "grad_norm": 1.907645344734192, "learning_rate": 8.945582285557548e-05, "loss": 0.5221796631813049, "step": 2223, "token_acc": 0.8371454711802379 }, { "epoch": 2.501687289088864, "grad_norm": 2.1249117851257324, "learning_rate": 8.944439500301633e-05, "loss": 0.4026978015899658, "step": 2224, "token_acc": 0.8545454545454545 }, { "epoch": 2.50281214848144, "grad_norm": 2.084609270095825, "learning_rate": 8.943296169180114e-05, "loss": 0.55014568567276, "step": 2225, "token_acc": 0.8213483146067416 }, { "epoch": 2.5039370078740157, "grad_norm": 2.4203410148620605, "learning_rate": 8.942152292351212e-05, "loss": 0.4624706506729126, "step": 2226, "token_acc": 0.848714069591528 }, { "epoch": 2.5050618672665914, "grad_norm": 1.9188058376312256, "learning_rate": 8.941007869973224e-05, "loss": 0.4867342412471771, "step": 2227, "token_acc": 0.8395196506550219 }, { "epoch": 2.5061867266591675, "grad_norm": 2.4566354751586914, "learning_rate": 8.93986290220453e-05, "loss": 0.6982690691947937, "step": 2228, "token_acc": 0.7836185819070904 }, { "epoch": 2.5073115860517436, "grad_norm": 1.7535227537155151, "learning_rate": 8.938717389203577e-05, "loss": 0.7844363451004028, "step": 2229, "token_acc": 0.779467680608365 }, { "epoch": 2.5084364454443193, "grad_norm": 2.1119861602783203, "learning_rate": 8.937571331128893e-05, "loss": 0.5943514704704285, "step": 2230, "token_acc": 0.8063891577928364 }, { "epoch": 2.5095613048368954, "grad_norm": 1.8583616018295288, "learning_rate": 8.936424728139079e-05, "loss": 0.5118105411529541, "step": 2231, "token_acc": 0.8599382080329557 }, { "epoch": 2.5106861642294716, "grad_norm": 1.984112024307251, "learning_rate": 8.935277580392812e-05, "loss": 0.4215822219848633, "step": 2232, "token_acc": 0.8668373879641486 }, { "epoch": 2.5118110236220472, "grad_norm": 2.071572780609131, "learning_rate": 8.934129888048842e-05, "loss": 0.48543888330459595, "step": 2233, "token_acc": 0.8472584856396866 }, { "epoch": 2.512935883014623, "grad_norm": 2.0290465354919434, "learning_rate": 8.932981651265997e-05, "loss": 0.446824312210083, "step": 2234, "token_acc": 0.8432835820895522 }, { "epoch": 2.514060742407199, "grad_norm": 1.9236445426940918, "learning_rate": 8.931832870203182e-05, "loss": 0.5412256121635437, "step": 2235, "token_acc": 0.8335056876938987 }, { "epoch": 2.515185601799775, "grad_norm": 2.331292152404785, "learning_rate": 8.930683545019377e-05, "loss": 0.5156001448631287, "step": 2236, "token_acc": 0.819327731092437 }, { "epoch": 2.516310461192351, "grad_norm": 2.167912483215332, "learning_rate": 8.929533675873631e-05, "loss": 0.547976553440094, "step": 2237, "token_acc": 0.8282352941176471 }, { "epoch": 2.517435320584927, "grad_norm": 2.0968568325042725, "learning_rate": 8.928383262925075e-05, "loss": 0.5561610460281372, "step": 2238, "token_acc": 0.8130530973451328 }, { "epoch": 2.518560179977503, "grad_norm": 2.151181936264038, "learning_rate": 8.927232306332914e-05, "loss": 0.44148361682891846, "step": 2239, "token_acc": 0.8554913294797688 }, { "epoch": 2.5196850393700787, "grad_norm": 2.2203361988067627, "learning_rate": 8.926080806256424e-05, "loss": 0.49140292406082153, "step": 2240, "token_acc": 0.8428405122235157 }, { "epoch": 2.520809898762655, "grad_norm": 2.0761613845825195, "learning_rate": 8.924928762854963e-05, "loss": 0.4892350435256958, "step": 2241, "token_acc": 0.834326579261025 }, { "epoch": 2.5219347581552305, "grad_norm": 2.251380205154419, "learning_rate": 8.92377617628796e-05, "loss": 0.5050259232521057, "step": 2242, "token_acc": 0.8409638554216867 }, { "epoch": 2.5230596175478066, "grad_norm": 2.1098592281341553, "learning_rate": 8.922623046714918e-05, "loss": 0.4277192950248718, "step": 2243, "token_acc": 0.8569604086845466 }, { "epoch": 2.5241844769403823, "grad_norm": 2.065439462661743, "learning_rate": 8.921469374295419e-05, "loss": 0.5240780115127563, "step": 2244, "token_acc": 0.8395657418576599 }, { "epoch": 2.5253093363329584, "grad_norm": 2.4990029335021973, "learning_rate": 8.920315159189116e-05, "loss": 0.7560194730758667, "step": 2245, "token_acc": 0.7757225433526012 }, { "epoch": 2.5264341957255345, "grad_norm": 2.0936672687530518, "learning_rate": 8.919160401555741e-05, "loss": 0.4347074329853058, "step": 2246, "token_acc": 0.8534107402031931 }, { "epoch": 2.52755905511811, "grad_norm": 2.2240524291992188, "learning_rate": 8.918005101555102e-05, "loss": 0.38096287846565247, "step": 2247, "token_acc": 0.8590831918505942 }, { "epoch": 2.5286839145106863, "grad_norm": 2.0929627418518066, "learning_rate": 8.916849259347074e-05, "loss": 0.523375928401947, "step": 2248, "token_acc": 0.8290094339622641 }, { "epoch": 2.529808773903262, "grad_norm": 2.4452154636383057, "learning_rate": 8.915692875091615e-05, "loss": 0.5948878526687622, "step": 2249, "token_acc": 0.8066666666666666 }, { "epoch": 2.530933633295838, "grad_norm": 1.9282021522521973, "learning_rate": 8.914535948948756e-05, "loss": 0.4383208155632019, "step": 2250, "token_acc": 0.8536335721596725 }, { "epoch": 2.5320584926884138, "grad_norm": 2.223580837249756, "learning_rate": 8.913378481078602e-05, "loss": 0.42973437905311584, "step": 2251, "token_acc": 0.8609467455621301 }, { "epoch": 2.53318335208099, "grad_norm": 2.467116594314575, "learning_rate": 8.912220471641333e-05, "loss": 0.6022491455078125, "step": 2252, "token_acc": 0.8157894736842105 }, { "epoch": 2.534308211473566, "grad_norm": 2.099886655807495, "learning_rate": 8.911061920797204e-05, "loss": 0.6043612957000732, "step": 2253, "token_acc": 0.829938900203666 }, { "epoch": 2.5354330708661417, "grad_norm": 2.148611545562744, "learning_rate": 8.909902828706546e-05, "loss": 0.5281271934509277, "step": 2254, "token_acc": 0.8328912466843501 }, { "epoch": 2.536557930258718, "grad_norm": 2.308431625366211, "learning_rate": 8.908743195529766e-05, "loss": 0.47263479232788086, "step": 2255, "token_acc": 0.8556998556998557 }, { "epoch": 2.5376827896512935, "grad_norm": 1.708796501159668, "learning_rate": 8.90758302142734e-05, "loss": 0.44863349199295044, "step": 2256, "token_acc": 0.8519553072625698 }, { "epoch": 2.5388076490438696, "grad_norm": 2.0539321899414062, "learning_rate": 8.906422306559823e-05, "loss": 0.5641480684280396, "step": 2257, "token_acc": 0.8257993384785005 }, { "epoch": 2.5399325084364452, "grad_norm": 2.017099142074585, "learning_rate": 8.905261051087849e-05, "loss": 0.45900899171829224, "step": 2258, "token_acc": 0.8658227848101265 }, { "epoch": 2.5410573678290214, "grad_norm": 2.068955659866333, "learning_rate": 8.904099255172121e-05, "loss": 0.5423837304115295, "step": 2259, "token_acc": 0.8316831683168316 }, { "epoch": 2.5421822272215975, "grad_norm": 1.7688682079315186, "learning_rate": 8.902936918973415e-05, "loss": 0.49332523345947266, "step": 2260, "token_acc": 0.8481262327416174 }, { "epoch": 2.543307086614173, "grad_norm": 2.351844549179077, "learning_rate": 8.90177404265259e-05, "loss": 0.6062198877334595, "step": 2261, "token_acc": 0.8168028004667445 }, { "epoch": 2.5444319460067493, "grad_norm": 2.327388048171997, "learning_rate": 8.900610626370572e-05, "loss": 0.5990354418754578, "step": 2262, "token_acc": 0.813953488372093 }, { "epoch": 2.545556805399325, "grad_norm": 1.8483651876449585, "learning_rate": 8.899446670288365e-05, "loss": 0.44928544759750366, "step": 2263, "token_acc": 0.8553191489361702 }, { "epoch": 2.546681664791901, "grad_norm": 2.202739715576172, "learning_rate": 8.898282174567047e-05, "loss": 0.57154381275177, "step": 2264, "token_acc": 0.8221970554926388 }, { "epoch": 2.5478065241844767, "grad_norm": 1.7276766300201416, "learning_rate": 8.897117139367772e-05, "loss": 0.40321043133735657, "step": 2265, "token_acc": 0.86977648202138 }, { "epoch": 2.548931383577053, "grad_norm": 2.288198709487915, "learning_rate": 8.895951564851767e-05, "loss": 0.6253942251205444, "step": 2266, "token_acc": 0.803030303030303 }, { "epoch": 2.550056242969629, "grad_norm": 2.045196056365967, "learning_rate": 8.894785451180334e-05, "loss": 0.4774344861507416, "step": 2267, "token_acc": 0.8507462686567164 }, { "epoch": 2.5511811023622046, "grad_norm": 2.329496383666992, "learning_rate": 8.893618798514852e-05, "loss": 0.5160833597183228, "step": 2268, "token_acc": 0.8323262839879154 }, { "epoch": 2.5523059617547807, "grad_norm": 2.4179861545562744, "learning_rate": 8.892451607016768e-05, "loss": 0.5043627023696899, "step": 2269, "token_acc": 0.8256624825662483 }, { "epoch": 2.5534308211473564, "grad_norm": 1.9226417541503906, "learning_rate": 8.891283876847612e-05, "loss": 0.5325154066085815, "step": 2270, "token_acc": 0.8300359712230215 }, { "epoch": 2.5545556805399325, "grad_norm": 2.7535181045532227, "learning_rate": 8.890115608168984e-05, "loss": 0.574966549873352, "step": 2271, "token_acc": 0.8363047001620746 }, { "epoch": 2.555680539932508, "grad_norm": 2.1693027019500732, "learning_rate": 8.888946801142558e-05, "loss": 0.7055951356887817, "step": 2272, "token_acc": 0.7954345917471466 }, { "epoch": 2.5568053993250843, "grad_norm": 1.838950514793396, "learning_rate": 8.887777455930084e-05, "loss": 0.39474010467529297, "step": 2273, "token_acc": 0.8701594533029613 }, { "epoch": 2.5579302587176604, "grad_norm": 2.165261745452881, "learning_rate": 8.886607572693386e-05, "loss": 0.5237976908683777, "step": 2274, "token_acc": 0.823271130625686 }, { "epoch": 2.559055118110236, "grad_norm": 2.5065383911132812, "learning_rate": 8.885437151594362e-05, "loss": 0.5734295845031738, "step": 2275, "token_acc": 0.8251445086705202 }, { "epoch": 2.5601799775028122, "grad_norm": 2.3552467823028564, "learning_rate": 8.884266192794987e-05, "loss": 0.6262892484664917, "step": 2276, "token_acc": 0.8036890645586298 }, { "epoch": 2.5613048368953883, "grad_norm": 2.195756196975708, "learning_rate": 8.883094696457307e-05, "loss": 0.5826191902160645, "step": 2277, "token_acc": 0.8210757409440176 }, { "epoch": 2.562429696287964, "grad_norm": 2.8804128170013428, "learning_rate": 8.881922662743442e-05, "loss": 0.6199411153793335, "step": 2278, "token_acc": 0.8042813455657493 }, { "epoch": 2.5635545556805397, "grad_norm": 2.479398727416992, "learning_rate": 8.880750091815593e-05, "loss": 0.642818808555603, "step": 2279, "token_acc": 0.8102409638554217 }, { "epoch": 2.564679415073116, "grad_norm": 1.9688870906829834, "learning_rate": 8.879576983836025e-05, "loss": 0.5848183631896973, "step": 2280, "token_acc": 0.8216818642350557 }, { "epoch": 2.565804274465692, "grad_norm": 2.115251064300537, "learning_rate": 8.878403338967088e-05, "loss": 0.5283672213554382, "step": 2281, "token_acc": 0.8291571753986332 }, { "epoch": 2.5669291338582676, "grad_norm": 2.004777431488037, "learning_rate": 8.877229157371195e-05, "loss": 0.515012800693512, "step": 2282, "token_acc": 0.8434782608695652 }, { "epoch": 2.5680539932508437, "grad_norm": 1.8755625486373901, "learning_rate": 8.876054439210844e-05, "loss": 0.36012569069862366, "step": 2283, "token_acc": 0.8761776581426649 }, { "epoch": 2.56917885264342, "grad_norm": 2.3052425384521484, "learning_rate": 8.874879184648603e-05, "loss": 0.6255204677581787, "step": 2284, "token_acc": 0.8036951501154734 }, { "epoch": 2.5703037120359955, "grad_norm": 1.8366637229919434, "learning_rate": 8.873703393847111e-05, "loss": 0.5503592491149902, "step": 2285, "token_acc": 0.8164763458401305 }, { "epoch": 2.571428571428571, "grad_norm": 2.073563814163208, "learning_rate": 8.872527066969085e-05, "loss": 0.5613895058631897, "step": 2286, "token_acc": 0.8136645962732919 }, { "epoch": 2.5725534308211473, "grad_norm": 1.7097514867782593, "learning_rate": 8.871350204177317e-05, "loss": 0.4560651183128357, "step": 2287, "token_acc": 0.8471971066907775 }, { "epoch": 2.5736782902137234, "grad_norm": 2.277705430984497, "learning_rate": 8.870172805634669e-05, "loss": 0.470863401889801, "step": 2288, "token_acc": 0.8410404624277457 }, { "epoch": 2.574803149606299, "grad_norm": 1.9284517765045166, "learning_rate": 8.868994871504082e-05, "loss": 0.5279775857925415, "step": 2289, "token_acc": 0.8383733055265902 }, { "epoch": 2.575928008998875, "grad_norm": 2.222261905670166, "learning_rate": 8.867816401948566e-05, "loss": 0.5077803730964661, "step": 2290, "token_acc": 0.8406676783004552 }, { "epoch": 2.5770528683914513, "grad_norm": 1.9752064943313599, "learning_rate": 8.86663739713121e-05, "loss": 0.4674282968044281, "step": 2291, "token_acc": 0.84375 }, { "epoch": 2.578177727784027, "grad_norm": 2.2226648330688477, "learning_rate": 8.865457857215173e-05, "loss": 0.5314900875091553, "step": 2292, "token_acc": 0.8266360505166476 }, { "epoch": 2.5793025871766027, "grad_norm": 2.22822642326355, "learning_rate": 8.864277782363693e-05, "loss": 0.5224112272262573, "step": 2293, "token_acc": 0.8181818181818182 }, { "epoch": 2.5804274465691788, "grad_norm": 2.164950132369995, "learning_rate": 8.863097172740076e-05, "loss": 0.5346912145614624, "step": 2294, "token_acc": 0.8232662192393736 }, { "epoch": 2.581552305961755, "grad_norm": 1.9468154907226562, "learning_rate": 8.861916028507707e-05, "loss": 0.6060233116149902, "step": 2295, "token_acc": 0.8155416012558869 }, { "epoch": 2.5826771653543306, "grad_norm": 2.475328207015991, "learning_rate": 8.86073434983004e-05, "loss": 0.499397337436676, "step": 2296, "token_acc": 0.8302158273381295 }, { "epoch": 2.5838020247469067, "grad_norm": 2.4805831909179688, "learning_rate": 8.859552136870608e-05, "loss": 0.48186400532722473, "step": 2297, "token_acc": 0.8411330049261084 }, { "epoch": 2.584926884139483, "grad_norm": 2.1111502647399902, "learning_rate": 8.858369389793015e-05, "loss": 0.523478627204895, "step": 2298, "token_acc": 0.8378712871287128 }, { "epoch": 2.5860517435320585, "grad_norm": 1.9061323404312134, "learning_rate": 8.857186108760942e-05, "loss": 0.6116962432861328, "step": 2299, "token_acc": 0.8118081180811808 }, { "epoch": 2.5871766029246346, "grad_norm": 2.4486236572265625, "learning_rate": 8.85600229393814e-05, "loss": 0.4898260235786438, "step": 2300, "token_acc": 0.8338762214983714 }, { "epoch": 2.5883014623172103, "grad_norm": 2.0733680725097656, "learning_rate": 8.854817945488434e-05, "loss": 0.60550457239151, "step": 2301, "token_acc": 0.8288364249578415 }, { "epoch": 2.5894263217097864, "grad_norm": 1.8105976581573486, "learning_rate": 8.853633063575727e-05, "loss": 0.4010845422744751, "step": 2302, "token_acc": 0.8760964912280702 }, { "epoch": 2.590551181102362, "grad_norm": 2.118316173553467, "learning_rate": 8.852447648363992e-05, "loss": 0.554383397102356, "step": 2303, "token_acc": 0.8181818181818182 }, { "epoch": 2.591676040494938, "grad_norm": 2.4020230770111084, "learning_rate": 8.851261700017276e-05, "loss": 0.5294139981269836, "step": 2304, "token_acc": 0.8293299620733249 }, { "epoch": 2.5928008998875143, "grad_norm": 2.1320931911468506, "learning_rate": 8.850075218699702e-05, "loss": 0.5598171949386597, "step": 2305, "token_acc": 0.8211991434689507 }, { "epoch": 2.59392575928009, "grad_norm": 2.2755813598632812, "learning_rate": 8.848888204575467e-05, "loss": 0.44282475113868713, "step": 2306, "token_acc": 0.856508875739645 }, { "epoch": 2.595050618672666, "grad_norm": 2.0766096115112305, "learning_rate": 8.847700657808839e-05, "loss": 0.59539794921875, "step": 2307, "token_acc": 0.8223255813953488 }, { "epoch": 2.5961754780652417, "grad_norm": 1.903409719467163, "learning_rate": 8.846512578564159e-05, "loss": 0.45194825530052185, "step": 2308, "token_acc": 0.8559411146161935 }, { "epoch": 2.597300337457818, "grad_norm": 1.9000309705734253, "learning_rate": 8.845323967005845e-05, "loss": 0.4393186569213867, "step": 2309, "token_acc": 0.8557046979865772 }, { "epoch": 2.5984251968503935, "grad_norm": 2.2464518547058105, "learning_rate": 8.844134823298389e-05, "loss": 0.5389447808265686, "step": 2310, "token_acc": 0.8177920685959271 }, { "epoch": 2.5995500562429696, "grad_norm": 2.1742606163024902, "learning_rate": 8.842945147606351e-05, "loss": 0.3924875259399414, "step": 2311, "token_acc": 0.877643504531722 }, { "epoch": 2.6006749156355458, "grad_norm": 2.1857266426086426, "learning_rate": 8.841754940094374e-05, "loss": 0.5330128073692322, "step": 2312, "token_acc": 0.8323494687131051 }, { "epoch": 2.6017997750281214, "grad_norm": 1.8520770072937012, "learning_rate": 8.840564200927166e-05, "loss": 0.4170076847076416, "step": 2313, "token_acc": 0.8565891472868217 }, { "epoch": 2.6029246344206975, "grad_norm": 2.259075880050659, "learning_rate": 8.839372930269512e-05, "loss": 0.6177486181259155, "step": 2314, "token_acc": 0.7983193277310925 }, { "epoch": 2.604049493813273, "grad_norm": 2.0092520713806152, "learning_rate": 8.838181128286269e-05, "loss": 0.3606550991535187, "step": 2315, "token_acc": 0.9 }, { "epoch": 2.6051743532058493, "grad_norm": 1.9811819791793823, "learning_rate": 8.836988795142373e-05, "loss": 0.5613416433334351, "step": 2316, "token_acc": 0.8161634103019538 }, { "epoch": 2.606299212598425, "grad_norm": 2.230940341949463, "learning_rate": 8.835795931002825e-05, "loss": 0.5507628321647644, "step": 2317, "token_acc": 0.8244514106583072 }, { "epoch": 2.607424071991001, "grad_norm": 2.167011260986328, "learning_rate": 8.834602536032705e-05, "loss": 0.4994688332080841, "step": 2318, "token_acc": 0.834319526627219 }, { "epoch": 2.6085489313835772, "grad_norm": 2.3164494037628174, "learning_rate": 8.833408610397167e-05, "loss": 0.6251778602600098, "step": 2319, "token_acc": 0.8050955414012739 }, { "epoch": 2.609673790776153, "grad_norm": 2.1808369159698486, "learning_rate": 8.832214154261435e-05, "loss": 0.5878485441207886, "step": 2320, "token_acc": 0.8203285420944558 }, { "epoch": 2.610798650168729, "grad_norm": 2.373159408569336, "learning_rate": 8.831019167790808e-05, "loss": 0.5816386342048645, "step": 2321, "token_acc": 0.8209718670076727 }, { "epoch": 2.6119235095613047, "grad_norm": 2.3098068237304688, "learning_rate": 8.829823651150661e-05, "loss": 0.6485542058944702, "step": 2322, "token_acc": 0.7867132867132867 }, { "epoch": 2.613048368953881, "grad_norm": 2.2974281311035156, "learning_rate": 8.828627604506439e-05, "loss": 0.7254525423049927, "step": 2323, "token_acc": 0.7904328018223234 }, { "epoch": 2.6141732283464565, "grad_norm": 2.1619153022766113, "learning_rate": 8.827431028023658e-05, "loss": 0.5022085309028625, "step": 2324, "token_acc": 0.8397435897435898 }, { "epoch": 2.6152980877390326, "grad_norm": 1.9094500541687012, "learning_rate": 8.826233921867914e-05, "loss": 0.5457830429077148, "step": 2325, "token_acc": 0.8182674199623352 }, { "epoch": 2.6164229471316087, "grad_norm": 2.279531478881836, "learning_rate": 8.825036286204872e-05, "loss": 0.5543209314346313, "step": 2326, "token_acc": 0.8118466898954704 }, { "epoch": 2.6175478065241844, "grad_norm": 2.209101438522339, "learning_rate": 8.823838121200271e-05, "loss": 0.3846828043460846, "step": 2327, "token_acc": 0.8711256117455138 }, { "epoch": 2.6186726659167605, "grad_norm": 2.1744511127471924, "learning_rate": 8.822639427019923e-05, "loss": 0.4957914352416992, "step": 2328, "token_acc": 0.8357380688124306 }, { "epoch": 2.619797525309336, "grad_norm": 2.45169734954834, "learning_rate": 8.821440203829716e-05, "loss": 0.5250037908554077, "step": 2329, "token_acc": 0.8325859491778774 }, { "epoch": 2.6209223847019123, "grad_norm": 2.220100164413452, "learning_rate": 8.820240451795604e-05, "loss": 0.47316619753837585, "step": 2330, "token_acc": 0.837730870712401 }, { "epoch": 2.622047244094488, "grad_norm": 2.236177682876587, "learning_rate": 8.819040171083626e-05, "loss": 0.5607627630233765, "step": 2331, "token_acc": 0.8180737217598097 }, { "epoch": 2.623172103487064, "grad_norm": 1.8377236127853394, "learning_rate": 8.817839361859881e-05, "loss": 0.36625832319259644, "step": 2332, "token_acc": 0.8830409356725146 }, { "epoch": 2.62429696287964, "grad_norm": 2.0332107543945312, "learning_rate": 8.816638024290549e-05, "loss": 0.6331790685653687, "step": 2333, "token_acc": 0.8176470588235294 }, { "epoch": 2.625421822272216, "grad_norm": 2.4058048725128174, "learning_rate": 8.815436158541883e-05, "loss": 0.5168034434318542, "step": 2334, "token_acc": 0.8333333333333334 }, { "epoch": 2.626546681664792, "grad_norm": 2.0603229999542236, "learning_rate": 8.814233764780208e-05, "loss": 0.4302440583705902, "step": 2335, "token_acc": 0.8692628650904033 }, { "epoch": 2.6276715410573677, "grad_norm": 1.9689726829528809, "learning_rate": 8.813030843171918e-05, "loss": 0.36420348286628723, "step": 2336, "token_acc": 0.8849144634525661 }, { "epoch": 2.628796400449944, "grad_norm": 1.85297429561615, "learning_rate": 8.811827393883489e-05, "loss": 0.5175032615661621, "step": 2337, "token_acc": 0.8226691042047533 }, { "epoch": 2.6299212598425195, "grad_norm": 2.1239230632781982, "learning_rate": 8.810623417081459e-05, "loss": 0.6666853427886963, "step": 2338, "token_acc": 0.8016085790884718 }, { "epoch": 2.6310461192350956, "grad_norm": 1.7303190231323242, "learning_rate": 8.809418912932448e-05, "loss": 0.5015659332275391, "step": 2339, "token_acc": 0.8457364341085272 }, { "epoch": 2.6321709786276717, "grad_norm": 2.1452951431274414, "learning_rate": 8.808213881603147e-05, "loss": 0.6055326461791992, "step": 2340, "token_acc": 0.8175542406311637 }, { "epoch": 2.6332958380202474, "grad_norm": 2.335448980331421, "learning_rate": 8.807008323260316e-05, "loss": 0.524565577507019, "step": 2341, "token_acc": 0.8299180327868853 }, { "epoch": 2.6344206974128235, "grad_norm": 2.1504993438720703, "learning_rate": 8.80580223807079e-05, "loss": 0.5026498436927795, "step": 2342, "token_acc": 0.8529411764705882 }, { "epoch": 2.6355455568053996, "grad_norm": 2.4545421600341797, "learning_rate": 8.804595626201479e-05, "loss": 0.5772387981414795, "step": 2343, "token_acc": 0.8085381630012937 }, { "epoch": 2.6366704161979753, "grad_norm": 2.251359224319458, "learning_rate": 8.803388487819364e-05, "loss": 0.4810890555381775, "step": 2344, "token_acc": 0.8225352112676056 }, { "epoch": 2.637795275590551, "grad_norm": 1.8203459978103638, "learning_rate": 8.802180823091502e-05, "loss": 0.4426918029785156, "step": 2345, "token_acc": 0.8527204502814258 }, { "epoch": 2.638920134983127, "grad_norm": 2.1009573936462402, "learning_rate": 8.800972632185013e-05, "loss": 0.5227056741714478, "step": 2346, "token_acc": 0.8361138370951914 }, { "epoch": 2.640044994375703, "grad_norm": 2.28541898727417, "learning_rate": 8.799763915267105e-05, "loss": 0.5496712923049927, "step": 2347, "token_acc": 0.8205461638491548 }, { "epoch": 2.641169853768279, "grad_norm": 2.018653631210327, "learning_rate": 8.798554672505046e-05, "loss": 0.4531659185886383, "step": 2348, "token_acc": 0.8522372528616025 }, { "epoch": 2.642294713160855, "grad_norm": 2.1726901531219482, "learning_rate": 8.797344904066184e-05, "loss": 0.6893976330757141, "step": 2349, "token_acc": 0.7962166809974205 }, { "epoch": 2.643419572553431, "grad_norm": 2.0547194480895996, "learning_rate": 8.796134610117934e-05, "loss": 0.5585248470306396, "step": 2350, "token_acc": 0.8236842105263158 }, { "epoch": 2.6445444319460067, "grad_norm": 2.405895471572876, "learning_rate": 8.79492379082779e-05, "loss": 0.6029088497161865, "step": 2351, "token_acc": 0.8165605095541402 }, { "epoch": 2.6456692913385824, "grad_norm": 1.9551315307617188, "learning_rate": 8.793712446363312e-05, "loss": 0.4201914072036743, "step": 2352, "token_acc": 0.8657407407407407 }, { "epoch": 2.6467941507311585, "grad_norm": 2.352712869644165, "learning_rate": 8.792500576892142e-05, "loss": 0.5904959440231323, "step": 2353, "token_acc": 0.8166332665330661 }, { "epoch": 2.6479190101237347, "grad_norm": 2.2643120288848877, "learning_rate": 8.791288182581982e-05, "loss": 0.47057515382766724, "step": 2354, "token_acc": 0.8373408769448374 }, { "epoch": 2.6490438695163103, "grad_norm": 2.0991854667663574, "learning_rate": 8.790075263600621e-05, "loss": 0.5942401885986328, "step": 2355, "token_acc": 0.8109640831758034 }, { "epoch": 2.6501687289088864, "grad_norm": 2.225628137588501, "learning_rate": 8.788861820115906e-05, "loss": 0.4728797674179077, "step": 2356, "token_acc": 0.8500651890482399 }, { "epoch": 2.6512935883014626, "grad_norm": 2.293668746948242, "learning_rate": 8.787647852295768e-05, "loss": 0.5977993011474609, "step": 2357, "token_acc": 0.810918774966711 }, { "epoch": 2.6524184476940382, "grad_norm": 2.104485273361206, "learning_rate": 8.786433360308206e-05, "loss": 0.45518404245376587, "step": 2358, "token_acc": 0.8466111771700356 }, { "epoch": 2.653543307086614, "grad_norm": 2.2304999828338623, "learning_rate": 8.785218344321292e-05, "loss": 0.5713731050491333, "step": 2359, "token_acc": 0.8129770992366412 }, { "epoch": 2.65466816647919, "grad_norm": 2.20731782913208, "learning_rate": 8.78400280450317e-05, "loss": 0.5311572551727295, "step": 2360, "token_acc": 0.8222490931076178 }, { "epoch": 2.655793025871766, "grad_norm": 2.396094560623169, "learning_rate": 8.782786741022055e-05, "loss": 0.6152166724205017, "step": 2361, "token_acc": 0.8059536934950385 }, { "epoch": 2.656917885264342, "grad_norm": 2.004544973373413, "learning_rate": 8.78157015404624e-05, "loss": 0.41209298372268677, "step": 2362, "token_acc": 0.8689492325855962 }, { "epoch": 2.658042744656918, "grad_norm": 2.0971922874450684, "learning_rate": 8.780353043744085e-05, "loss": 0.5578532814979553, "step": 2363, "token_acc": 0.8275862068965517 }, { "epoch": 2.659167604049494, "grad_norm": 2.0947935581207275, "learning_rate": 8.779135410284023e-05, "loss": 0.45375728607177734, "step": 2364, "token_acc": 0.8612945838837517 }, { "epoch": 2.6602924634420697, "grad_norm": 2.1943423748016357, "learning_rate": 8.777917253834562e-05, "loss": 0.5572977662086487, "step": 2365, "token_acc": 0.8317535545023697 }, { "epoch": 2.661417322834646, "grad_norm": 2.0454981327056885, "learning_rate": 8.77669857456428e-05, "loss": 0.4725586473941803, "step": 2366, "token_acc": 0.8408071748878924 }, { "epoch": 2.6625421822272215, "grad_norm": 2.020484209060669, "learning_rate": 8.77547937264183e-05, "loss": 0.4167543351650238, "step": 2367, "token_acc": 0.8584070796460177 }, { "epoch": 2.6636670416197976, "grad_norm": 2.2037901878356934, "learning_rate": 8.774259648235936e-05, "loss": 0.5446538329124451, "step": 2368, "token_acc": 0.8251295336787565 }, { "epoch": 2.6647919010123733, "grad_norm": 1.8454216718673706, "learning_rate": 8.773039401515391e-05, "loss": 0.46599453687667847, "step": 2369, "token_acc": 0.8515240904621436 }, { "epoch": 2.6659167604049494, "grad_norm": 2.276185989379883, "learning_rate": 8.771818632649067e-05, "loss": 0.5985088348388672, "step": 2370, "token_acc": 0.8193624557260921 }, { "epoch": 2.6670416197975255, "grad_norm": 2.764082193374634, "learning_rate": 8.770597341805901e-05, "loss": 0.5631766319274902, "step": 2371, "token_acc": 0.8258642765685019 }, { "epoch": 2.668166479190101, "grad_norm": 2.3670613765716553, "learning_rate": 8.769375529154909e-05, "loss": 0.6044989824295044, "step": 2372, "token_acc": 0.8113695090439277 }, { "epoch": 2.6692913385826773, "grad_norm": 2.127769708633423, "learning_rate": 8.768153194865172e-05, "loss": 0.51302170753479, "step": 2373, "token_acc": 0.8320512820512821 }, { "epoch": 2.670416197975253, "grad_norm": 2.3004543781280518, "learning_rate": 8.766930339105852e-05, "loss": 0.548078179359436, "step": 2374, "token_acc": 0.8254716981132075 }, { "epoch": 2.671541057367829, "grad_norm": 2.298928737640381, "learning_rate": 8.765706962046174e-05, "loss": 0.5158215165138245, "step": 2375, "token_acc": 0.8248792270531401 }, { "epoch": 2.6726659167604048, "grad_norm": 2.186647891998291, "learning_rate": 8.764483063855443e-05, "loss": 0.6100223660469055, "step": 2376, "token_acc": 0.8091009988901221 }, { "epoch": 2.673790776152981, "grad_norm": 2.1449227333068848, "learning_rate": 8.763258644703029e-05, "loss": 0.5440102815628052, "step": 2377, "token_acc": 0.8484472049689441 }, { "epoch": 2.674915635545557, "grad_norm": 2.3853695392608643, "learning_rate": 8.762033704758383e-05, "loss": 0.5970637798309326, "step": 2378, "token_acc": 0.8219584569732937 }, { "epoch": 2.6760404949381327, "grad_norm": 2.2572884559631348, "learning_rate": 8.760808244191018e-05, "loss": 0.5005003213882446, "step": 2379, "token_acc": 0.8398876404494382 }, { "epoch": 2.677165354330709, "grad_norm": 2.0935921669006348, "learning_rate": 8.759582263170525e-05, "loss": 0.40189045667648315, "step": 2380, "token_acc": 0.8753709198813057 }, { "epoch": 2.6782902137232845, "grad_norm": 2.2465882301330566, "learning_rate": 8.758355761866566e-05, "loss": 0.6493396759033203, "step": 2381, "token_acc": 0.8051391862955032 }, { "epoch": 2.6794150731158606, "grad_norm": 2.185372829437256, "learning_rate": 8.757128740448875e-05, "loss": 0.4735550880432129, "step": 2382, "token_acc": 0.8516320474777448 }, { "epoch": 2.6805399325084363, "grad_norm": 2.05954647064209, "learning_rate": 8.755901199087259e-05, "loss": 0.4038301706314087, "step": 2383, "token_acc": 0.8601823708206687 }, { "epoch": 2.6816647919010124, "grad_norm": 2.1140382289886475, "learning_rate": 8.754673137951593e-05, "loss": 0.5385488271713257, "step": 2384, "token_acc": 0.8288075560802833 }, { "epoch": 2.6827896512935885, "grad_norm": 2.082801342010498, "learning_rate": 8.75344455721183e-05, "loss": 0.5658057928085327, "step": 2385, "token_acc": 0.8229885057471265 }, { "epoch": 2.683914510686164, "grad_norm": 2.173579454421997, "learning_rate": 8.752215457037989e-05, "loss": 0.5019638538360596, "step": 2386, "token_acc": 0.8426051560379919 }, { "epoch": 2.6850393700787403, "grad_norm": 1.9497604370117188, "learning_rate": 8.750985837600163e-05, "loss": 0.4898516535758972, "step": 2387, "token_acc": 0.8419282511210763 }, { "epoch": 2.686164229471316, "grad_norm": 2.1177852153778076, "learning_rate": 8.74975569906852e-05, "loss": 0.5490419268608093, "step": 2388, "token_acc": 0.8349007314524556 }, { "epoch": 2.687289088863892, "grad_norm": 2.351943016052246, "learning_rate": 8.748525041613299e-05, "loss": 0.5455265045166016, "step": 2389, "token_acc": 0.8186274509803921 }, { "epoch": 2.6884139482564677, "grad_norm": 2.1374199390411377, "learning_rate": 8.747293865404802e-05, "loss": 0.4525059163570404, "step": 2390, "token_acc": 0.8547557840616966 }, { "epoch": 2.689538807649044, "grad_norm": 1.679702639579773, "learning_rate": 8.746062170613414e-05, "loss": 0.3267248868942261, "step": 2391, "token_acc": 0.8873720136518771 }, { "epoch": 2.69066366704162, "grad_norm": 1.9866360425949097, "learning_rate": 8.744829957409588e-05, "loss": 0.5216642618179321, "step": 2392, "token_acc": 0.8381226053639846 }, { "epoch": 2.6917885264341956, "grad_norm": 1.9020847082138062, "learning_rate": 8.743597225963849e-05, "loss": 0.41238388419151306, "step": 2393, "token_acc": 0.8638743455497382 }, { "epoch": 2.6929133858267718, "grad_norm": 2.1750378608703613, "learning_rate": 8.74236397644679e-05, "loss": 0.45839428901672363, "step": 2394, "token_acc": 0.8370473537604457 }, { "epoch": 2.6940382452193474, "grad_norm": 2.218501329421997, "learning_rate": 8.741130209029082e-05, "loss": 0.6329976320266724, "step": 2395, "token_acc": 0.8164705882352942 }, { "epoch": 2.6951631046119235, "grad_norm": 2.3047077655792236, "learning_rate": 8.739895923881462e-05, "loss": 0.6627726554870605, "step": 2396, "token_acc": 0.7985611510791367 }, { "epoch": 2.696287964004499, "grad_norm": 2.1221721172332764, "learning_rate": 8.738661121174741e-05, "loss": 0.5397186875343323, "step": 2397, "token_acc": 0.8379272326350606 }, { "epoch": 2.6974128233970753, "grad_norm": 2.2704391479492188, "learning_rate": 8.737425801079804e-05, "loss": 0.4510071575641632, "step": 2398, "token_acc": 0.8507042253521127 }, { "epoch": 2.6985376827896514, "grad_norm": 2.2525136470794678, "learning_rate": 8.736189963767602e-05, "loss": 0.66277676820755, "step": 2399, "token_acc": 0.7954779033915724 }, { "epoch": 2.699662542182227, "grad_norm": 1.9840245246887207, "learning_rate": 8.734953609409164e-05, "loss": 0.5658850073814392, "step": 2400, "token_acc": 0.8240343347639485 }, { "epoch": 2.699662542182227, "eval_loss": 0.9402201175689697, "eval_runtime": 31.6621, "eval_samples_per_second": 25.362, "eval_steps_per_second": 3.19, "eval_token_acc": 0.7387871828279928, "step": 2400 }, { "epoch": 2.7007874015748032, "grad_norm": 2.2271335124969482, "learning_rate": 8.733716738175586e-05, "loss": 0.6470315456390381, "step": 2401, "token_acc": 0.8072805139186295 }, { "epoch": 2.7019122609673794, "grad_norm": 2.3804850578308105, "learning_rate": 8.732479350238036e-05, "loss": 0.4094052314758301, "step": 2402, "token_acc": 0.8558421851289834 }, { "epoch": 2.703037120359955, "grad_norm": 2.1309733390808105, "learning_rate": 8.731241445767758e-05, "loss": 0.5742270946502686, "step": 2403, "token_acc": 0.8167330677290837 }, { "epoch": 2.7041619797525307, "grad_norm": 2.4844117164611816, "learning_rate": 8.730003024936057e-05, "loss": 0.6296152472496033, "step": 2404, "token_acc": 0.8074454428754814 }, { "epoch": 2.705286839145107, "grad_norm": 2.2276222705841064, "learning_rate": 8.728764087914324e-05, "loss": 0.5434293746948242, "step": 2405, "token_acc": 0.8341523341523341 }, { "epoch": 2.706411698537683, "grad_norm": 2.0138731002807617, "learning_rate": 8.72752463487401e-05, "loss": 0.606696605682373, "step": 2406, "token_acc": 0.820281124497992 }, { "epoch": 2.7075365579302586, "grad_norm": 2.0701711177825928, "learning_rate": 8.726284665986641e-05, "loss": 0.6303012371063232, "step": 2407, "token_acc": 0.8189823874755382 }, { "epoch": 2.7086614173228347, "grad_norm": 1.856727123260498, "learning_rate": 8.725044181423816e-05, "loss": 0.6182779669761658, "step": 2408, "token_acc": 0.8145814581458146 }, { "epoch": 2.709786276715411, "grad_norm": 2.1862053871154785, "learning_rate": 8.723803181357202e-05, "loss": 0.5499426126480103, "step": 2409, "token_acc": 0.8156182212581344 }, { "epoch": 2.7109111361079865, "grad_norm": 2.198259115219116, "learning_rate": 8.72256166595854e-05, "loss": 0.6262006759643555, "step": 2410, "token_acc": 0.811949069539667 }, { "epoch": 2.712035995500562, "grad_norm": 2.3092751502990723, "learning_rate": 8.721319635399642e-05, "loss": 0.6260242462158203, "step": 2411, "token_acc": 0.7991169977924945 }, { "epoch": 2.7131608548931383, "grad_norm": 2.297774314880371, "learning_rate": 8.720077089852394e-05, "loss": 0.4899294674396515, "step": 2412, "token_acc": 0.8207934336525308 }, { "epoch": 2.7142857142857144, "grad_norm": 2.189847230911255, "learning_rate": 8.718834029488745e-05, "loss": 0.6385602951049805, "step": 2413, "token_acc": 0.8008342022940563 }, { "epoch": 2.71541057367829, "grad_norm": 2.0198495388031006, "learning_rate": 8.717590454480724e-05, "loss": 0.5686298608779907, "step": 2414, "token_acc": 0.8412228796844181 }, { "epoch": 2.716535433070866, "grad_norm": 2.0915091037750244, "learning_rate": 8.716346365000425e-05, "loss": 0.5853947401046753, "step": 2415, "token_acc": 0.8254545454545454 }, { "epoch": 2.7176602924634423, "grad_norm": 2.175464391708374, "learning_rate": 8.715101761220018e-05, "loss": 0.5464926958084106, "step": 2416, "token_acc": 0.8213457076566125 }, { "epoch": 2.718785151856018, "grad_norm": 2.071291446685791, "learning_rate": 8.71385664331174e-05, "loss": 0.64871746301651, "step": 2417, "token_acc": 0.8048048048048048 }, { "epoch": 2.7199100112485937, "grad_norm": 2.289489507675171, "learning_rate": 8.712611011447904e-05, "loss": 0.6310921907424927, "step": 2418, "token_acc": 0.79204107830552 }, { "epoch": 2.72103487064117, "grad_norm": 1.7199697494506836, "learning_rate": 8.711364865800889e-05, "loss": 0.4124705195426941, "step": 2419, "token_acc": 0.8434782608695652 }, { "epoch": 2.722159730033746, "grad_norm": 1.945853352546692, "learning_rate": 8.710118206543149e-05, "loss": 0.49702227115631104, "step": 2420, "token_acc": 0.8475524475524475 }, { "epoch": 2.7232845894263216, "grad_norm": 2.138094425201416, "learning_rate": 8.708871033847206e-05, "loss": 0.493960976600647, "step": 2421, "token_acc": 0.8397040690505548 }, { "epoch": 2.7244094488188977, "grad_norm": 1.6822394132614136, "learning_rate": 8.707623347885653e-05, "loss": 0.5275578498840332, "step": 2422, "token_acc": 0.8362139917695474 }, { "epoch": 2.725534308211474, "grad_norm": 1.8275266885757446, "learning_rate": 8.70637514883116e-05, "loss": 0.4640558362007141, "step": 2423, "token_acc": 0.8563049853372434 }, { "epoch": 2.7266591676040495, "grad_norm": 1.9452691078186035, "learning_rate": 8.705126436856461e-05, "loss": 0.4056554436683655, "step": 2424, "token_acc": 0.8655569782330346 }, { "epoch": 2.7277840269966256, "grad_norm": 1.80211341381073, "learning_rate": 8.703877212134363e-05, "loss": 0.47853875160217285, "step": 2425, "token_acc": 0.8456659619450317 }, { "epoch": 2.7289088863892013, "grad_norm": 2.1688945293426514, "learning_rate": 8.702627474837745e-05, "loss": 0.41680222749710083, "step": 2426, "token_acc": 0.8587896253602305 }, { "epoch": 2.7300337457817774, "grad_norm": 2.1441500186920166, "learning_rate": 8.701377225139559e-05, "loss": 0.49332767724990845, "step": 2427, "token_acc": 0.8353808353808354 }, { "epoch": 2.731158605174353, "grad_norm": 2.0988810062408447, "learning_rate": 8.700126463212821e-05, "loss": 0.66497802734375, "step": 2428, "token_acc": 0.7967128027681661 }, { "epoch": 2.732283464566929, "grad_norm": 1.9827889204025269, "learning_rate": 8.698875189230626e-05, "loss": 0.6086753606796265, "step": 2429, "token_acc": 0.8178472861085556 }, { "epoch": 2.7334083239595053, "grad_norm": 2.182051181793213, "learning_rate": 8.697623403366134e-05, "loss": 0.4749993085861206, "step": 2430, "token_acc": 0.8523936170212766 }, { "epoch": 2.734533183352081, "grad_norm": 2.6166481971740723, "learning_rate": 8.696371105792576e-05, "loss": 0.5672958493232727, "step": 2431, "token_acc": 0.8227060653188181 }, { "epoch": 2.735658042744657, "grad_norm": 2.112847089767456, "learning_rate": 8.69511829668326e-05, "loss": 0.671058177947998, "step": 2432, "token_acc": 0.8055842812823164 }, { "epoch": 2.7367829021372327, "grad_norm": 2.141573905944824, "learning_rate": 8.693864976211559e-05, "loss": 0.5244104266166687, "step": 2433, "token_acc": 0.8535211267605634 }, { "epoch": 2.737907761529809, "grad_norm": 2.1176342964172363, "learning_rate": 8.692611144550917e-05, "loss": 0.6777842044830322, "step": 2434, "token_acc": 0.7978825794032723 }, { "epoch": 2.7390326209223845, "grad_norm": 1.9573252201080322, "learning_rate": 8.69135680187485e-05, "loss": 0.7074543237686157, "step": 2435, "token_acc": 0.7732793522267206 }, { "epoch": 2.7401574803149606, "grad_norm": 1.9153236150741577, "learning_rate": 8.690101948356948e-05, "loss": 0.5224462747573853, "step": 2436, "token_acc": 0.8532388663967612 }, { "epoch": 2.7412823397075368, "grad_norm": 2.2398552894592285, "learning_rate": 8.688846584170864e-05, "loss": 0.48652976751327515, "step": 2437, "token_acc": 0.8447293447293447 }, { "epoch": 2.7424071991001124, "grad_norm": 1.7967239618301392, "learning_rate": 8.68759070949033e-05, "loss": 0.44118639826774597, "step": 2438, "token_acc": 0.8559322033898306 }, { "epoch": 2.7435320584926886, "grad_norm": 1.9572639465332031, "learning_rate": 8.686334324489142e-05, "loss": 0.5724310874938965, "step": 2439, "token_acc": 0.8224101479915433 }, { "epoch": 2.7446569178852642, "grad_norm": 2.2943952083587646, "learning_rate": 8.685077429341173e-05, "loss": 0.578500509262085, "step": 2440, "token_acc": 0.8140394088669951 }, { "epoch": 2.7457817772778403, "grad_norm": 1.9614813327789307, "learning_rate": 8.68382002422036e-05, "loss": 0.4862633943557739, "step": 2441, "token_acc": 0.8479381443298969 }, { "epoch": 2.746906636670416, "grad_norm": 2.2558822631835938, "learning_rate": 8.682562109300711e-05, "loss": 0.6097584366798401, "step": 2442, "token_acc": 0.8126361655773421 }, { "epoch": 2.748031496062992, "grad_norm": 2.2396531105041504, "learning_rate": 8.681303684756314e-05, "loss": 0.6468811631202698, "step": 2443, "token_acc": 0.802013422818792 }, { "epoch": 2.7491563554555682, "grad_norm": 2.1693055629730225, "learning_rate": 8.680044750761316e-05, "loss": 0.6676660776138306, "step": 2444, "token_acc": 0.7839248434237995 }, { "epoch": 2.750281214848144, "grad_norm": 2.0940752029418945, "learning_rate": 8.678785307489938e-05, "loss": 0.5104308724403381, "step": 2445, "token_acc": 0.8333333333333334 }, { "epoch": 2.75140607424072, "grad_norm": 2.3539369106292725, "learning_rate": 8.677525355116477e-05, "loss": 0.5224475264549255, "step": 2446, "token_acc": 0.8355957767722474 }, { "epoch": 2.7525309336332957, "grad_norm": 2.236621379852295, "learning_rate": 8.676264893815294e-05, "loss": 0.614896297454834, "step": 2447, "token_acc": 0.7959866220735786 }, { "epoch": 2.753655793025872, "grad_norm": 2.0441768169403076, "learning_rate": 8.67500392376082e-05, "loss": 0.5874647498130798, "step": 2448, "token_acc": 0.8113948919449901 }, { "epoch": 2.7547806524184475, "grad_norm": 2.060884714126587, "learning_rate": 8.673742445127563e-05, "loss": 0.4728061258792877, "step": 2449, "token_acc": 0.8519362186788155 }, { "epoch": 2.7559055118110236, "grad_norm": 2.396528720855713, "learning_rate": 8.672480458090096e-05, "loss": 0.5460003614425659, "step": 2450, "token_acc": 0.8247282608695652 }, { "epoch": 2.7570303712035997, "grad_norm": 2.0098676681518555, "learning_rate": 8.671217962823064e-05, "loss": 0.4171689748764038, "step": 2451, "token_acc": 0.8709302325581395 }, { "epoch": 2.7581552305961754, "grad_norm": 2.182600736618042, "learning_rate": 8.669954959501178e-05, "loss": 0.5686829686164856, "step": 2452, "token_acc": 0.8276231263383298 }, { "epoch": 2.7592800899887515, "grad_norm": 2.064582347869873, "learning_rate": 8.66869144829923e-05, "loss": 0.5337350368499756, "step": 2453, "token_acc": 0.8378109452736319 }, { "epoch": 2.760404949381327, "grad_norm": 2.3108720779418945, "learning_rate": 8.66742742939207e-05, "loss": 0.6294680833816528, "step": 2454, "token_acc": 0.8101415094339622 }, { "epoch": 2.7615298087739033, "grad_norm": 2.1572017669677734, "learning_rate": 8.666162902954628e-05, "loss": 0.6115537881851196, "step": 2455, "token_acc": 0.8187311178247734 }, { "epoch": 2.762654668166479, "grad_norm": 2.187887668609619, "learning_rate": 8.664897869161896e-05, "loss": 0.5893443822860718, "step": 2456, "token_acc": 0.8183556405353728 }, { "epoch": 2.763779527559055, "grad_norm": 2.0242717266082764, "learning_rate": 8.663632328188945e-05, "loss": 0.5067061185836792, "step": 2457, "token_acc": 0.831413612565445 }, { "epoch": 2.764904386951631, "grad_norm": 2.2404253482818604, "learning_rate": 8.662366280210907e-05, "loss": 0.6412867903709412, "step": 2458, "token_acc": 0.7869318181818182 }, { "epoch": 2.766029246344207, "grad_norm": 2.1171934604644775, "learning_rate": 8.66109972540299e-05, "loss": 0.560824990272522, "step": 2459, "token_acc": 0.8289224952741021 }, { "epoch": 2.767154105736783, "grad_norm": 1.918181300163269, "learning_rate": 8.659832663940472e-05, "loss": 0.48771199584007263, "step": 2460, "token_acc": 0.8422222222222222 }, { "epoch": 2.7682789651293587, "grad_norm": 1.9726442098617554, "learning_rate": 8.6585650959987e-05, "loss": 0.5359795093536377, "step": 2461, "token_acc": 0.8317265556529361 }, { "epoch": 2.769403824521935, "grad_norm": 2.1498727798461914, "learning_rate": 8.65729702175309e-05, "loss": 0.4859767556190491, "step": 2462, "token_acc": 0.8437067773167358 }, { "epoch": 2.7705286839145105, "grad_norm": 2.2992451190948486, "learning_rate": 8.656028441379129e-05, "loss": 0.6134165525436401, "step": 2463, "token_acc": 0.7981366459627329 }, { "epoch": 2.7716535433070866, "grad_norm": 2.4623959064483643, "learning_rate": 8.654759355052374e-05, "loss": 0.519795298576355, "step": 2464, "token_acc": 0.8415545590433483 }, { "epoch": 2.7727784026996627, "grad_norm": 2.383960723876953, "learning_rate": 8.653489762948453e-05, "loss": 0.5648220777511597, "step": 2465, "token_acc": 0.8238841978287093 }, { "epoch": 2.7739032620922384, "grad_norm": 2.0334534645080566, "learning_rate": 8.652219665243064e-05, "loss": 0.491288423538208, "step": 2466, "token_acc": 0.8429833169774289 }, { "epoch": 2.7750281214848145, "grad_norm": 2.3374900817871094, "learning_rate": 8.650949062111971e-05, "loss": 0.5297865867614746, "step": 2467, "token_acc": 0.8186666666666667 }, { "epoch": 2.7761529808773906, "grad_norm": 2.407191038131714, "learning_rate": 8.649677953731013e-05, "loss": 0.6662536859512329, "step": 2468, "token_acc": 0.7963414634146342 }, { "epoch": 2.7772778402699663, "grad_norm": 2.0111868381500244, "learning_rate": 8.648406340276098e-05, "loss": 0.6336173415184021, "step": 2469, "token_acc": 0.8212689901697945 }, { "epoch": 2.778402699662542, "grad_norm": 2.2441303730010986, "learning_rate": 8.647134221923202e-05, "loss": 0.5829351544380188, "step": 2470, "token_acc": 0.8166127292340885 }, { "epoch": 2.779527559055118, "grad_norm": 2.081007242202759, "learning_rate": 8.645861598848368e-05, "loss": 0.5434110760688782, "step": 2471, "token_acc": 0.8175026680896478 }, { "epoch": 2.780652418447694, "grad_norm": 2.304238796234131, "learning_rate": 8.64458847122772e-05, "loss": 0.5127564668655396, "step": 2472, "token_acc": 0.8375184638109305 }, { "epoch": 2.78177727784027, "grad_norm": 2.373213291168213, "learning_rate": 8.643314839237437e-05, "loss": 0.6297751665115356, "step": 2473, "token_acc": 0.8152454780361758 }, { "epoch": 2.782902137232846, "grad_norm": 2.327741861343384, "learning_rate": 8.64204070305378e-05, "loss": 0.6215295791625977, "step": 2474, "token_acc": 0.8035294117647059 }, { "epoch": 2.784026996625422, "grad_norm": 1.9453678131103516, "learning_rate": 8.640766062853074e-05, "loss": 0.5466164946556091, "step": 2475, "token_acc": 0.8174157303370787 }, { "epoch": 2.7851518560179978, "grad_norm": 2.2871618270874023, "learning_rate": 8.639490918811713e-05, "loss": 0.5012751817703247, "step": 2476, "token_acc": 0.8372093023255814 }, { "epoch": 2.7862767154105734, "grad_norm": 2.296329975128174, "learning_rate": 8.638215271106165e-05, "loss": 0.5417336225509644, "step": 2477, "token_acc": 0.825136612021858 }, { "epoch": 2.7874015748031495, "grad_norm": 2.1898610591888428, "learning_rate": 8.636939119912964e-05, "loss": 0.6004650592803955, "step": 2478, "token_acc": 0.8063157894736842 }, { "epoch": 2.7885264341957257, "grad_norm": 2.151932716369629, "learning_rate": 8.635662465408712e-05, "loss": 0.5153090953826904, "step": 2479, "token_acc": 0.8561549100968188 }, { "epoch": 2.7896512935883013, "grad_norm": 2.2893223762512207, "learning_rate": 8.634385307770088e-05, "loss": 0.5457690954208374, "step": 2480, "token_acc": 0.8235294117647058 }, { "epoch": 2.7907761529808774, "grad_norm": 1.9884223937988281, "learning_rate": 8.633107647173835e-05, "loss": 0.4253578186035156, "step": 2481, "token_acc": 0.8538565629228687 }, { "epoch": 2.7919010123734536, "grad_norm": 2.229776620864868, "learning_rate": 8.631829483796765e-05, "loss": 0.4712368845939636, "step": 2482, "token_acc": 0.8537234042553191 }, { "epoch": 2.7930258717660292, "grad_norm": 2.0630736351013184, "learning_rate": 8.630550817815762e-05, "loss": 0.4741145968437195, "step": 2483, "token_acc": 0.8462469733656174 }, { "epoch": 2.794150731158605, "grad_norm": 2.0375356674194336, "learning_rate": 8.629271649407779e-05, "loss": 0.556221067905426, "step": 2484, "token_acc": 0.8204878048780487 }, { "epoch": 2.795275590551181, "grad_norm": 2.0248444080352783, "learning_rate": 8.627991978749838e-05, "loss": 0.5040885210037231, "step": 2485, "token_acc": 0.8256658595641646 }, { "epoch": 2.796400449943757, "grad_norm": 2.06744122505188, "learning_rate": 8.626711806019032e-05, "loss": 0.4041694700717926, "step": 2486, "token_acc": 0.8534351145038168 }, { "epoch": 2.797525309336333, "grad_norm": 2.1165080070495605, "learning_rate": 8.625431131392523e-05, "loss": 0.5597134828567505, "step": 2487, "token_acc": 0.8193832599118943 }, { "epoch": 2.798650168728909, "grad_norm": 2.143308639526367, "learning_rate": 8.62414995504754e-05, "loss": 0.5875572562217712, "step": 2488, "token_acc": 0.8165038002171553 }, { "epoch": 2.799775028121485, "grad_norm": 2.390256404876709, "learning_rate": 8.622868277161383e-05, "loss": 0.5697943568229675, "step": 2489, "token_acc": 0.8189189189189189 }, { "epoch": 2.8008998875140607, "grad_norm": 2.18747878074646, "learning_rate": 8.62158609791142e-05, "loss": 0.5612473487854004, "step": 2490, "token_acc": 0.8276670574443142 }, { "epoch": 2.802024746906637, "grad_norm": 2.3678643703460693, "learning_rate": 8.620303417475095e-05, "loss": 0.5912525653839111, "step": 2491, "token_acc": 0.8189189189189189 }, { "epoch": 2.8031496062992125, "grad_norm": 2.0927164554595947, "learning_rate": 8.619020236029913e-05, "loss": 0.4888541102409363, "step": 2492, "token_acc": 0.8333333333333334 }, { "epoch": 2.8042744656917886, "grad_norm": 2.4288177490234375, "learning_rate": 8.617736553753454e-05, "loss": 0.6127632260322571, "step": 2493, "token_acc": 0.801909307875895 }, { "epoch": 2.8053993250843643, "grad_norm": 2.1339428424835205, "learning_rate": 8.616452370823362e-05, "loss": 0.6013804078102112, "step": 2494, "token_acc": 0.8222003929273084 }, { "epoch": 2.8065241844769404, "grad_norm": 2.4049904346466064, "learning_rate": 8.615167687417355e-05, "loss": 0.640178918838501, "step": 2495, "token_acc": 0.8018494055482166 }, { "epoch": 2.8076490438695165, "grad_norm": 2.339657783508301, "learning_rate": 8.61388250371322e-05, "loss": 0.5064844489097595, "step": 2496, "token_acc": 0.819371727748691 }, { "epoch": 2.808773903262092, "grad_norm": 2.1938836574554443, "learning_rate": 8.61259681988881e-05, "loss": 0.6103842258453369, "step": 2497, "token_acc": 0.8054862842892768 }, { "epoch": 2.8098987626546683, "grad_norm": 2.3212013244628906, "learning_rate": 8.611310636122047e-05, "loss": 0.5008069276809692, "step": 2498, "token_acc": 0.8346213292117465 }, { "epoch": 2.811023622047244, "grad_norm": 2.117738962173462, "learning_rate": 8.61002395259093e-05, "loss": 0.5647708177566528, "step": 2499, "token_acc": 0.8277356446370531 }, { "epoch": 2.81214848143982, "grad_norm": 2.149714946746826, "learning_rate": 8.608736769473515e-05, "loss": 0.4627293050289154, "step": 2500, "token_acc": 0.8516405135520685 }, { "epoch": 2.8132733408323958, "grad_norm": 2.160086154937744, "learning_rate": 8.607449086947938e-05, "loss": 0.507956862449646, "step": 2501, "token_acc": 0.8387942332896461 }, { "epoch": 2.814398200224972, "grad_norm": 2.2143490314483643, "learning_rate": 8.606160905192398e-05, "loss": 0.7207454442977905, "step": 2502, "token_acc": 0.782307025151778 }, { "epoch": 2.815523059617548, "grad_norm": 2.257194995880127, "learning_rate": 8.604872224385165e-05, "loss": 0.5823127627372742, "step": 2503, "token_acc": 0.8143036386449184 }, { "epoch": 2.8166479190101237, "grad_norm": 2.0214109420776367, "learning_rate": 8.603583044704578e-05, "loss": 0.5673695802688599, "step": 2504, "token_acc": 0.8227712137486574 }, { "epoch": 2.8177727784027, "grad_norm": 2.4380059242248535, "learning_rate": 8.602293366329044e-05, "loss": 0.8152918815612793, "step": 2505, "token_acc": 0.7643142476697736 }, { "epoch": 2.8188976377952755, "grad_norm": 2.0778632164001465, "learning_rate": 8.601003189437039e-05, "loss": 0.6010805368423462, "step": 2506, "token_acc": 0.7986870897155361 }, { "epoch": 2.8200224971878516, "grad_norm": 1.8536736965179443, "learning_rate": 8.599712514207111e-05, "loss": 0.5204453468322754, "step": 2507, "token_acc": 0.8325123152709359 }, { "epoch": 2.8211473565804273, "grad_norm": 2.1061882972717285, "learning_rate": 8.598421340817874e-05, "loss": 0.5399712324142456, "step": 2508, "token_acc": 0.8270925110132159 }, { "epoch": 2.8222722159730034, "grad_norm": 2.1669304370880127, "learning_rate": 8.597129669448012e-05, "loss": 0.47692054510116577, "step": 2509, "token_acc": 0.8436317780580076 }, { "epoch": 2.8233970753655795, "grad_norm": 2.17208194732666, "learning_rate": 8.595837500276276e-05, "loss": 0.5856907367706299, "step": 2510, "token_acc": 0.8119218910585817 }, { "epoch": 2.824521934758155, "grad_norm": 1.8605278730392456, "learning_rate": 8.594544833481489e-05, "loss": 0.43297457695007324, "step": 2511, "token_acc": 0.8571428571428571 }, { "epoch": 2.8256467941507313, "grad_norm": 2.2030630111694336, "learning_rate": 8.593251669242541e-05, "loss": 0.48823288083076477, "step": 2512, "token_acc": 0.8575197889182058 }, { "epoch": 2.826771653543307, "grad_norm": 2.268447160720825, "learning_rate": 8.591958007738393e-05, "loss": 0.5207059979438782, "step": 2513, "token_acc": 0.8165760869565217 }, { "epoch": 2.827896512935883, "grad_norm": 2.1183037757873535, "learning_rate": 8.59066384914807e-05, "loss": 0.5977498292922974, "step": 2514, "token_acc": 0.7980295566502463 }, { "epoch": 2.8290213723284587, "grad_norm": 2.0269670486450195, "learning_rate": 8.589369193650672e-05, "loss": 0.5435729026794434, "step": 2515, "token_acc": 0.8212809917355371 }, { "epoch": 2.830146231721035, "grad_norm": 2.1180074214935303, "learning_rate": 8.58807404142536e-05, "loss": 0.4626794159412384, "step": 2516, "token_acc": 0.8536585365853658 }, { "epoch": 2.831271091113611, "grad_norm": 2.0961384773254395, "learning_rate": 8.586778392651373e-05, "loss": 0.511221170425415, "step": 2517, "token_acc": 0.8415233415233415 }, { "epoch": 2.8323959505061866, "grad_norm": 1.7481144666671753, "learning_rate": 8.585482247508013e-05, "loss": 0.49243244528770447, "step": 2518, "token_acc": 0.8542581211589113 }, { "epoch": 2.8335208098987628, "grad_norm": 2.026745557785034, "learning_rate": 8.584185606174651e-05, "loss": 0.4511185884475708, "step": 2519, "token_acc": 0.8568306010928962 }, { "epoch": 2.8346456692913384, "grad_norm": 1.769853115081787, "learning_rate": 8.582888468830725e-05, "loss": 0.4806375801563263, "step": 2520, "token_acc": 0.8613953488372093 }, { "epoch": 2.8357705286839145, "grad_norm": 2.108705520629883, "learning_rate": 8.58159083565575e-05, "loss": 0.5585565567016602, "step": 2521, "token_acc": 0.8337696335078534 }, { "epoch": 2.83689538807649, "grad_norm": 2.0626964569091797, "learning_rate": 8.580292706829299e-05, "loss": 0.5717641115188599, "step": 2522, "token_acc": 0.8205828779599271 }, { "epoch": 2.8380202474690663, "grad_norm": 2.2134108543395996, "learning_rate": 8.578994082531018e-05, "loss": 0.530868411064148, "step": 2523, "token_acc": 0.8398486759142497 }, { "epoch": 2.8391451068616425, "grad_norm": 2.396097421646118, "learning_rate": 8.577694962940624e-05, "loss": 0.5210138559341431, "step": 2524, "token_acc": 0.8232044198895028 }, { "epoch": 2.840269966254218, "grad_norm": 2.2475342750549316, "learning_rate": 8.576395348237901e-05, "loss": 0.5462197065353394, "step": 2525, "token_acc": 0.8379084967320262 }, { "epoch": 2.8413948256467942, "grad_norm": 1.8181638717651367, "learning_rate": 8.5750952386027e-05, "loss": 0.44994327425956726, "step": 2526, "token_acc": 0.8540218470705064 }, { "epoch": 2.84251968503937, "grad_norm": 1.9083164930343628, "learning_rate": 8.573794634214939e-05, "loss": 0.5441379547119141, "step": 2527, "token_acc": 0.8258064516129032 }, { "epoch": 2.843644544431946, "grad_norm": 2.0921337604522705, "learning_rate": 8.57249353525461e-05, "loss": 0.6307775378227234, "step": 2528, "token_acc": 0.8137254901960784 }, { "epoch": 2.8447694038245217, "grad_norm": 2.084582567214966, "learning_rate": 8.571191941901768e-05, "loss": 0.5265278816223145, "step": 2529, "token_acc": 0.8301158301158301 }, { "epoch": 2.845894263217098, "grad_norm": 2.4056997299194336, "learning_rate": 8.569889854336542e-05, "loss": 0.4971960186958313, "step": 2530, "token_acc": 0.8361204013377926 }, { "epoch": 2.847019122609674, "grad_norm": 1.8008288145065308, "learning_rate": 8.568587272739124e-05, "loss": 0.3632245361804962, "step": 2531, "token_acc": 0.8915956151035322 }, { "epoch": 2.8481439820022496, "grad_norm": 2.054952383041382, "learning_rate": 8.567284197289775e-05, "loss": 0.49847710132598877, "step": 2532, "token_acc": 0.8378698224852071 }, { "epoch": 2.8492688413948257, "grad_norm": 2.4599862098693848, "learning_rate": 8.56598062816883e-05, "loss": 0.6555575728416443, "step": 2533, "token_acc": 0.8241626794258373 }, { "epoch": 2.850393700787402, "grad_norm": 2.0120596885681152, "learning_rate": 8.564676565556685e-05, "loss": 0.6476647257804871, "step": 2534, "token_acc": 0.80259222333001 }, { "epoch": 2.8515185601799775, "grad_norm": 2.380603313446045, "learning_rate": 8.563372009633805e-05, "loss": 0.5592936277389526, "step": 2535, "token_acc": 0.828169014084507 }, { "epoch": 2.852643419572553, "grad_norm": 2.0277771949768066, "learning_rate": 8.562066960580731e-05, "loss": 0.5969913005828857, "step": 2536, "token_acc": 0.8255698711595639 }, { "epoch": 2.8537682789651293, "grad_norm": 2.17069411277771, "learning_rate": 8.560761418578065e-05, "loss": 0.541232705116272, "step": 2537, "token_acc": 0.82829373650108 }, { "epoch": 2.8548931383577054, "grad_norm": 1.9474443197250366, "learning_rate": 8.559455383806479e-05, "loss": 0.4525517523288727, "step": 2538, "token_acc": 0.8462484624846248 }, { "epoch": 2.856017997750281, "grad_norm": 1.9233194589614868, "learning_rate": 8.558148856446713e-05, "loss": 0.46946027874946594, "step": 2539, "token_acc": 0.8537455410225921 }, { "epoch": 2.857142857142857, "grad_norm": 2.347580909729004, "learning_rate": 8.556841836679575e-05, "loss": 0.6369637846946716, "step": 2540, "token_acc": 0.8090010976948409 }, { "epoch": 2.8582677165354333, "grad_norm": 2.5206120014190674, "learning_rate": 8.555534324685943e-05, "loss": 0.6344416737556458, "step": 2541, "token_acc": 0.8065843621399177 }, { "epoch": 2.859392575928009, "grad_norm": 2.212989091873169, "learning_rate": 8.554226320646761e-05, "loss": 0.6359409093856812, "step": 2542, "token_acc": 0.7969151670951157 }, { "epoch": 2.8605174353205847, "grad_norm": 1.9661906957626343, "learning_rate": 8.552917824743043e-05, "loss": 0.5948880910873413, "step": 2543, "token_acc": 0.8068592057761733 }, { "epoch": 2.861642294713161, "grad_norm": 2.2615272998809814, "learning_rate": 8.551608837155869e-05, "loss": 0.6290392875671387, "step": 2544, "token_acc": 0.8002309468822171 }, { "epoch": 2.862767154105737, "grad_norm": 2.6051559448242188, "learning_rate": 8.550299358066387e-05, "loss": 0.6548011898994446, "step": 2545, "token_acc": 0.7809762202753442 }, { "epoch": 2.8638920134983126, "grad_norm": 2.3201823234558105, "learning_rate": 8.548989387655817e-05, "loss": 0.6460992097854614, "step": 2546, "token_acc": 0.7858627858627859 }, { "epoch": 2.8650168728908887, "grad_norm": 2.2345383167266846, "learning_rate": 8.54767892610544e-05, "loss": 0.5574315190315247, "step": 2547, "token_acc": 0.8071519795657727 }, { "epoch": 2.866141732283465, "grad_norm": 2.608111619949341, "learning_rate": 8.546367973596614e-05, "loss": 0.7112652659416199, "step": 2548, "token_acc": 0.781038374717833 }, { "epoch": 2.8672665916760405, "grad_norm": 2.187640428543091, "learning_rate": 8.545056530310755e-05, "loss": 0.522688627243042, "step": 2549, "token_acc": 0.841717791411043 }, { "epoch": 2.868391451068616, "grad_norm": 2.082866907119751, "learning_rate": 8.543744596429355e-05, "loss": 0.6098537445068359, "step": 2550, "token_acc": 0.8146929824561403 }, { "epoch": 2.8695163104611923, "grad_norm": 1.8606888055801392, "learning_rate": 8.54243217213397e-05, "loss": 0.5115789175033569, "step": 2551, "token_acc": 0.8414496036240091 }, { "epoch": 2.8706411698537684, "grad_norm": 2.2433018684387207, "learning_rate": 8.541119257606223e-05, "loss": 0.45590710639953613, "step": 2552, "token_acc": 0.8420221169036335 }, { "epoch": 2.871766029246344, "grad_norm": 2.017927646636963, "learning_rate": 8.539805853027809e-05, "loss": 0.49691516160964966, "step": 2553, "token_acc": 0.8344519015659956 }, { "epoch": 2.87289088863892, "grad_norm": 2.1844072341918945, "learning_rate": 8.538491958580485e-05, "loss": 0.5590268969535828, "step": 2554, "token_acc": 0.8283208020050126 }, { "epoch": 2.8740157480314963, "grad_norm": 2.4474587440490723, "learning_rate": 8.537177574446083e-05, "loss": 0.5552148818969727, "step": 2555, "token_acc": 0.813953488372093 }, { "epoch": 2.875140607424072, "grad_norm": 2.28570818901062, "learning_rate": 8.535862700806497e-05, "loss": 0.5902047157287598, "step": 2556, "token_acc": 0.8254172015404364 }, { "epoch": 2.876265466816648, "grad_norm": 2.5100817680358887, "learning_rate": 8.53454733784369e-05, "loss": 0.6034073829650879, "step": 2557, "token_acc": 0.8002560819462228 }, { "epoch": 2.8773903262092237, "grad_norm": 2.1232213973999023, "learning_rate": 8.533231485739694e-05, "loss": 0.7012327909469604, "step": 2558, "token_acc": 0.7965009208103131 }, { "epoch": 2.8785151856018, "grad_norm": 2.1195342540740967, "learning_rate": 8.531915144676607e-05, "loss": 0.5652674436569214, "step": 2559, "token_acc": 0.8109090909090909 }, { "epoch": 2.8796400449943755, "grad_norm": 1.9624929428100586, "learning_rate": 8.530598314836599e-05, "loss": 0.39900118112564087, "step": 2560, "token_acc": 0.8751608751608752 }, { "epoch": 2.8807649043869517, "grad_norm": 2.282860517501831, "learning_rate": 8.529280996401899e-05, "loss": 0.6010736227035522, "step": 2561, "token_acc": 0.7990255785627284 }, { "epoch": 2.8818897637795278, "grad_norm": 1.9255961179733276, "learning_rate": 8.527963189554813e-05, "loss": 0.5624324083328247, "step": 2562, "token_acc": 0.8208208208208209 }, { "epoch": 2.8830146231721034, "grad_norm": 1.960082769393921, "learning_rate": 8.526644894477709e-05, "loss": 0.5914342999458313, "step": 2563, "token_acc": 0.8086785009861933 }, { "epoch": 2.8841394825646796, "grad_norm": 2.169123888015747, "learning_rate": 8.525326111353023e-05, "loss": 0.5380339622497559, "step": 2564, "token_acc": 0.8531791907514451 }, { "epoch": 2.8852643419572552, "grad_norm": 2.1060101985931396, "learning_rate": 8.524006840363263e-05, "loss": 0.49421507120132446, "step": 2565, "token_acc": 0.8440925700365408 }, { "epoch": 2.8863892013498313, "grad_norm": 1.9414187669754028, "learning_rate": 8.522687081690997e-05, "loss": 0.6238324046134949, "step": 2566, "token_acc": 0.8044052863436123 }, { "epoch": 2.887514060742407, "grad_norm": 2.0340492725372314, "learning_rate": 8.521366835518867e-05, "loss": 0.510863184928894, "step": 2567, "token_acc": 0.8244274809160306 }, { "epoch": 2.888638920134983, "grad_norm": 2.1346142292022705, "learning_rate": 8.52004610202958e-05, "loss": 0.444989413022995, "step": 2568, "token_acc": 0.8382559774964838 }, { "epoch": 2.8897637795275593, "grad_norm": 2.1845271587371826, "learning_rate": 8.51872488140591e-05, "loss": 0.5960903167724609, "step": 2569, "token_acc": 0.835016835016835 }, { "epoch": 2.890888638920135, "grad_norm": 1.94596266746521, "learning_rate": 8.517403173830698e-05, "loss": 0.3524889051914215, "step": 2570, "token_acc": 0.8726207906295754 }, { "epoch": 2.892013498312711, "grad_norm": 2.442826271057129, "learning_rate": 8.516080979486856e-05, "loss": 0.5917656421661377, "step": 2571, "token_acc": 0.8067885117493473 }, { "epoch": 2.8931383577052867, "grad_norm": 1.9383790493011475, "learning_rate": 8.514758298557357e-05, "loss": 0.48779523372650146, "step": 2572, "token_acc": 0.8355601233299075 }, { "epoch": 2.894263217097863, "grad_norm": 2.2068986892700195, "learning_rate": 8.513435131225247e-05, "loss": 0.6735610365867615, "step": 2573, "token_acc": 0.7830188679245284 }, { "epoch": 2.8953880764904385, "grad_norm": 1.9262882471084595, "learning_rate": 8.512111477673639e-05, "loss": 0.5757591724395752, "step": 2574, "token_acc": 0.823049001814882 }, { "epoch": 2.8965129358830146, "grad_norm": 1.9948649406433105, "learning_rate": 8.510787338085708e-05, "loss": 0.36602336168289185, "step": 2575, "token_acc": 0.8763040238450075 }, { "epoch": 2.8976377952755907, "grad_norm": 1.8886839151382446, "learning_rate": 8.509462712644704e-05, "loss": 0.43752074241638184, "step": 2576, "token_acc": 0.860178970917226 }, { "epoch": 2.8987626546681664, "grad_norm": 1.8279756307601929, "learning_rate": 8.508137601533937e-05, "loss": 0.5012289881706238, "step": 2577, "token_acc": 0.8428709990300679 }, { "epoch": 2.8998875140607425, "grad_norm": 1.7731120586395264, "learning_rate": 8.506812004936789e-05, "loss": 0.3944995403289795, "step": 2578, "token_acc": 0.8783910196445276 }, { "epoch": 2.901012373453318, "grad_norm": 2.155755043029785, "learning_rate": 8.505485923036705e-05, "loss": 0.5918765068054199, "step": 2579, "token_acc": 0.8107287449392713 }, { "epoch": 2.9021372328458943, "grad_norm": 1.8232136964797974, "learning_rate": 8.504159356017202e-05, "loss": 0.4091030955314636, "step": 2580, "token_acc": 0.8634577603143418 }, { "epoch": 2.90326209223847, "grad_norm": 2.0536370277404785, "learning_rate": 8.50283230406186e-05, "loss": 0.5552256107330322, "step": 2581, "token_acc": 0.8467507274490785 }, { "epoch": 2.904386951631046, "grad_norm": 2.2288639545440674, "learning_rate": 8.501504767354331e-05, "loss": 0.700329065322876, "step": 2582, "token_acc": 0.7861111111111111 }, { "epoch": 2.905511811023622, "grad_norm": 2.209451675415039, "learning_rate": 8.50017674607833e-05, "loss": 0.4354158341884613, "step": 2583, "token_acc": 0.8529032258064516 }, { "epoch": 2.906636670416198, "grad_norm": 2.5079195499420166, "learning_rate": 8.498848240417637e-05, "loss": 0.6673790216445923, "step": 2584, "token_acc": 0.8115942028985508 }, { "epoch": 2.907761529808774, "grad_norm": 2.0836374759674072, "learning_rate": 8.497519250556106e-05, "loss": 0.5955841541290283, "step": 2585, "token_acc": 0.8129092609915809 }, { "epoch": 2.9088863892013497, "grad_norm": 2.1955018043518066, "learning_rate": 8.496189776677652e-05, "loss": 0.5659691095352173, "step": 2586, "token_acc": 0.8331374853113983 }, { "epoch": 2.910011248593926, "grad_norm": 1.738875150680542, "learning_rate": 8.494859818966257e-05, "loss": 0.4557726979255676, "step": 2587, "token_acc": 0.8494809688581315 }, { "epoch": 2.9111361079865015, "grad_norm": 2.146493911743164, "learning_rate": 8.493529377605978e-05, "loss": 0.5994327664375305, "step": 2588, "token_acc": 0.8040665434380776 }, { "epoch": 2.9122609673790776, "grad_norm": 2.3058676719665527, "learning_rate": 8.492198452780926e-05, "loss": 0.5034809112548828, "step": 2589, "token_acc": 0.836627140974967 }, { "epoch": 2.9133858267716537, "grad_norm": 2.3872952461242676, "learning_rate": 8.490867044675292e-05, "loss": 0.5366867184638977, "step": 2590, "token_acc": 0.8274456521739131 }, { "epoch": 2.9145106861642294, "grad_norm": 2.2728238105773926, "learning_rate": 8.489535153473324e-05, "loss": 0.5652950406074524, "step": 2591, "token_acc": 0.8262331838565022 }, { "epoch": 2.9156355455568055, "grad_norm": 2.4228744506835938, "learning_rate": 8.488202779359343e-05, "loss": 0.6669379472732544, "step": 2592, "token_acc": 0.7832618025751072 }, { "epoch": 2.9167604049493816, "grad_norm": 2.282580614089966, "learning_rate": 8.486869922517731e-05, "loss": 0.6706390380859375, "step": 2593, "token_acc": 0.7943107221006565 }, { "epoch": 2.9178852643419573, "grad_norm": 2.1509554386138916, "learning_rate": 8.485536583132942e-05, "loss": 0.5325647592544556, "step": 2594, "token_acc": 0.8276209677419355 }, { "epoch": 2.919010123734533, "grad_norm": 2.037179708480835, "learning_rate": 8.484202761389497e-05, "loss": 0.42761310935020447, "step": 2595, "token_acc": 0.8530150753768844 }, { "epoch": 2.920134983127109, "grad_norm": 2.2923779487609863, "learning_rate": 8.48286845747198e-05, "loss": 0.6135848760604858, "step": 2596, "token_acc": 0.80065717415115 }, { "epoch": 2.921259842519685, "grad_norm": 1.8776135444641113, "learning_rate": 8.481533671565043e-05, "loss": 0.32434338331222534, "step": 2597, "token_acc": 0.8911465892597968 }, { "epoch": 2.922384701912261, "grad_norm": 2.352210521697998, "learning_rate": 8.480198403853406e-05, "loss": 0.5172638893127441, "step": 2598, "token_acc": 0.8432934926958832 }, { "epoch": 2.923509561304837, "grad_norm": 2.315340042114258, "learning_rate": 8.478862654521855e-05, "loss": 0.6677885055541992, "step": 2599, "token_acc": 0.7857911733046287 }, { "epoch": 2.924634420697413, "grad_norm": 1.9701159000396729, "learning_rate": 8.477526423755243e-05, "loss": 0.49926069378852844, "step": 2600, "token_acc": 0.8504672897196262 }, { "epoch": 2.9257592800899888, "grad_norm": 2.1970725059509277, "learning_rate": 8.476189711738487e-05, "loss": 0.5959769487380981, "step": 2601, "token_acc": 0.8297872340425532 }, { "epoch": 2.9268841394825644, "grad_norm": 2.3665456771850586, "learning_rate": 8.474852518656575e-05, "loss": 0.5518344640731812, "step": 2602, "token_acc": 0.8328267477203647 }, { "epoch": 2.9280089988751405, "grad_norm": 2.239797353744507, "learning_rate": 8.473514844694558e-05, "loss": 0.5807715654373169, "step": 2603, "token_acc": 0.8194993412384717 }, { "epoch": 2.9291338582677167, "grad_norm": 2.0417027473449707, "learning_rate": 8.472176690037555e-05, "loss": 0.5719929933547974, "step": 2604, "token_acc": 0.810752688172043 }, { "epoch": 2.9302587176602923, "grad_norm": 2.188922166824341, "learning_rate": 8.470838054870754e-05, "loss": 0.7009865045547485, "step": 2605, "token_acc": 0.798828125 }, { "epoch": 2.9313835770528684, "grad_norm": 1.9950305223464966, "learning_rate": 8.469498939379401e-05, "loss": 0.47198164463043213, "step": 2606, "token_acc": 0.8484076433121019 }, { "epoch": 2.9325084364454446, "grad_norm": 2.1988790035247803, "learning_rate": 8.468159343748821e-05, "loss": 0.6035135984420776, "step": 2607, "token_acc": 0.8024242424242424 }, { "epoch": 2.9336332958380202, "grad_norm": 2.1515965461730957, "learning_rate": 8.466819268164394e-05, "loss": 0.48232370615005493, "step": 2608, "token_acc": 0.8331242158092849 }, { "epoch": 2.934758155230596, "grad_norm": 1.7583684921264648, "learning_rate": 8.465478712811576e-05, "loss": 0.603721559047699, "step": 2609, "token_acc": 0.8168789808917197 }, { "epoch": 2.935883014623172, "grad_norm": 2.32401180267334, "learning_rate": 8.464137677875879e-05, "loss": 0.5753487944602966, "step": 2610, "token_acc": 0.8185011709601874 }, { "epoch": 2.937007874015748, "grad_norm": 2.5633509159088135, "learning_rate": 8.46279616354289e-05, "loss": 0.6308541297912598, "step": 2611, "token_acc": 0.8219013237063778 }, { "epoch": 2.938132733408324, "grad_norm": 2.28743314743042, "learning_rate": 8.46145416999826e-05, "loss": 0.519384503364563, "step": 2612, "token_acc": 0.8268974700399467 }, { "epoch": 2.9392575928009, "grad_norm": 1.909102201461792, "learning_rate": 8.460111697427702e-05, "loss": 0.5280698537826538, "step": 2613, "token_acc": 0.8421550094517959 }, { "epoch": 2.940382452193476, "grad_norm": 2.3715224266052246, "learning_rate": 8.458768746017006e-05, "loss": 0.6458710432052612, "step": 2614, "token_acc": 0.8067331670822943 }, { "epoch": 2.9415073115860517, "grad_norm": 2.3101613521575928, "learning_rate": 8.457425315952013e-05, "loss": 0.5945824980735779, "step": 2615, "token_acc": 0.8054892601431981 }, { "epoch": 2.942632170978628, "grad_norm": 2.0214977264404297, "learning_rate": 8.456081407418644e-05, "loss": 0.5527798533439636, "step": 2616, "token_acc": 0.8317460317460318 }, { "epoch": 2.9437570303712035, "grad_norm": 2.180790424346924, "learning_rate": 8.454737020602878e-05, "loss": 0.5930659174919128, "step": 2617, "token_acc": 0.8142548596112311 }, { "epoch": 2.9448818897637796, "grad_norm": 2.2711005210876465, "learning_rate": 8.453392155690765e-05, "loss": 0.6099676489830017, "step": 2618, "token_acc": 0.7985524728588661 }, { "epoch": 2.9460067491563553, "grad_norm": 2.1728458404541016, "learning_rate": 8.452046812868418e-05, "loss": 0.4856499135494232, "step": 2619, "token_acc": 0.8401585204755614 }, { "epoch": 2.9471316085489314, "grad_norm": 1.9505850076675415, "learning_rate": 8.450700992322016e-05, "loss": 0.6276124715805054, "step": 2620, "token_acc": 0.7969798657718121 }, { "epoch": 2.9482564679415075, "grad_norm": 2.148881673812866, "learning_rate": 8.449354694237807e-05, "loss": 0.6742518544197083, "step": 2621, "token_acc": 0.78860103626943 }, { "epoch": 2.949381327334083, "grad_norm": 1.948671579360962, "learning_rate": 8.448007918802103e-05, "loss": 0.5503969192504883, "step": 2622, "token_acc": 0.8220802919708029 }, { "epoch": 2.9505061867266593, "grad_norm": 1.986157774925232, "learning_rate": 8.446660666201282e-05, "loss": 0.5132067203521729, "step": 2623, "token_acc": 0.8348214285714286 }, { "epoch": 2.951631046119235, "grad_norm": 2.1224570274353027, "learning_rate": 8.445312936621788e-05, "loss": 0.6628903150558472, "step": 2624, "token_acc": 0.8070175438596491 }, { "epoch": 2.952755905511811, "grad_norm": 1.9506548643112183, "learning_rate": 8.443964730250132e-05, "loss": 0.36084240674972534, "step": 2625, "token_acc": 0.8888888888888888 }, { "epoch": 2.953880764904387, "grad_norm": 1.7672545909881592, "learning_rate": 8.44261604727289e-05, "loss": 0.3597657084465027, "step": 2626, "token_acc": 0.8794416243654822 }, { "epoch": 2.955005624296963, "grad_norm": 1.577393651008606, "learning_rate": 8.441266887876706e-05, "loss": 0.3706282675266266, "step": 2627, "token_acc": 0.8700290979631425 }, { "epoch": 2.956130483689539, "grad_norm": 2.552495002746582, "learning_rate": 8.439917252248287e-05, "loss": 0.6470178961753845, "step": 2628, "token_acc": 0.8106995884773662 }, { "epoch": 2.9572553430821147, "grad_norm": 2.1945536136627197, "learning_rate": 8.438567140574407e-05, "loss": 0.5736914873123169, "step": 2629, "token_acc": 0.8058252427184466 }, { "epoch": 2.958380202474691, "grad_norm": 2.0053815841674805, "learning_rate": 8.437216553041907e-05, "loss": 0.5860317349433899, "step": 2630, "token_acc": 0.8041136141038198 }, { "epoch": 2.9595050618672665, "grad_norm": 1.8112828731536865, "learning_rate": 8.435865489837691e-05, "loss": 0.6566574573516846, "step": 2631, "token_acc": 0.7991769547325103 }, { "epoch": 2.9606299212598426, "grad_norm": 2.1368229389190674, "learning_rate": 8.434513951148733e-05, "loss": 0.6043751835823059, "step": 2632, "token_acc": 0.8059701492537313 }, { "epoch": 2.9617547806524183, "grad_norm": 2.1933093070983887, "learning_rate": 8.433161937162071e-05, "loss": 0.6146339774131775, "step": 2633, "token_acc": 0.8025613660618997 }, { "epoch": 2.9628796400449944, "grad_norm": 2.1124584674835205, "learning_rate": 8.431809448064805e-05, "loss": 0.5842646360397339, "step": 2634, "token_acc": 0.8120805369127517 }, { "epoch": 2.9640044994375705, "grad_norm": 1.7797198295593262, "learning_rate": 8.430456484044109e-05, "loss": 0.44734689593315125, "step": 2635, "token_acc": 0.8566392479435958 }, { "epoch": 2.965129358830146, "grad_norm": 1.9352881908416748, "learning_rate": 8.429103045287214e-05, "loss": 0.6154146194458008, "step": 2636, "token_acc": 0.8093869731800766 }, { "epoch": 2.9662542182227223, "grad_norm": 2.21960186958313, "learning_rate": 8.427749131981421e-05, "loss": 0.6089456081390381, "step": 2637, "token_acc": 0.8100320170757738 }, { "epoch": 2.967379077615298, "grad_norm": 2.116417646408081, "learning_rate": 8.4263947443141e-05, "loss": 0.7032605409622192, "step": 2638, "token_acc": 0.790990990990991 }, { "epoch": 2.968503937007874, "grad_norm": 2.0751147270202637, "learning_rate": 8.425039882472674e-05, "loss": 0.5447802543640137, "step": 2639, "token_acc": 0.8140068886337543 }, { "epoch": 2.9696287964004497, "grad_norm": 1.9559693336486816, "learning_rate": 8.423684546644653e-05, "loss": 0.5395944118499756, "step": 2640, "token_acc": 0.836801752464403 }, { "epoch": 2.970753655793026, "grad_norm": 1.8840358257293701, "learning_rate": 8.422328737017588e-05, "loss": 0.6002767086029053, "step": 2641, "token_acc": 0.8269617706237424 }, { "epoch": 2.971878515185602, "grad_norm": 2.272270917892456, "learning_rate": 8.420972453779117e-05, "loss": 0.5617266893386841, "step": 2642, "token_acc": 0.8266666666666667 }, { "epoch": 2.9730033745781776, "grad_norm": 2.350485324859619, "learning_rate": 8.419615697116928e-05, "loss": 0.6894421577453613, "step": 2643, "token_acc": 0.7989637305699482 }, { "epoch": 2.9741282339707538, "grad_norm": 2.1960628032684326, "learning_rate": 8.418258467218782e-05, "loss": 0.5246453285217285, "step": 2644, "token_acc": 0.8326474622770919 }, { "epoch": 2.9752530933633294, "grad_norm": 1.8652009963989258, "learning_rate": 8.416900764272509e-05, "loss": 0.5227068662643433, "step": 2645, "token_acc": 0.8432692307692308 }, { "epoch": 2.9763779527559056, "grad_norm": 2.410881519317627, "learning_rate": 8.415542588465992e-05, "loss": 0.516501784324646, "step": 2646, "token_acc": 0.8370497427101201 }, { "epoch": 2.9775028121484812, "grad_norm": 1.889757513999939, "learning_rate": 8.414183939987192e-05, "loss": 0.4250949025154114, "step": 2647, "token_acc": 0.8510362694300518 }, { "epoch": 2.9786276715410573, "grad_norm": 2.2182998657226562, "learning_rate": 8.41282481902413e-05, "loss": 0.5452112555503845, "step": 2648, "token_acc": 0.8155467720685112 }, { "epoch": 2.9797525309336335, "grad_norm": 2.0771002769470215, "learning_rate": 8.411465225764892e-05, "loss": 0.6941447257995605, "step": 2649, "token_acc": 0.7934595524956971 }, { "epoch": 2.980877390326209, "grad_norm": 2.135591745376587, "learning_rate": 8.41010516039763e-05, "loss": 0.6049238443374634, "step": 2650, "token_acc": 0.8178178178178178 }, { "epoch": 2.9820022497187852, "grad_norm": 2.2866413593292236, "learning_rate": 8.408744623110562e-05, "loss": 0.6201270818710327, "step": 2651, "token_acc": 0.8098901098901099 }, { "epoch": 2.983127109111361, "grad_norm": 2.1742546558380127, "learning_rate": 8.407383614091973e-05, "loss": 0.5481008887290955, "step": 2652, "token_acc": 0.832 }, { "epoch": 2.984251968503937, "grad_norm": 2.0765693187713623, "learning_rate": 8.406022133530207e-05, "loss": 0.4353817403316498, "step": 2653, "token_acc": 0.8540145985401459 }, { "epoch": 2.9853768278965127, "grad_norm": 2.4796841144561768, "learning_rate": 8.404660181613682e-05, "loss": 0.493132084608078, "step": 2654, "token_acc": 0.8248847926267281 }, { "epoch": 2.986501687289089, "grad_norm": 2.2007198333740234, "learning_rate": 8.403297758530872e-05, "loss": 0.5580452084541321, "step": 2655, "token_acc": 0.8220858895705522 }, { "epoch": 2.987626546681665, "grad_norm": 2.049464225769043, "learning_rate": 8.401934864470325e-05, "loss": 0.48465868830680847, "step": 2656, "token_acc": 0.8282208588957055 }, { "epoch": 2.9887514060742406, "grad_norm": 2.1806654930114746, "learning_rate": 8.400571499620648e-05, "loss": 0.5936434864997864, "step": 2657, "token_acc": 0.81789802289282 }, { "epoch": 2.9898762654668167, "grad_norm": 2.326939582824707, "learning_rate": 8.399207664170514e-05, "loss": 0.5728318691253662, "step": 2658, "token_acc": 0.8125 }, { "epoch": 2.991001124859393, "grad_norm": 2.3365440368652344, "learning_rate": 8.397843358308666e-05, "loss": 0.5416728258132935, "step": 2659, "token_acc": 0.8333333333333334 }, { "epoch": 2.9921259842519685, "grad_norm": 2.4457452297210693, "learning_rate": 8.396478582223904e-05, "loss": 0.6342925429344177, "step": 2660, "token_acc": 0.7997557997557998 }, { "epoch": 2.993250843644544, "grad_norm": 1.824754238128662, "learning_rate": 8.395113336105101e-05, "loss": 0.36795681715011597, "step": 2661, "token_acc": 0.8803763440860215 }, { "epoch": 2.9943757030371203, "grad_norm": 2.3972949981689453, "learning_rate": 8.39374762014119e-05, "loss": 0.616113543510437, "step": 2662, "token_acc": 0.7924297924297924 }, { "epoch": 2.9955005624296964, "grad_norm": 2.3359525203704834, "learning_rate": 8.392381434521171e-05, "loss": 0.6011024117469788, "step": 2663, "token_acc": 0.8207681365576103 }, { "epoch": 2.996625421822272, "grad_norm": 2.376873254776001, "learning_rate": 8.391014779434108e-05, "loss": 0.5900087356567383, "step": 2664, "token_acc": 0.8060781476121563 }, { "epoch": 2.997750281214848, "grad_norm": 1.894140362739563, "learning_rate": 8.389647655069132e-05, "loss": 0.5806020498275757, "step": 2665, "token_acc": 0.8090107737512243 }, { "epoch": 2.9988751406074243, "grad_norm": 2.2172908782958984, "learning_rate": 8.388280061615435e-05, "loss": 0.593827486038208, "step": 2666, "token_acc": 0.8116883116883117 }, { "epoch": 3.0, "grad_norm": 2.1322505474090576, "learning_rate": 8.386911999262279e-05, "loss": 0.3937807083129883, "step": 2667, "token_acc": 0.8662613981762918 }, { "epoch": 3.001124859392576, "grad_norm": 1.490234613418579, "learning_rate": 8.385543468198987e-05, "loss": 0.3155137598514557, "step": 2668, "token_acc": 0.9107763615295481 }, { "epoch": 3.002249718785152, "grad_norm": 1.6235419511795044, "learning_rate": 8.384174468614947e-05, "loss": 0.3743090033531189, "step": 2669, "token_acc": 0.8913250714966635 }, { "epoch": 3.003374578177728, "grad_norm": 1.5319925546646118, "learning_rate": 8.382805000699617e-05, "loss": 0.3625819981098175, "step": 2670, "token_acc": 0.8778966131907309 }, { "epoch": 3.0044994375703036, "grad_norm": 1.9396179914474487, "learning_rate": 8.381435064642511e-05, "loss": 0.4178939461708069, "step": 2671, "token_acc": 0.8779874213836478 }, { "epoch": 3.0056242969628797, "grad_norm": 1.2902990579605103, "learning_rate": 8.380064660633213e-05, "loss": 0.25144529342651367, "step": 2672, "token_acc": 0.93025283347864 }, { "epoch": 3.0067491563554554, "grad_norm": 1.6661369800567627, "learning_rate": 8.378693788861376e-05, "loss": 0.34826403856277466, "step": 2673, "token_acc": 0.9031945788964182 }, { "epoch": 3.0078740157480315, "grad_norm": 1.7942512035369873, "learning_rate": 8.37732244951671e-05, "loss": 0.34133195877075195, "step": 2674, "token_acc": 0.9045287637698899 }, { "epoch": 3.0089988751406076, "grad_norm": 1.6690059900283813, "learning_rate": 8.375950642788992e-05, "loss": 0.38038361072540283, "step": 2675, "token_acc": 0.8948891031822566 }, { "epoch": 3.0101237345331833, "grad_norm": 1.8108584880828857, "learning_rate": 8.374578368868065e-05, "loss": 0.31026917695999146, "step": 2676, "token_acc": 0.8981308411214953 }, { "epoch": 3.0112485939257594, "grad_norm": 2.0967323780059814, "learning_rate": 8.373205627943835e-05, "loss": 0.3697819113731384, "step": 2677, "token_acc": 0.8855721393034826 }, { "epoch": 3.012373453318335, "grad_norm": 1.7381733655929565, "learning_rate": 8.371832420206278e-05, "loss": 0.2403431236743927, "step": 2678, "token_acc": 0.9171332586786114 }, { "epoch": 3.013498312710911, "grad_norm": 1.8507170677185059, "learning_rate": 8.370458745845426e-05, "loss": 0.27963533997535706, "step": 2679, "token_acc": 0.9032586558044806 }, { "epoch": 3.014623172103487, "grad_norm": 1.752069354057312, "learning_rate": 8.369084605051382e-05, "loss": 0.2841719090938568, "step": 2680, "token_acc": 0.9156722354813047 }, { "epoch": 3.015748031496063, "grad_norm": 2.5396618843078613, "learning_rate": 8.367709998014311e-05, "loss": 0.3368159830570221, "step": 2681, "token_acc": 0.8796680497925311 }, { "epoch": 3.016872890888639, "grad_norm": 2.522132635116577, "learning_rate": 8.366334924924442e-05, "loss": 0.3739146590232849, "step": 2682, "token_acc": 0.8855421686746988 }, { "epoch": 3.0179977502812148, "grad_norm": 2.7398149967193604, "learning_rate": 8.364959385972072e-05, "loss": 0.3988858163356781, "step": 2683, "token_acc": 0.8588888888888889 }, { "epoch": 3.019122609673791, "grad_norm": 2.61519718170166, "learning_rate": 8.363583381347555e-05, "loss": 0.32306498289108276, "step": 2684, "token_acc": 0.8842482100238663 }, { "epoch": 3.0202474690663665, "grad_norm": 2.668642282485962, "learning_rate": 8.36220691124132e-05, "loss": 0.37442731857299805, "step": 2685, "token_acc": 0.87890625 }, { "epoch": 3.0213723284589427, "grad_norm": 2.8484482765197754, "learning_rate": 8.360829975843853e-05, "loss": 0.33393794298171997, "step": 2686, "token_acc": 0.8956521739130435 }, { "epoch": 3.0224971878515188, "grad_norm": 2.7691967487335205, "learning_rate": 8.359452575345706e-05, "loss": 0.27556249499320984, "step": 2687, "token_acc": 0.9153175591531756 }, { "epoch": 3.0236220472440944, "grad_norm": 2.3977034091949463, "learning_rate": 8.358074709937493e-05, "loss": 0.30495017766952515, "step": 2688, "token_acc": 0.9074262461851476 }, { "epoch": 3.0247469066366706, "grad_norm": 2.779285192489624, "learning_rate": 8.356696379809901e-05, "loss": 0.36865997314453125, "step": 2689, "token_acc": 0.8953771289537713 }, { "epoch": 3.0258717660292462, "grad_norm": 2.5684144496917725, "learning_rate": 8.355317585153668e-05, "loss": 0.2977878451347351, "step": 2690, "token_acc": 0.9037974683544304 }, { "epoch": 3.0269966254218224, "grad_norm": 2.7490127086639404, "learning_rate": 8.35393832615961e-05, "loss": 0.3690895438194275, "step": 2691, "token_acc": 0.8728908886389202 }, { "epoch": 3.028121484814398, "grad_norm": 2.838083267211914, "learning_rate": 8.352558603018596e-05, "loss": 0.320401132106781, "step": 2692, "token_acc": 0.8964194373401535 }, { "epoch": 3.029246344206974, "grad_norm": 2.603482723236084, "learning_rate": 8.351178415921565e-05, "loss": 0.32269376516342163, "step": 2693, "token_acc": 0.8871139510117146 }, { "epoch": 3.0303712035995503, "grad_norm": 2.5561602115631104, "learning_rate": 8.349797765059522e-05, "loss": 0.334370493888855, "step": 2694, "token_acc": 0.8926673751328374 }, { "epoch": 3.031496062992126, "grad_norm": 2.6726059913635254, "learning_rate": 8.34841665062353e-05, "loss": 0.3296845555305481, "step": 2695, "token_acc": 0.8899408284023669 }, { "epoch": 3.032620922384702, "grad_norm": 2.466337203979492, "learning_rate": 8.347035072804723e-05, "loss": 0.3444674611091614, "step": 2696, "token_acc": 0.891566265060241 }, { "epoch": 3.0337457817772777, "grad_norm": 2.1838338375091553, "learning_rate": 8.345653031794292e-05, "loss": 0.34932196140289307, "step": 2697, "token_acc": 0.884780739466896 }, { "epoch": 3.034870641169854, "grad_norm": 2.4408011436462402, "learning_rate": 8.344270527783497e-05, "loss": 0.3312312960624695, "step": 2698, "token_acc": 0.8866599799398195 }, { "epoch": 3.0359955005624295, "grad_norm": 2.3702006340026855, "learning_rate": 8.342887560963662e-05, "loss": 0.2770477533340454, "step": 2699, "token_acc": 0.9073881373569199 }, { "epoch": 3.0371203599550056, "grad_norm": 2.063124895095825, "learning_rate": 8.341504131526172e-05, "loss": 0.26287442445755005, "step": 2700, "token_acc": 0.9082774049217002 }, { "epoch": 3.0371203599550056, "eval_loss": 1.0625320672988892, "eval_runtime": 31.923, "eval_samples_per_second": 25.154, "eval_steps_per_second": 3.164, "eval_token_acc": 0.7371088487953537, "step": 2700 }, { "epoch": 3.0382452193475817, "grad_norm": 2.382382869720459, "learning_rate": 8.34012023966248e-05, "loss": 0.31334543228149414, "step": 2701, "token_acc": 0.9011406844106464 }, { "epoch": 3.0393700787401574, "grad_norm": 2.2588672637939453, "learning_rate": 8.338735885564102e-05, "loss": 0.27341529726982117, "step": 2702, "token_acc": 0.9165613147914032 }, { "epoch": 3.0404949381327335, "grad_norm": 2.164257049560547, "learning_rate": 8.337351069422611e-05, "loss": 0.2731754183769226, "step": 2703, "token_acc": 0.9148665819567979 }, { "epoch": 3.041619797525309, "grad_norm": 2.453089475631714, "learning_rate": 8.335965791429657e-05, "loss": 0.2890811860561371, "step": 2704, "token_acc": 0.9048275862068965 }, { "epoch": 3.0427446569178853, "grad_norm": 2.4026854038238525, "learning_rate": 8.334580051776942e-05, "loss": 0.33549007773399353, "step": 2705, "token_acc": 0.8913612565445026 }, { "epoch": 3.043869516310461, "grad_norm": 2.4254608154296875, "learning_rate": 8.333193850656237e-05, "loss": 0.3006638288497925, "step": 2706, "token_acc": 0.8919631093544137 }, { "epoch": 3.044994375703037, "grad_norm": 2.591735363006592, "learning_rate": 8.331807188259378e-05, "loss": 0.32855498790740967, "step": 2707, "token_acc": 0.8877840909090909 }, { "epoch": 3.046119235095613, "grad_norm": 1.9664371013641357, "learning_rate": 8.330420064778264e-05, "loss": 0.26429417729377747, "step": 2708, "token_acc": 0.9118329466357309 }, { "epoch": 3.047244094488189, "grad_norm": 2.3311662673950195, "learning_rate": 8.329032480404855e-05, "loss": 0.344741553068161, "step": 2709, "token_acc": 0.8951048951048951 }, { "epoch": 3.048368953880765, "grad_norm": 2.5864527225494385, "learning_rate": 8.327644435331178e-05, "loss": 0.35769131779670715, "step": 2710, "token_acc": 0.8748335552596538 }, { "epoch": 3.0494938132733407, "grad_norm": 2.371797800064087, "learning_rate": 8.326255929749322e-05, "loss": 0.3068006634712219, "step": 2711, "token_acc": 0.8994285714285715 }, { "epoch": 3.050618672665917, "grad_norm": 1.8585577011108398, "learning_rate": 8.324866963851441e-05, "loss": 0.28800109028816223, "step": 2712, "token_acc": 0.9018348623853211 }, { "epoch": 3.0517435320584925, "grad_norm": 1.6021182537078857, "learning_rate": 8.323477537829753e-05, "loss": 0.2200898677110672, "step": 2713, "token_acc": 0.9392774788624135 }, { "epoch": 3.0528683914510686, "grad_norm": 2.7852365970611572, "learning_rate": 8.322087651876537e-05, "loss": 0.331179141998291, "step": 2714, "token_acc": 0.8979343863912516 }, { "epoch": 3.0539932508436447, "grad_norm": 2.801643133163452, "learning_rate": 8.320697306184139e-05, "loss": 0.35184720158576965, "step": 2715, "token_acc": 0.9010339734121122 }, { "epoch": 3.0551181102362204, "grad_norm": 2.9606451988220215, "learning_rate": 8.319306500944966e-05, "loss": 0.33931219577789307, "step": 2716, "token_acc": 0.8934426229508197 }, { "epoch": 3.0562429696287965, "grad_norm": 2.240360736846924, "learning_rate": 8.31791523635149e-05, "loss": 0.3883867859840393, "step": 2717, "token_acc": 0.8778778778778779 }, { "epoch": 3.057367829021372, "grad_norm": 2.3586790561676025, "learning_rate": 8.316523512596246e-05, "loss": 0.26841413974761963, "step": 2718, "token_acc": 0.9182763744427934 }, { "epoch": 3.0584926884139483, "grad_norm": 2.1755478382110596, "learning_rate": 8.315131329871831e-05, "loss": 0.3576991558074951, "step": 2719, "token_acc": 0.8805429864253393 }, { "epoch": 3.0596175478065244, "grad_norm": 2.3376002311706543, "learning_rate": 8.313738688370911e-05, "loss": 0.3205920457839966, "step": 2720, "token_acc": 0.9025229357798165 }, { "epoch": 3.0607424071991, "grad_norm": 2.548424482345581, "learning_rate": 8.31234558828621e-05, "loss": 0.348305881023407, "step": 2721, "token_acc": 0.8841099163679809 }, { "epoch": 3.061867266591676, "grad_norm": 2.5152902603149414, "learning_rate": 8.310952029810517e-05, "loss": 0.35863196849823, "step": 2722, "token_acc": 0.8906752411575563 }, { "epoch": 3.062992125984252, "grad_norm": 2.1691646575927734, "learning_rate": 8.309558013136684e-05, "loss": 0.20475958287715912, "step": 2723, "token_acc": 0.9329032258064516 }, { "epoch": 3.064116985376828, "grad_norm": 2.398674964904785, "learning_rate": 8.308163538457628e-05, "loss": 0.2929203510284424, "step": 2724, "token_acc": 0.8963486454652533 }, { "epoch": 3.0652418447694036, "grad_norm": 2.285360097885132, "learning_rate": 8.306768605966329e-05, "loss": 0.416828989982605, "step": 2725, "token_acc": 0.8645640074211502 }, { "epoch": 3.0663667041619798, "grad_norm": 2.6225507259368896, "learning_rate": 8.305373215855829e-05, "loss": 0.31328731775283813, "step": 2726, "token_acc": 0.8964088397790055 }, { "epoch": 3.067491563554556, "grad_norm": 2.1721787452697754, "learning_rate": 8.303977368319233e-05, "loss": 0.30434155464172363, "step": 2727, "token_acc": 0.9037735849056604 }, { "epoch": 3.0686164229471316, "grad_norm": 2.168168067932129, "learning_rate": 8.302581063549714e-05, "loss": 0.27089351415634155, "step": 2728, "token_acc": 0.9119683481701286 }, { "epoch": 3.0697412823397077, "grad_norm": 2.3881990909576416, "learning_rate": 8.301184301740501e-05, "loss": 0.32334136962890625, "step": 2729, "token_acc": 0.8896797153024911 }, { "epoch": 3.0708661417322833, "grad_norm": 2.4507927894592285, "learning_rate": 8.299787083084891e-05, "loss": 0.33864423632621765, "step": 2730, "token_acc": 0.8817427385892116 }, { "epoch": 3.0719910011248595, "grad_norm": 2.8711252212524414, "learning_rate": 8.298389407776244e-05, "loss": 0.3071357011795044, "step": 2731, "token_acc": 0.9012693935119888 }, { "epoch": 3.073115860517435, "grad_norm": 2.4598333835601807, "learning_rate": 8.296991276007981e-05, "loss": 0.36376720666885376, "step": 2732, "token_acc": 0.887546468401487 }, { "epoch": 3.0742407199100112, "grad_norm": 3.070298433303833, "learning_rate": 8.295592687973591e-05, "loss": 0.3382341265678406, "step": 2733, "token_acc": 0.8990683229813664 }, { "epoch": 3.0753655793025874, "grad_norm": 2.4836978912353516, "learning_rate": 8.294193643866618e-05, "loss": 0.4048500955104828, "step": 2734, "token_acc": 0.8948521358159912 }, { "epoch": 3.076490438695163, "grad_norm": 2.1454532146453857, "learning_rate": 8.292794143880675e-05, "loss": 0.26539838314056396, "step": 2735, "token_acc": 0.9147286821705426 }, { "epoch": 3.077615298087739, "grad_norm": 2.6663339138031006, "learning_rate": 8.291394188209441e-05, "loss": 0.3391234278678894, "step": 2736, "token_acc": 0.9002375296912114 }, { "epoch": 3.078740157480315, "grad_norm": 2.267488718032837, "learning_rate": 8.289993777046647e-05, "loss": 0.2967016100883484, "step": 2737, "token_acc": 0.9091620986687549 }, { "epoch": 3.079865016872891, "grad_norm": 2.5301015377044678, "learning_rate": 8.288592910586102e-05, "loss": 0.3486330211162567, "step": 2738, "token_acc": 0.8818283166109253 }, { "epoch": 3.0809898762654666, "grad_norm": 2.2160401344299316, "learning_rate": 8.287191589021663e-05, "loss": 0.290349543094635, "step": 2739, "token_acc": 0.8961038961038961 }, { "epoch": 3.0821147356580427, "grad_norm": 2.4645931720733643, "learning_rate": 8.28578981254726e-05, "loss": 0.3810186982154846, "step": 2740, "token_acc": 0.8794642857142857 }, { "epoch": 3.083239595050619, "grad_norm": 1.9739477634429932, "learning_rate": 8.284387581356884e-05, "loss": 0.25888675451278687, "step": 2741, "token_acc": 0.9257679180887372 }, { "epoch": 3.0843644544431945, "grad_norm": 2.3477656841278076, "learning_rate": 8.282984895644585e-05, "loss": 0.3251199722290039, "step": 2742, "token_acc": 0.8920454545454546 }, { "epoch": 3.0854893138357706, "grad_norm": 2.4909069538116455, "learning_rate": 8.281581755604482e-05, "loss": 0.2951022982597351, "step": 2743, "token_acc": 0.8965071151358344 }, { "epoch": 3.0866141732283463, "grad_norm": 2.488577365875244, "learning_rate": 8.28017816143075e-05, "loss": 0.4072890281677246, "step": 2744, "token_acc": 0.8667287977632805 }, { "epoch": 3.0877390326209224, "grad_norm": 2.4586644172668457, "learning_rate": 8.278774113317633e-05, "loss": 0.36613595485687256, "step": 2745, "token_acc": 0.8909657320872274 }, { "epoch": 3.0888638920134985, "grad_norm": 2.6454527378082275, "learning_rate": 8.277369611459437e-05, "loss": 0.3501565456390381, "step": 2746, "token_acc": 0.8875739644970414 }, { "epoch": 3.089988751406074, "grad_norm": 2.847018241882324, "learning_rate": 8.275964656050526e-05, "loss": 0.2685320973396301, "step": 2747, "token_acc": 0.9098228663446055 }, { "epoch": 3.0911136107986503, "grad_norm": 2.2039616107940674, "learning_rate": 8.27455924728533e-05, "loss": 0.33129656314849854, "step": 2748, "token_acc": 0.8872320596458527 }, { "epoch": 3.092238470191226, "grad_norm": 1.9138977527618408, "learning_rate": 8.273153385358341e-05, "loss": 0.19175395369529724, "step": 2749, "token_acc": 0.9400749063670412 }, { "epoch": 3.093363329583802, "grad_norm": 2.386157989501953, "learning_rate": 8.271747070464117e-05, "loss": 0.34973424673080444, "step": 2750, "token_acc": 0.8881578947368421 }, { "epoch": 3.094488188976378, "grad_norm": 2.2147445678710938, "learning_rate": 8.270340302797274e-05, "loss": 0.3713892102241516, "step": 2751, "token_acc": 0.8763837638376384 }, { "epoch": 3.095613048368954, "grad_norm": 2.4817237854003906, "learning_rate": 8.268933082552494e-05, "loss": 0.38342976570129395, "step": 2752, "token_acc": 0.8739316239316239 }, { "epoch": 3.09673790776153, "grad_norm": 2.348233938217163, "learning_rate": 8.267525409924522e-05, "loss": 0.3474554419517517, "step": 2753, "token_acc": 0.8865619546247818 }, { "epoch": 3.0978627671541057, "grad_norm": 2.3708527088165283, "learning_rate": 8.266117285108158e-05, "loss": 0.3508988618850708, "step": 2754, "token_acc": 0.8985365853658537 }, { "epoch": 3.098987626546682, "grad_norm": 2.563892364501953, "learning_rate": 8.264708708298275e-05, "loss": 0.26236534118652344, "step": 2755, "token_acc": 0.9199475065616798 }, { "epoch": 3.1001124859392575, "grad_norm": 2.631194591522217, "learning_rate": 8.263299679689804e-05, "loss": 0.27182263135910034, "step": 2756, "token_acc": 0.9045801526717557 }, { "epoch": 3.1012373453318336, "grad_norm": 2.3108408451080322, "learning_rate": 8.261890199477736e-05, "loss": 0.24251200258731842, "step": 2757, "token_acc": 0.9208731241473397 }, { "epoch": 3.1023622047244093, "grad_norm": 2.7359888553619385, "learning_rate": 8.260480267857129e-05, "loss": 0.42411407828330994, "step": 2758, "token_acc": 0.8646464646464647 }, { "epoch": 3.1034870641169854, "grad_norm": 2.3922228813171387, "learning_rate": 8.259069885023101e-05, "loss": 0.26081952452659607, "step": 2759, "token_acc": 0.9126344086021505 }, { "epoch": 3.1046119235095615, "grad_norm": 2.199056625366211, "learning_rate": 8.257659051170836e-05, "loss": 0.34746962785720825, "step": 2760, "token_acc": 0.8981854838709677 }, { "epoch": 3.105736782902137, "grad_norm": 2.1695854663848877, "learning_rate": 8.256247766495571e-05, "loss": 0.38153010606765747, "step": 2761, "token_acc": 0.8756388415672913 }, { "epoch": 3.1068616422947133, "grad_norm": 2.16798734664917, "learning_rate": 8.254836031192617e-05, "loss": 0.23568333685398102, "step": 2762, "token_acc": 0.9152173913043479 }, { "epoch": 3.107986501687289, "grad_norm": 2.327793836593628, "learning_rate": 8.25342384545734e-05, "loss": 0.29511070251464844, "step": 2763, "token_acc": 0.914572864321608 }, { "epoch": 3.109111361079865, "grad_norm": 2.6187241077423096, "learning_rate": 8.252011209485169e-05, "loss": 0.3239271938800812, "step": 2764, "token_acc": 0.8976982097186701 }, { "epoch": 3.1102362204724407, "grad_norm": 2.60313081741333, "learning_rate": 8.2505981234716e-05, "loss": 0.4342755079269409, "step": 2765, "token_acc": 0.8705255140898706 }, { "epoch": 3.111361079865017, "grad_norm": 2.598740816116333, "learning_rate": 8.249184587612187e-05, "loss": 0.4373962879180908, "step": 2766, "token_acc": 0.8657604078164826 }, { "epoch": 3.112485939257593, "grad_norm": 2.300748825073242, "learning_rate": 8.247770602102547e-05, "loss": 0.3848584294319153, "step": 2767, "token_acc": 0.8791208791208791 }, { "epoch": 3.1136107986501687, "grad_norm": 2.6556360721588135, "learning_rate": 8.246356167138358e-05, "loss": 0.3654754161834717, "step": 2768, "token_acc": 0.8783505154639175 }, { "epoch": 3.1147356580427448, "grad_norm": 2.5283491611480713, "learning_rate": 8.244941282915364e-05, "loss": 0.3415446877479553, "step": 2769, "token_acc": 0.8943396226415095 }, { "epoch": 3.1158605174353204, "grad_norm": 2.5740928649902344, "learning_rate": 8.24352594962937e-05, "loss": 0.35712188482284546, "step": 2770, "token_acc": 0.8949115044247787 }, { "epoch": 3.1169853768278966, "grad_norm": 2.6859500408172607, "learning_rate": 8.242110167476238e-05, "loss": 0.3138366937637329, "step": 2771, "token_acc": 0.8862433862433863 }, { "epoch": 3.1181102362204722, "grad_norm": 2.3216893672943115, "learning_rate": 8.240693936651897e-05, "loss": 0.259859561920166, "step": 2772, "token_acc": 0.9148665819567979 }, { "epoch": 3.1192350956130483, "grad_norm": 2.40205454826355, "learning_rate": 8.239277257352342e-05, "loss": 0.31522858142852783, "step": 2773, "token_acc": 0.8850174216027874 }, { "epoch": 3.1203599550056245, "grad_norm": 2.2605373859405518, "learning_rate": 8.23786012977362e-05, "loss": 0.2712886333465576, "step": 2774, "token_acc": 0.9120567375886525 }, { "epoch": 3.1214848143982, "grad_norm": 2.310401678085327, "learning_rate": 8.236442554111846e-05, "loss": 0.29120269417762756, "step": 2775, "token_acc": 0.9110212335692619 }, { "epoch": 3.1226096737907763, "grad_norm": 2.3360283374786377, "learning_rate": 8.2350245305632e-05, "loss": 0.30132821202278137, "step": 2776, "token_acc": 0.90311004784689 }, { "epoch": 3.123734533183352, "grad_norm": 2.4333505630493164, "learning_rate": 8.233606059323916e-05, "loss": 0.36914676427841187, "step": 2777, "token_acc": 0.8868501529051988 }, { "epoch": 3.124859392575928, "grad_norm": 2.3952150344848633, "learning_rate": 8.232187140590298e-05, "loss": 0.3241305947303772, "step": 2778, "token_acc": 0.89375 }, { "epoch": 3.1259842519685037, "grad_norm": 2.5606536865234375, "learning_rate": 8.230767774558705e-05, "loss": 0.26183217763900757, "step": 2779, "token_acc": 0.923728813559322 }, { "epoch": 3.12710911136108, "grad_norm": 2.319284200668335, "learning_rate": 8.229347961425563e-05, "loss": 0.3362519145011902, "step": 2780, "token_acc": 0.8876508820798514 }, { "epoch": 3.128233970753656, "grad_norm": 2.5711867809295654, "learning_rate": 8.227927701387357e-05, "loss": 0.3300153911113739, "step": 2781, "token_acc": 0.8894668400520156 }, { "epoch": 3.1293588301462316, "grad_norm": 2.2359869480133057, "learning_rate": 8.226506994640637e-05, "loss": 0.27733466029167175, "step": 2782, "token_acc": 0.910913140311804 }, { "epoch": 3.1304836895388077, "grad_norm": 2.2938058376312256, "learning_rate": 8.22508584138201e-05, "loss": 0.2669510841369629, "step": 2783, "token_acc": 0.9233983286908078 }, { "epoch": 3.1316085489313834, "grad_norm": 2.5308637619018555, "learning_rate": 8.223664241808149e-05, "loss": 0.41143369674682617, "step": 2784, "token_acc": 0.8736162361623616 }, { "epoch": 3.1327334083239595, "grad_norm": 2.2558813095092773, "learning_rate": 8.222242196115785e-05, "loss": 0.21044126152992249, "step": 2785, "token_acc": 0.9312101910828026 }, { "epoch": 3.1338582677165356, "grad_norm": 2.455904483795166, "learning_rate": 8.220819704501717e-05, "loss": 0.26053130626678467, "step": 2786, "token_acc": 0.9109947643979057 }, { "epoch": 3.1349831271091113, "grad_norm": 1.9377845525741577, "learning_rate": 8.2193967671628e-05, "loss": 0.27361154556274414, "step": 2787, "token_acc": 0.9154804270462633 }, { "epoch": 3.1361079865016874, "grad_norm": 2.2501401901245117, "learning_rate": 8.217973384295949e-05, "loss": 0.2995709776878357, "step": 2788, "token_acc": 0.8956714761376249 }, { "epoch": 3.137232845894263, "grad_norm": 2.320876359939575, "learning_rate": 8.216549556098149e-05, "loss": 0.3054083585739136, "step": 2789, "token_acc": 0.8984547461368654 }, { "epoch": 3.138357705286839, "grad_norm": 2.665182590484619, "learning_rate": 8.215125282766438e-05, "loss": 0.3128659725189209, "step": 2790, "token_acc": 0.8884353741496599 }, { "epoch": 3.139482564679415, "grad_norm": 1.8469858169555664, "learning_rate": 8.213700564497921e-05, "loss": 0.2547813057899475, "step": 2791, "token_acc": 0.920042643923241 }, { "epoch": 3.140607424071991, "grad_norm": 2.5761544704437256, "learning_rate": 8.212275401489764e-05, "loss": 0.2730013132095337, "step": 2792, "token_acc": 0.9115250291036089 }, { "epoch": 3.141732283464567, "grad_norm": 2.5377793312072754, "learning_rate": 8.21084979393919e-05, "loss": 0.30547624826431274, "step": 2793, "token_acc": 0.9064665127020786 }, { "epoch": 3.142857142857143, "grad_norm": 2.589930534362793, "learning_rate": 8.209423742043489e-05, "loss": 0.3140234351158142, "step": 2794, "token_acc": 0.8992907801418439 }, { "epoch": 3.143982002249719, "grad_norm": 2.553800106048584, "learning_rate": 8.207997246000012e-05, "loss": 0.3759109377861023, "step": 2795, "token_acc": 0.8811013767209012 }, { "epoch": 3.1451068616422946, "grad_norm": 2.740351438522339, "learning_rate": 8.206570306006167e-05, "loss": 0.37292101979255676, "step": 2796, "token_acc": 0.8814814814814815 }, { "epoch": 3.1462317210348707, "grad_norm": 2.4669835567474365, "learning_rate": 8.205142922259427e-05, "loss": 0.2727287709712982, "step": 2797, "token_acc": 0.9027611044417767 }, { "epoch": 3.1473565804274464, "grad_norm": 2.210066556930542, "learning_rate": 8.203715094957326e-05, "loss": 0.3395611345767975, "step": 2798, "token_acc": 0.8862903225806451 }, { "epoch": 3.1484814398200225, "grad_norm": 2.4927866458892822, "learning_rate": 8.202286824297458e-05, "loss": 0.32033687829971313, "step": 2799, "token_acc": 0.8986332574031891 }, { "epoch": 3.1496062992125986, "grad_norm": 2.711867094039917, "learning_rate": 8.200858110477483e-05, "loss": 0.4714598059654236, "step": 2800, "token_acc": 0.8514018691588785 }, { "epoch": 3.1507311586051743, "grad_norm": 2.139302968978882, "learning_rate": 8.199428953695114e-05, "loss": 0.2541666328907013, "step": 2801, "token_acc": 0.9159248269040554 }, { "epoch": 3.1518560179977504, "grad_norm": 2.544854164123535, "learning_rate": 8.197999354148134e-05, "loss": 0.28914061188697815, "step": 2802, "token_acc": 0.8934081346423562 }, { "epoch": 3.152980877390326, "grad_norm": 2.722614049911499, "learning_rate": 8.196569312034381e-05, "loss": 0.3936827480792999, "step": 2803, "token_acc": 0.8779395296752519 }, { "epoch": 3.154105736782902, "grad_norm": 2.6326863765716553, "learning_rate": 8.195138827551756e-05, "loss": 0.41252401471138, "step": 2804, "token_acc": 0.8751229105211407 }, { "epoch": 3.1552305961754783, "grad_norm": 2.841157913208008, "learning_rate": 8.193707900898224e-05, "loss": 0.35573679208755493, "step": 2805, "token_acc": 0.885 }, { "epoch": 3.156355455568054, "grad_norm": 2.8995208740234375, "learning_rate": 8.192276532271807e-05, "loss": 0.3165150582790375, "step": 2806, "token_acc": 0.8924137931034483 }, { "epoch": 3.15748031496063, "grad_norm": 2.6346771717071533, "learning_rate": 8.190844721870595e-05, "loss": 0.3863683342933655, "step": 2807, "token_acc": 0.8851422550052687 }, { "epoch": 3.1586051743532058, "grad_norm": 2.517165184020996, "learning_rate": 8.189412469892725e-05, "loss": 0.3575102984905243, "step": 2808, "token_acc": 0.8878787878787879 }, { "epoch": 3.159730033745782, "grad_norm": 2.23892879486084, "learning_rate": 8.187979776536412e-05, "loss": 0.3527511954307556, "step": 2809, "token_acc": 0.9034552845528455 }, { "epoch": 3.1608548931383575, "grad_norm": 1.9682374000549316, "learning_rate": 8.186546641999924e-05, "loss": 0.3056037425994873, "step": 2810, "token_acc": 0.8945182724252492 }, { "epoch": 3.1619797525309337, "grad_norm": 2.4166769981384277, "learning_rate": 8.185113066481587e-05, "loss": 0.43026745319366455, "step": 2811, "token_acc": 0.8658051689860835 }, { "epoch": 3.16310461192351, "grad_norm": 2.5559678077697754, "learning_rate": 8.183679050179794e-05, "loss": 0.32351094484329224, "step": 2812, "token_acc": 0.8930741190765492 }, { "epoch": 3.1642294713160855, "grad_norm": 2.3081214427948, "learning_rate": 8.182244593292996e-05, "loss": 0.2693164646625519, "step": 2813, "token_acc": 0.9177545691906005 }, { "epoch": 3.1653543307086616, "grad_norm": 2.611443519592285, "learning_rate": 8.180809696019707e-05, "loss": 0.345151424407959, "step": 2814, "token_acc": 0.883551673944687 }, { "epoch": 3.1664791901012372, "grad_norm": 2.5201399326324463, "learning_rate": 8.179374358558496e-05, "loss": 0.2922516167163849, "step": 2815, "token_acc": 0.9061224489795918 }, { "epoch": 3.1676040494938134, "grad_norm": 2.270583391189575, "learning_rate": 8.177938581108003e-05, "loss": 0.2817225456237793, "step": 2816, "token_acc": 0.9078104993597952 }, { "epoch": 3.168728908886389, "grad_norm": 2.4465272426605225, "learning_rate": 8.176502363866917e-05, "loss": 0.32691025733947754, "step": 2817, "token_acc": 0.903010033444816 }, { "epoch": 3.169853768278965, "grad_norm": 2.4742660522460938, "learning_rate": 8.175065707034001e-05, "loss": 0.29434734582901, "step": 2818, "token_acc": 0.897316219369895 }, { "epoch": 3.1709786276715413, "grad_norm": 2.742551565170288, "learning_rate": 8.173628610808069e-05, "loss": 0.28720536828041077, "step": 2819, "token_acc": 0.9018567639257294 }, { "epoch": 3.172103487064117, "grad_norm": 2.1838200092315674, "learning_rate": 8.172191075387997e-05, "loss": 0.1862396001815796, "step": 2820, "token_acc": 0.9237012987012987 }, { "epoch": 3.173228346456693, "grad_norm": 2.3072891235351562, "learning_rate": 8.170753100972727e-05, "loss": 0.3034243583679199, "step": 2821, "token_acc": 0.9048562933597621 }, { "epoch": 3.1743532058492687, "grad_norm": 3.007554292678833, "learning_rate": 8.169314687761255e-05, "loss": 0.500980019569397, "step": 2822, "token_acc": 0.8414043583535109 }, { "epoch": 3.175478065241845, "grad_norm": 1.9236690998077393, "learning_rate": 8.167875835952642e-05, "loss": 0.2665075659751892, "step": 2823, "token_acc": 0.9101123595505618 }, { "epoch": 3.1766029246344205, "grad_norm": 2.1031112670898438, "learning_rate": 8.166436545746011e-05, "loss": 0.264300137758255, "step": 2824, "token_acc": 0.9103521878335112 }, { "epoch": 3.1777277840269966, "grad_norm": 2.091944456100464, "learning_rate": 8.16499681734054e-05, "loss": 0.24288788437843323, "step": 2825, "token_acc": 0.9248496993987976 }, { "epoch": 3.1788526434195727, "grad_norm": 2.536558151245117, "learning_rate": 8.163556650935473e-05, "loss": 0.4015941023826599, "step": 2826, "token_acc": 0.8647773279352227 }, { "epoch": 3.1799775028121484, "grad_norm": 2.178741216659546, "learning_rate": 8.162116046730113e-05, "loss": 0.3798068165779114, "step": 2827, "token_acc": 0.8958139534883721 }, { "epoch": 3.1811023622047245, "grad_norm": 2.9234626293182373, "learning_rate": 8.160675004923822e-05, "loss": 0.3621571362018585, "step": 2828, "token_acc": 0.8847874720357942 }, { "epoch": 3.1822272215973, "grad_norm": 2.5044350624084473, "learning_rate": 8.159233525716026e-05, "loss": 0.3574947416782379, "step": 2829, "token_acc": 0.8893572181243414 }, { "epoch": 3.1833520809898763, "grad_norm": 2.4244470596313477, "learning_rate": 8.157791609306205e-05, "loss": 0.3399280309677124, "step": 2830, "token_acc": 0.9048165137614679 }, { "epoch": 3.184476940382452, "grad_norm": 2.2931106090545654, "learning_rate": 8.156349255893909e-05, "loss": 0.3165562152862549, "step": 2831, "token_acc": 0.8886618998978549 }, { "epoch": 3.185601799775028, "grad_norm": 2.8394856452941895, "learning_rate": 8.15490646567874e-05, "loss": 0.33123740553855896, "step": 2832, "token_acc": 0.8929088277858177 }, { "epoch": 3.1867266591676042, "grad_norm": 2.1946229934692383, "learning_rate": 8.153463238860366e-05, "loss": 0.3182041049003601, "step": 2833, "token_acc": 0.8934426229508197 }, { "epoch": 3.18785151856018, "grad_norm": 2.2962615489959717, "learning_rate": 8.15201957563851e-05, "loss": 0.40563759207725525, "step": 2834, "token_acc": 0.8685446009389671 }, { "epoch": 3.188976377952756, "grad_norm": 2.6938674449920654, "learning_rate": 8.150575476212963e-05, "loss": 0.3393498659133911, "step": 2835, "token_acc": 0.883745963401507 }, { "epoch": 3.1901012373453317, "grad_norm": 2.4296960830688477, "learning_rate": 8.149130940783567e-05, "loss": 0.3135857582092285, "step": 2836, "token_acc": 0.8968253968253969 }, { "epoch": 3.191226096737908, "grad_norm": 2.343705654144287, "learning_rate": 8.147685969550233e-05, "loss": 0.2821873426437378, "step": 2837, "token_acc": 0.9117647058823529 }, { "epoch": 3.1923509561304835, "grad_norm": 2.7275173664093018, "learning_rate": 8.146240562712928e-05, "loss": 0.3724781274795532, "step": 2838, "token_acc": 0.8799472295514512 }, { "epoch": 3.1934758155230596, "grad_norm": 2.2754881381988525, "learning_rate": 8.144794720471678e-05, "loss": 0.29471397399902344, "step": 2839, "token_acc": 0.9038701622971286 }, { "epoch": 3.1946006749156357, "grad_norm": 2.1440184116363525, "learning_rate": 8.143348443026572e-05, "loss": 0.3714105486869812, "step": 2840, "token_acc": 0.8852080123266564 }, { "epoch": 3.1957255343082114, "grad_norm": 2.6536407470703125, "learning_rate": 8.14190173057776e-05, "loss": 0.354226291179657, "step": 2841, "token_acc": 0.8852140077821011 }, { "epoch": 3.1968503937007875, "grad_norm": 2.6093451976776123, "learning_rate": 8.140454583325447e-05, "loss": 0.3545336425304413, "step": 2842, "token_acc": 0.8823529411764706 }, { "epoch": 3.197975253093363, "grad_norm": 2.766890048980713, "learning_rate": 8.139007001469907e-05, "loss": 0.34032174944877625, "step": 2843, "token_acc": 0.8856502242152466 }, { "epoch": 3.1991001124859393, "grad_norm": 2.321099281311035, "learning_rate": 8.137558985211465e-05, "loss": 0.25560325384140015, "step": 2844, "token_acc": 0.9144460028050491 }, { "epoch": 3.200224971878515, "grad_norm": 2.421863079071045, "learning_rate": 8.13611053475051e-05, "loss": 0.29936957359313965, "step": 2845, "token_acc": 0.9013015184381779 }, { "epoch": 3.201349831271091, "grad_norm": 2.8043103218078613, "learning_rate": 8.134661650287492e-05, "loss": 0.3208257555961609, "step": 2846, "token_acc": 0.8875739644970414 }, { "epoch": 3.202474690663667, "grad_norm": 2.690772294998169, "learning_rate": 8.133212332022922e-05, "loss": 0.37206944823265076, "step": 2847, "token_acc": 0.8840937114673243 }, { "epoch": 3.203599550056243, "grad_norm": 2.720203399658203, "learning_rate": 8.131762580157366e-05, "loss": 0.4474937915802002, "step": 2848, "token_acc": 0.8607011070110702 }, { "epoch": 3.204724409448819, "grad_norm": 2.970210552215576, "learning_rate": 8.130312394891453e-05, "loss": 0.3512877821922302, "step": 2849, "token_acc": 0.8858773181169758 }, { "epoch": 3.2058492688413947, "grad_norm": 2.7848896980285645, "learning_rate": 8.128861776425876e-05, "loss": 0.38436537981033325, "step": 2850, "token_acc": 0.8850432632880099 }, { "epoch": 3.2069741282339708, "grad_norm": 2.2967302799224854, "learning_rate": 8.127410724961381e-05, "loss": 0.3042260408401489, "step": 2851, "token_acc": 0.9231692677070829 }, { "epoch": 3.208098987626547, "grad_norm": 2.5292508602142334, "learning_rate": 8.125959240698777e-05, "loss": 0.3591729998588562, "step": 2852, "token_acc": 0.9012485811577753 }, { "epoch": 3.2092238470191226, "grad_norm": 2.1667306423187256, "learning_rate": 8.124507323838935e-05, "loss": 0.26135218143463135, "step": 2853, "token_acc": 0.9057471264367816 }, { "epoch": 3.2103487064116987, "grad_norm": 2.590050220489502, "learning_rate": 8.123054974582782e-05, "loss": 0.37492257356643677, "step": 2854, "token_acc": 0.8710801393728222 }, { "epoch": 3.2114735658042743, "grad_norm": 2.707336664199829, "learning_rate": 8.121602193131308e-05, "loss": 0.3117947280406952, "step": 2855, "token_acc": 0.89281210592686 }, { "epoch": 3.2125984251968505, "grad_norm": 2.773069143295288, "learning_rate": 8.120148979685559e-05, "loss": 0.3086402714252472, "step": 2856, "token_acc": 0.9065656565656566 }, { "epoch": 3.213723284589426, "grad_norm": 2.538120985031128, "learning_rate": 8.118695334446647e-05, "loss": 0.28909599781036377, "step": 2857, "token_acc": 0.8947368421052632 }, { "epoch": 3.2148481439820022, "grad_norm": 2.553616523742676, "learning_rate": 8.117241257615735e-05, "loss": 0.3476512134075165, "step": 2858, "token_acc": 0.8912237330037083 }, { "epoch": 3.2159730033745784, "grad_norm": 2.2077884674072266, "learning_rate": 8.115786749394057e-05, "loss": 0.2980147898197174, "step": 2859, "token_acc": 0.8967587034813925 }, { "epoch": 3.217097862767154, "grad_norm": 2.6426846981048584, "learning_rate": 8.114331809982894e-05, "loss": 0.4253798723220825, "step": 2860, "token_acc": 0.865520728008089 }, { "epoch": 3.21822272215973, "grad_norm": 2.1145687103271484, "learning_rate": 8.112876439583596e-05, "loss": 0.269325315952301, "step": 2861, "token_acc": 0.9209401709401709 }, { "epoch": 3.219347581552306, "grad_norm": 2.495260238647461, "learning_rate": 8.111420638397572e-05, "loss": 0.35479098558425903, "step": 2862, "token_acc": 0.8794642857142857 }, { "epoch": 3.220472440944882, "grad_norm": 2.5331342220306396, "learning_rate": 8.109964406626285e-05, "loss": 0.3093998432159424, "step": 2863, "token_acc": 0.8937960042060988 }, { "epoch": 3.2215973003374576, "grad_norm": 2.431147575378418, "learning_rate": 8.108507744471262e-05, "loss": 0.4086093604564667, "step": 2864, "token_acc": 0.8819255222524978 }, { "epoch": 3.2227221597300337, "grad_norm": 2.271695375442505, "learning_rate": 8.107050652134088e-05, "loss": 0.26995718479156494, "step": 2865, "token_acc": 0.9153318077803204 }, { "epoch": 3.22384701912261, "grad_norm": 2.812563180923462, "learning_rate": 8.10559312981641e-05, "loss": 0.3386465311050415, "step": 2866, "token_acc": 0.8873239436619719 }, { "epoch": 3.2249718785151855, "grad_norm": 2.8392281532287598, "learning_rate": 8.104135177719929e-05, "loss": 0.2962277829647064, "step": 2867, "token_acc": 0.8941176470588236 }, { "epoch": 3.2260967379077616, "grad_norm": 2.8670222759246826, "learning_rate": 8.102676796046413e-05, "loss": 0.39187997579574585, "step": 2868, "token_acc": 0.8678010471204188 }, { "epoch": 3.2272215973003373, "grad_norm": 2.249727249145508, "learning_rate": 8.101217984997683e-05, "loss": 0.3625223934650421, "step": 2869, "token_acc": 0.8875739644970414 }, { "epoch": 3.2283464566929134, "grad_norm": 2.6842098236083984, "learning_rate": 8.099758744775624e-05, "loss": 0.30602332949638367, "step": 2870, "token_acc": 0.9004376367614879 }, { "epoch": 3.2294713160854895, "grad_norm": 2.1277823448181152, "learning_rate": 8.098299075582174e-05, "loss": 0.2767221927642822, "step": 2871, "token_acc": 0.9096176129779838 }, { "epoch": 3.230596175478065, "grad_norm": 2.704455614089966, "learning_rate": 8.096838977619338e-05, "loss": 0.37702491879463196, "step": 2872, "token_acc": 0.8703956343792633 }, { "epoch": 3.2317210348706413, "grad_norm": 2.597766876220703, "learning_rate": 8.095378451089178e-05, "loss": 0.36940228939056396, "step": 2873, "token_acc": 0.8728070175438597 }, { "epoch": 3.232845894263217, "grad_norm": 2.1139280796051025, "learning_rate": 8.093917496193809e-05, "loss": 0.3407596945762634, "step": 2874, "token_acc": 0.896551724137931 }, { "epoch": 3.233970753655793, "grad_norm": 2.2943148612976074, "learning_rate": 8.092456113135416e-05, "loss": 0.3368658125400543, "step": 2875, "token_acc": 0.8838268792710706 }, { "epoch": 3.235095613048369, "grad_norm": 2.818981647491455, "learning_rate": 8.090994302116237e-05, "loss": 0.35743218660354614, "step": 2876, "token_acc": 0.8904282115869018 }, { "epoch": 3.236220472440945, "grad_norm": 3.123727321624756, "learning_rate": 8.089532063338568e-05, "loss": 0.3629434108734131, "step": 2877, "token_acc": 0.8756302521008403 }, { "epoch": 3.237345331833521, "grad_norm": 2.5057218074798584, "learning_rate": 8.088069397004767e-05, "loss": 0.3277847468852997, "step": 2878, "token_acc": 0.8930635838150289 }, { "epoch": 3.2384701912260967, "grad_norm": 2.5444657802581787, "learning_rate": 8.086606303317251e-05, "loss": 0.3640322685241699, "step": 2879, "token_acc": 0.9053318824809575 }, { "epoch": 3.239595050618673, "grad_norm": 2.2729837894439697, "learning_rate": 8.085142782478495e-05, "loss": 0.3132328689098358, "step": 2880, "token_acc": 0.9073170731707317 }, { "epoch": 3.2407199100112485, "grad_norm": 2.379798650741577, "learning_rate": 8.083678834691033e-05, "loss": 0.3129842281341553, "step": 2881, "token_acc": 0.9085133418043202 }, { "epoch": 3.2418447694038246, "grad_norm": 2.4290082454681396, "learning_rate": 8.08221446015746e-05, "loss": 0.37017738819122314, "step": 2882, "token_acc": 0.8948919449901768 }, { "epoch": 3.2429696287964003, "grad_norm": 2.759366512298584, "learning_rate": 8.080749659080429e-05, "loss": 0.35932284593582153, "step": 2883, "token_acc": 0.8856382978723404 }, { "epoch": 3.2440944881889764, "grad_norm": 2.534844160079956, "learning_rate": 8.079284431662651e-05, "loss": 0.3234354555606842, "step": 2884, "token_acc": 0.8917470525187567 }, { "epoch": 3.2452193475815525, "grad_norm": 2.736546754837036, "learning_rate": 8.077818778106897e-05, "loss": 0.3357997536659241, "step": 2885, "token_acc": 0.8878281622911695 }, { "epoch": 3.246344206974128, "grad_norm": 2.5811150074005127, "learning_rate": 8.076352698615994e-05, "loss": 0.37871333956718445, "step": 2886, "token_acc": 0.8774834437086093 }, { "epoch": 3.2474690663667043, "grad_norm": 2.4810407161712646, "learning_rate": 8.074886193392837e-05, "loss": 0.3033044934272766, "step": 2887, "token_acc": 0.8912466843501327 }, { "epoch": 3.24859392575928, "grad_norm": 2.9014132022857666, "learning_rate": 8.07341926264037e-05, "loss": 0.2992551028728485, "step": 2888, "token_acc": 0.8954545454545455 }, { "epoch": 3.249718785151856, "grad_norm": 2.306849479675293, "learning_rate": 8.071951906561597e-05, "loss": 0.3382648229598999, "step": 2889, "token_acc": 0.8966597077244259 }, { "epoch": 3.2508436445444318, "grad_norm": 2.316591501235962, "learning_rate": 8.070484125359588e-05, "loss": 0.3865664601325989, "step": 2890, "token_acc": 0.8803571428571428 }, { "epoch": 3.251968503937008, "grad_norm": 2.3559494018554688, "learning_rate": 8.069015919237466e-05, "loss": 0.25328660011291504, "step": 2891, "token_acc": 0.9219653179190751 }, { "epoch": 3.253093363329584, "grad_norm": 2.479825496673584, "learning_rate": 8.067547288398412e-05, "loss": 0.3395330309867859, "step": 2892, "token_acc": 0.8880976602238047 }, { "epoch": 3.2542182227221597, "grad_norm": 2.4378790855407715, "learning_rate": 8.066078233045671e-05, "loss": 0.28532639145851135, "step": 2893, "token_acc": 0.9036144578313253 }, { "epoch": 3.2553430821147358, "grad_norm": 2.080012321472168, "learning_rate": 8.064608753382539e-05, "loss": 0.2638018727302551, "step": 2894, "token_acc": 0.9139664804469274 }, { "epoch": 3.2564679415073114, "grad_norm": 2.4620039463043213, "learning_rate": 8.06313884961238e-05, "loss": 0.4464464485645294, "step": 2895, "token_acc": 0.8633540372670807 }, { "epoch": 3.2575928008998876, "grad_norm": 2.4668819904327393, "learning_rate": 8.06166852193861e-05, "loss": 0.3758646249771118, "step": 2896, "token_acc": 0.8795986622073578 }, { "epoch": 3.2587176602924632, "grad_norm": 2.6488776206970215, "learning_rate": 8.060197770564706e-05, "loss": 0.35352617502212524, "step": 2897, "token_acc": 0.8895027624309392 }, { "epoch": 3.2598425196850394, "grad_norm": 2.9644131660461426, "learning_rate": 8.058726595694201e-05, "loss": 0.3381894528865814, "step": 2898, "token_acc": 0.8932655654383735 }, { "epoch": 3.2609673790776155, "grad_norm": 2.413774251937866, "learning_rate": 8.057254997530694e-05, "loss": 0.2753635048866272, "step": 2899, "token_acc": 0.909853249475891 }, { "epoch": 3.262092238470191, "grad_norm": 2.2484283447265625, "learning_rate": 8.055782976277832e-05, "loss": 0.2653435170650482, "step": 2900, "token_acc": 0.9074074074074074 }, { "epoch": 3.2632170978627673, "grad_norm": 3.289604663848877, "learning_rate": 8.054310532139328e-05, "loss": 0.3820243775844574, "step": 2901, "token_acc": 0.8567375886524823 }, { "epoch": 3.264341957255343, "grad_norm": 2.5537052154541016, "learning_rate": 8.052837665318954e-05, "loss": 0.3572738766670227, "step": 2902, "token_acc": 0.8927444794952681 }, { "epoch": 3.265466816647919, "grad_norm": 2.4822418689727783, "learning_rate": 8.051364376020533e-05, "loss": 0.3251211941242218, "step": 2903, "token_acc": 0.8974358974358975 }, { "epoch": 3.2665916760404947, "grad_norm": 2.6779658794403076, "learning_rate": 8.049890664447959e-05, "loss": 0.32980501651763916, "step": 2904, "token_acc": 0.8836443468715697 }, { "epoch": 3.267716535433071, "grad_norm": 2.9510533809661865, "learning_rate": 8.048416530805168e-05, "loss": 0.3788508176803589, "step": 2905, "token_acc": 0.875 }, { "epoch": 3.268841394825647, "grad_norm": 3.015476942062378, "learning_rate": 8.04694197529617e-05, "loss": 0.3479160666465759, "step": 2906, "token_acc": 0.885989010989011 }, { "epoch": 3.2699662542182226, "grad_norm": 1.9937466382980347, "learning_rate": 8.045466998125025e-05, "loss": 0.2685551643371582, "step": 2907, "token_acc": 0.9120879120879121 }, { "epoch": 3.2710911136107987, "grad_norm": 2.296537399291992, "learning_rate": 8.043991599495849e-05, "loss": 0.32178446650505066, "step": 2908, "token_acc": 0.8990256864481843 }, { "epoch": 3.2722159730033744, "grad_norm": 2.682032585144043, "learning_rate": 8.042515779612827e-05, "loss": 0.24171313643455505, "step": 2909, "token_acc": 0.9153153153153153 }, { "epoch": 3.2733408323959505, "grad_norm": 2.360039234161377, "learning_rate": 8.041039538680191e-05, "loss": 0.2257658988237381, "step": 2910, "token_acc": 0.9146853146853147 }, { "epoch": 3.274465691788526, "grad_norm": 2.867535352706909, "learning_rate": 8.03956287690224e-05, "loss": 0.35147029161453247, "step": 2911, "token_acc": 0.9018492176386913 }, { "epoch": 3.2755905511811023, "grad_norm": 2.666773796081543, "learning_rate": 8.03808579448332e-05, "loss": 0.32707729935646057, "step": 2912, "token_acc": 0.9113607990012484 }, { "epoch": 3.2767154105736784, "grad_norm": 2.5579025745391846, "learning_rate": 8.036608291627852e-05, "loss": 0.2596072554588318, "step": 2913, "token_acc": 0.9065743944636678 }, { "epoch": 3.277840269966254, "grad_norm": 2.52640962600708, "learning_rate": 8.0351303685403e-05, "loss": 0.4228922128677368, "step": 2914, "token_acc": 0.863147605083089 }, { "epoch": 3.27896512935883, "grad_norm": 2.6298587322235107, "learning_rate": 8.033652025425191e-05, "loss": 0.3668851852416992, "step": 2915, "token_acc": 0.887260428410372 }, { "epoch": 3.280089988751406, "grad_norm": 2.5454301834106445, "learning_rate": 8.032173262487112e-05, "loss": 0.41293543577194214, "step": 2916, "token_acc": 0.868421052631579 }, { "epoch": 3.281214848143982, "grad_norm": 2.713818073272705, "learning_rate": 8.03069407993071e-05, "loss": 0.29536205530166626, "step": 2917, "token_acc": 0.9086021505376344 }, { "epoch": 3.2823397075365577, "grad_norm": 2.631211280822754, "learning_rate": 8.029214477960682e-05, "loss": 0.4821597635746002, "step": 2918, "token_acc": 0.8481848184818482 }, { "epoch": 3.283464566929134, "grad_norm": 2.480062246322632, "learning_rate": 8.027734456781793e-05, "loss": 0.3517025113105774, "step": 2919, "token_acc": 0.8830227743271222 }, { "epoch": 3.28458942632171, "grad_norm": 2.6473701000213623, "learning_rate": 8.026254016598857e-05, "loss": 0.33387133479118347, "step": 2920, "token_acc": 0.8823529411764706 }, { "epoch": 3.2857142857142856, "grad_norm": 2.9825611114501953, "learning_rate": 8.024773157616752e-05, "loss": 0.30911868810653687, "step": 2921, "token_acc": 0.900887573964497 }, { "epoch": 3.2868391451068617, "grad_norm": 2.4224071502685547, "learning_rate": 8.023291880040413e-05, "loss": 0.3819654583930969, "step": 2922, "token_acc": 0.8725065047701648 }, { "epoch": 3.287964004499438, "grad_norm": 3.1508328914642334, "learning_rate": 8.021810184074829e-05, "loss": 0.30350151658058167, "step": 2923, "token_acc": 0.9058084772370487 }, { "epoch": 3.2890888638920135, "grad_norm": 2.627307891845703, "learning_rate": 8.020328069925054e-05, "loss": 0.393211305141449, "step": 2924, "token_acc": 0.8807241746538871 }, { "epoch": 3.2902137232845896, "grad_norm": 2.680145502090454, "learning_rate": 8.018845537796193e-05, "loss": 0.3599094748497009, "step": 2925, "token_acc": 0.8730366492146597 }, { "epoch": 3.2913385826771653, "grad_norm": 2.118236541748047, "learning_rate": 8.017362587893411e-05, "loss": 0.3378564119338989, "step": 2926, "token_acc": 0.8914956011730205 }, { "epoch": 3.2924634420697414, "grad_norm": 2.5122828483581543, "learning_rate": 8.015879220421933e-05, "loss": 0.3612053096294403, "step": 2927, "token_acc": 0.8902439024390244 }, { "epoch": 3.293588301462317, "grad_norm": 2.6933298110961914, "learning_rate": 8.01439543558704e-05, "loss": 0.37451696395874023, "step": 2928, "token_acc": 0.881283422459893 }, { "epoch": 3.294713160854893, "grad_norm": 2.5269598960876465, "learning_rate": 8.01291123359407e-05, "loss": 0.37168681621551514, "step": 2929, "token_acc": 0.8837837837837837 }, { "epoch": 3.2958380202474693, "grad_norm": 2.0571210384368896, "learning_rate": 8.01142661464842e-05, "loss": 0.3915747404098511, "step": 2930, "token_acc": 0.8905852417302799 }, { "epoch": 3.296962879640045, "grad_norm": 2.6406381130218506, "learning_rate": 8.009941578955546e-05, "loss": 0.3506197929382324, "step": 2931, "token_acc": 0.8824163969795038 }, { "epoch": 3.298087739032621, "grad_norm": 2.4433555603027344, "learning_rate": 8.008456126720959e-05, "loss": 0.31003886461257935, "step": 2932, "token_acc": 0.8976982097186701 }, { "epoch": 3.2992125984251968, "grad_norm": 2.721683979034424, "learning_rate": 8.006970258150228e-05, "loss": 0.3066622018814087, "step": 2933, "token_acc": 0.8938053097345132 }, { "epoch": 3.300337457817773, "grad_norm": 2.5488648414611816, "learning_rate": 8.00548397344898e-05, "loss": 0.3617074489593506, "step": 2934, "token_acc": 0.8829787234042553 }, { "epoch": 3.3014623172103486, "grad_norm": 2.9964723587036133, "learning_rate": 8.003997272822903e-05, "loss": 0.4247334599494934, "step": 2935, "token_acc": 0.875 }, { "epoch": 3.3025871766029247, "grad_norm": 2.5263354778289795, "learning_rate": 8.002510156477736e-05, "loss": 0.34876853227615356, "step": 2936, "token_acc": 0.8920454545454546 }, { "epoch": 3.303712035995501, "grad_norm": 2.2823352813720703, "learning_rate": 8.00102262461928e-05, "loss": 0.411072313785553, "step": 2937, "token_acc": 0.8727272727272727 }, { "epoch": 3.3048368953880765, "grad_norm": 2.1085596084594727, "learning_rate": 7.999534677453393e-05, "loss": 0.3628975749015808, "step": 2938, "token_acc": 0.8697444352844188 }, { "epoch": 3.3059617547806526, "grad_norm": 2.801851511001587, "learning_rate": 7.99804631518599e-05, "loss": 0.4182608127593994, "step": 2939, "token_acc": 0.8692307692307693 }, { "epoch": 3.3070866141732282, "grad_norm": 2.4296555519104004, "learning_rate": 7.996557538023042e-05, "loss": 0.271230548620224, "step": 2940, "token_acc": 0.9071428571428571 }, { "epoch": 3.3082114735658044, "grad_norm": 2.15164852142334, "learning_rate": 7.995068346170583e-05, "loss": 0.2852013409137726, "step": 2941, "token_acc": 0.9134001823154057 }, { "epoch": 3.30933633295838, "grad_norm": 2.140763759613037, "learning_rate": 7.993578739834695e-05, "loss": 0.2522948682308197, "step": 2942, "token_acc": 0.9150401836969001 }, { "epoch": 3.310461192350956, "grad_norm": 2.7046923637390137, "learning_rate": 7.992088719221525e-05, "loss": 0.36174291372299194, "step": 2943, "token_acc": 0.8787528868360277 }, { "epoch": 3.3115860517435323, "grad_norm": 2.4246881008148193, "learning_rate": 7.990598284537276e-05, "loss": 0.214525043964386, "step": 2944, "token_acc": 0.9173419773095624 }, { "epoch": 3.312710911136108, "grad_norm": 2.4293622970581055, "learning_rate": 7.989107435988203e-05, "loss": 0.3442195653915405, "step": 2945, "token_acc": 0.8887601390498262 }, { "epoch": 3.313835770528684, "grad_norm": 2.4783756732940674, "learning_rate": 7.987616173780627e-05, "loss": 0.3236169219017029, "step": 2946, "token_acc": 0.8892339544513458 }, { "epoch": 3.3149606299212597, "grad_norm": 2.4481992721557617, "learning_rate": 7.986124498120921e-05, "loss": 0.3608112931251526, "step": 2947, "token_acc": 0.8854166666666666 }, { "epoch": 3.316085489313836, "grad_norm": 2.609903573989868, "learning_rate": 7.984632409215515e-05, "loss": 0.2546957731246948, "step": 2948, "token_acc": 0.9146183699870634 }, { "epoch": 3.3172103487064115, "grad_norm": 2.6693732738494873, "learning_rate": 7.983139907270898e-05, "loss": 0.28143203258514404, "step": 2949, "token_acc": 0.9161073825503355 }, { "epoch": 3.3183352080989876, "grad_norm": 2.506868600845337, "learning_rate": 7.981646992493615e-05, "loss": 0.31291529536247253, "step": 2950, "token_acc": 0.8882421420256111 }, { "epoch": 3.3194600674915637, "grad_norm": 2.2731778621673584, "learning_rate": 7.980153665090266e-05, "loss": 0.2998577356338501, "step": 2951, "token_acc": 0.9099201824401368 }, { "epoch": 3.3205849268841394, "grad_norm": 2.982804775238037, "learning_rate": 7.978659925267514e-05, "loss": 0.3809157907962799, "step": 2952, "token_acc": 0.8709677419354839 }, { "epoch": 3.3217097862767155, "grad_norm": 3.039478302001953, "learning_rate": 7.977165773232074e-05, "loss": 0.38300180435180664, "step": 2953, "token_acc": 0.8518041237113402 }, { "epoch": 3.322834645669291, "grad_norm": 2.3730711936950684, "learning_rate": 7.97567120919072e-05, "loss": 0.27534642815589905, "step": 2954, "token_acc": 0.9100998890122086 }, { "epoch": 3.3239595050618673, "grad_norm": 2.68704891204834, "learning_rate": 7.974176233350281e-05, "loss": 0.32218170166015625, "step": 2955, "token_acc": 0.8860898138006572 }, { "epoch": 3.325084364454443, "grad_norm": 2.8303139209747314, "learning_rate": 7.972680845917648e-05, "loss": 0.312197208404541, "step": 2956, "token_acc": 0.8974970202622169 }, { "epoch": 3.326209223847019, "grad_norm": 2.1592135429382324, "learning_rate": 7.971185047099765e-05, "loss": 0.2876850366592407, "step": 2957, "token_acc": 0.9131767109295199 }, { "epoch": 3.3273340832395952, "grad_norm": 2.366938352584839, "learning_rate": 7.969688837103631e-05, "loss": 0.25929224491119385, "step": 2958, "token_acc": 0.9058693244739756 }, { "epoch": 3.328458942632171, "grad_norm": 2.7496397495269775, "learning_rate": 7.968192216136306e-05, "loss": 0.4029049277305603, "step": 2959, "token_acc": 0.8810096153846154 }, { "epoch": 3.329583802024747, "grad_norm": 2.5085654258728027, "learning_rate": 7.966695184404905e-05, "loss": 0.4145781397819519, "step": 2960, "token_acc": 0.8578431372549019 }, { "epoch": 3.3307086614173227, "grad_norm": 2.4558353424072266, "learning_rate": 7.965197742116602e-05, "loss": 0.4112163782119751, "step": 2961, "token_acc": 0.8761514841351075 }, { "epoch": 3.331833520809899, "grad_norm": 2.7774479389190674, "learning_rate": 7.963699889478622e-05, "loss": 0.3264373540878296, "step": 2962, "token_acc": 0.8836265223274695 }, { "epoch": 3.3329583802024745, "grad_norm": 2.266740322113037, "learning_rate": 7.962201626698256e-05, "loss": 0.3350549042224884, "step": 2963, "token_acc": 0.8904244817374136 }, { "epoch": 3.3340832395950506, "grad_norm": 2.6453535556793213, "learning_rate": 7.960702953982841e-05, "loss": 0.3765201270580292, "step": 2964, "token_acc": 0.8774869109947644 }, { "epoch": 3.3352080989876267, "grad_norm": 2.426326274871826, "learning_rate": 7.959203871539781e-05, "loss": 0.38336020708084106, "step": 2965, "token_acc": 0.8830601092896175 }, { "epoch": 3.3363329583802024, "grad_norm": 2.3674187660217285, "learning_rate": 7.957704379576529e-05, "loss": 0.2978706657886505, "step": 2966, "token_acc": 0.8968481375358166 }, { "epoch": 3.3374578177727785, "grad_norm": 2.369239330291748, "learning_rate": 7.956204478300599e-05, "loss": 0.3603878915309906, "step": 2967, "token_acc": 0.8841405508072174 }, { "epoch": 3.338582677165354, "grad_norm": 2.570457935333252, "learning_rate": 7.95470416791956e-05, "loss": 0.42396125197410583, "step": 2968, "token_acc": 0.8721951219512195 }, { "epoch": 3.3397075365579303, "grad_norm": 3.038607597351074, "learning_rate": 7.953203448641037e-05, "loss": 0.3657207787036896, "step": 2969, "token_acc": 0.8759124087591241 }, { "epoch": 3.340832395950506, "grad_norm": 2.4596195220947266, "learning_rate": 7.951702320672715e-05, "loss": 0.4321693181991577, "step": 2970, "token_acc": 0.8701007838745801 }, { "epoch": 3.341957255343082, "grad_norm": 2.2045562267303467, "learning_rate": 7.950200784222332e-05, "loss": 0.32619041204452515, "step": 2971, "token_acc": 0.902247191011236 }, { "epoch": 3.343082114735658, "grad_norm": 2.4583797454833984, "learning_rate": 7.948698839497682e-05, "loss": 0.3398088216781616, "step": 2972, "token_acc": 0.8920863309352518 }, { "epoch": 3.344206974128234, "grad_norm": 2.4333789348602295, "learning_rate": 7.947196486706617e-05, "loss": 0.3635095953941345, "step": 2973, "token_acc": 0.8814070351758794 }, { "epoch": 3.34533183352081, "grad_norm": 2.4495255947113037, "learning_rate": 7.945693726057047e-05, "loss": 0.4369235336780548, "step": 2974, "token_acc": 0.8653648509763617 }, { "epoch": 3.3464566929133857, "grad_norm": 2.664599657058716, "learning_rate": 7.944190557756938e-05, "loss": 0.3684631586074829, "step": 2975, "token_acc": 0.8776266996291718 }, { "epoch": 3.3475815523059618, "grad_norm": 2.3449182510375977, "learning_rate": 7.942686982014309e-05, "loss": 0.35399138927459717, "step": 2976, "token_acc": 0.8796296296296297 }, { "epoch": 3.3487064116985374, "grad_norm": 2.47324538230896, "learning_rate": 7.94118299903724e-05, "loss": 0.40336698293685913, "step": 2977, "token_acc": 0.8666666666666667 }, { "epoch": 3.3498312710911136, "grad_norm": 2.6913440227508545, "learning_rate": 7.939678609033863e-05, "loss": 0.41116243600845337, "step": 2978, "token_acc": 0.8798283261802575 }, { "epoch": 3.3509561304836897, "grad_norm": 2.4216723442077637, "learning_rate": 7.938173812212369e-05, "loss": 0.3802766501903534, "step": 2979, "token_acc": 0.8886486486486487 }, { "epoch": 3.3520809898762653, "grad_norm": 2.3680596351623535, "learning_rate": 7.936668608781006e-05, "loss": 0.28329795598983765, "step": 2980, "token_acc": 0.9089874857792947 }, { "epoch": 3.3532058492688415, "grad_norm": 2.322798490524292, "learning_rate": 7.935162998948076e-05, "loss": 0.300387442111969, "step": 2981, "token_acc": 0.91125 }, { "epoch": 3.354330708661417, "grad_norm": 2.372206211090088, "learning_rate": 7.93365698292194e-05, "loss": 0.29829198122024536, "step": 2982, "token_acc": 0.8918918918918919 }, { "epoch": 3.3554555680539933, "grad_norm": 2.068211078643799, "learning_rate": 7.93215056091101e-05, "loss": 0.2663993239402771, "step": 2983, "token_acc": 0.9072063178677197 }, { "epoch": 3.3565804274465694, "grad_norm": 2.3575291633605957, "learning_rate": 7.930643733123762e-05, "loss": 0.3534581661224365, "step": 2984, "token_acc": 0.8708644610458911 }, { "epoch": 3.357705286839145, "grad_norm": 2.3878891468048096, "learning_rate": 7.92913649976872e-05, "loss": 0.35904544591903687, "step": 2985, "token_acc": 0.8808553971486762 }, { "epoch": 3.358830146231721, "grad_norm": 2.4035792350769043, "learning_rate": 7.92762886105447e-05, "loss": 0.23043964803218842, "step": 2986, "token_acc": 0.908974358974359 }, { "epoch": 3.359955005624297, "grad_norm": 1.9550453424453735, "learning_rate": 7.926120817189652e-05, "loss": 0.31810885667800903, "step": 2987, "token_acc": 0.9013333333333333 }, { "epoch": 3.361079865016873, "grad_norm": 2.1503448486328125, "learning_rate": 7.924612368382962e-05, "loss": 0.2583783268928528, "step": 2988, "token_acc": 0.9282178217821783 }, { "epoch": 3.362204724409449, "grad_norm": 2.651906967163086, "learning_rate": 7.923103514843152e-05, "loss": 0.340862900018692, "step": 2989, "token_acc": 0.8981779206859593 }, { "epoch": 3.3633295838020247, "grad_norm": 2.4033660888671875, "learning_rate": 7.921594256779028e-05, "loss": 0.3623064160346985, "step": 2990, "token_acc": 0.8792535675082327 }, { "epoch": 3.364454443194601, "grad_norm": 2.0842959880828857, "learning_rate": 7.92008459439946e-05, "loss": 0.18234246969223022, "step": 2991, "token_acc": 0.9346314325452016 }, { "epoch": 3.3655793025871765, "grad_norm": 2.4955408573150635, "learning_rate": 7.918574527913362e-05, "loss": 0.3417958617210388, "step": 2992, "token_acc": 0.8974008207934336 }, { "epoch": 3.3667041619797526, "grad_norm": 2.835132360458374, "learning_rate": 7.917064057529713e-05, "loss": 0.33175158500671387, "step": 2993, "token_acc": 0.8926261319534282 }, { "epoch": 3.3678290213723283, "grad_norm": 2.5637948513031006, "learning_rate": 7.915553183457545e-05, "loss": 0.4698517322540283, "step": 2994, "token_acc": 0.8614232209737828 }, { "epoch": 3.3689538807649044, "grad_norm": 2.7769248485565186, "learning_rate": 7.914041905905944e-05, "loss": 0.317099392414093, "step": 2995, "token_acc": 0.9056865464632455 }, { "epoch": 3.3700787401574805, "grad_norm": 2.2793993949890137, "learning_rate": 7.912530225084057e-05, "loss": 0.3007086217403412, "step": 2996, "token_acc": 0.8843283582089553 }, { "epoch": 3.371203599550056, "grad_norm": 2.242265224456787, "learning_rate": 7.911018141201081e-05, "loss": 0.3090288043022156, "step": 2997, "token_acc": 0.8898721730580138 }, { "epoch": 3.3723284589426323, "grad_norm": 2.618699073791504, "learning_rate": 7.909505654466271e-05, "loss": 0.35085612535476685, "step": 2998, "token_acc": 0.885997521685254 }, { "epoch": 3.373453318335208, "grad_norm": 2.41648530960083, "learning_rate": 7.907992765088938e-05, "loss": 0.3839814066886902, "step": 2999, "token_acc": 0.8765323992994746 }, { "epoch": 3.374578177727784, "grad_norm": 2.676351547241211, "learning_rate": 7.906479473278449e-05, "loss": 0.33147376775741577, "step": 3000, "token_acc": 0.8911111111111111 }, { "epoch": 3.374578177727784, "eval_loss": 1.1043671369552612, "eval_runtime": 31.6985, "eval_samples_per_second": 25.332, "eval_steps_per_second": 3.186, "eval_token_acc": 0.7352096813373672, "step": 3000 }, { "epoch": 3.37570303712036, "grad_norm": 2.713355302810669, "learning_rate": 7.904965779244228e-05, "loss": 0.3347512483596802, "step": 3001, "token_acc": 0.8946731234866828 }, { "epoch": 3.376827896512936, "grad_norm": 2.318016290664673, "learning_rate": 7.903451683195751e-05, "loss": 0.30058443546295166, "step": 3002, "token_acc": 0.893574297188755 }, { "epoch": 3.377952755905512, "grad_norm": 2.918889284133911, "learning_rate": 7.901937185342552e-05, "loss": 0.3454301655292511, "step": 3003, "token_acc": 0.8807339449541285 }, { "epoch": 3.3790776152980877, "grad_norm": 2.5600292682647705, "learning_rate": 7.900422285894222e-05, "loss": 0.358997106552124, "step": 3004, "token_acc": 0.8859138533178114 }, { "epoch": 3.380202474690664, "grad_norm": 3.387263536453247, "learning_rate": 7.898906985060403e-05, "loss": 0.3088800311088562, "step": 3005, "token_acc": 0.8945686900958466 }, { "epoch": 3.3813273340832395, "grad_norm": 3.01143741607666, "learning_rate": 7.897391283050796e-05, "loss": 0.437706857919693, "step": 3006, "token_acc": 0.8635778635778636 }, { "epoch": 3.3824521934758156, "grad_norm": 2.4645936489105225, "learning_rate": 7.89587518007516e-05, "loss": 0.3545777499675751, "step": 3007, "token_acc": 0.888509670079636 }, { "epoch": 3.3835770528683913, "grad_norm": 2.4205851554870605, "learning_rate": 7.894358676343305e-05, "loss": 0.454373836517334, "step": 3008, "token_acc": 0.863036303630363 }, { "epoch": 3.3847019122609674, "grad_norm": 2.4622087478637695, "learning_rate": 7.892841772065096e-05, "loss": 0.4510382413864136, "step": 3009, "token_acc": 0.8702559576345984 }, { "epoch": 3.3858267716535435, "grad_norm": 2.1755642890930176, "learning_rate": 7.891324467450457e-05, "loss": 0.288195937871933, "step": 3010, "token_acc": 0.8995633187772926 }, { "epoch": 3.386951631046119, "grad_norm": 2.2797396183013916, "learning_rate": 7.889806762709364e-05, "loss": 0.24684979021549225, "step": 3011, "token_acc": 0.9133192389006343 }, { "epoch": 3.3880764904386953, "grad_norm": 2.6325607299804688, "learning_rate": 7.888288658051853e-05, "loss": 0.33314552903175354, "step": 3012, "token_acc": 0.8772782503037667 }, { "epoch": 3.389201349831271, "grad_norm": 2.5948710441589355, "learning_rate": 7.88677015368801e-05, "loss": 0.44603005051612854, "step": 3013, "token_acc": 0.8653648509763617 }, { "epoch": 3.390326209223847, "grad_norm": 2.3366003036499023, "learning_rate": 7.88525124982798e-05, "loss": 0.29996198415756226, "step": 3014, "token_acc": 0.9076023391812865 }, { "epoch": 3.3914510686164228, "grad_norm": 2.393056631088257, "learning_rate": 7.883731946681962e-05, "loss": 0.3578258454799652, "step": 3015, "token_acc": 0.8863636363636364 }, { "epoch": 3.392575928008999, "grad_norm": 2.372823715209961, "learning_rate": 7.882212244460208e-05, "loss": 0.3255326449871063, "step": 3016, "token_acc": 0.8914285714285715 }, { "epoch": 3.393700787401575, "grad_norm": 2.7457752227783203, "learning_rate": 7.88069214337303e-05, "loss": 0.4462103545665741, "step": 3017, "token_acc": 0.8457661290322581 }, { "epoch": 3.3948256467941507, "grad_norm": 2.4196507930755615, "learning_rate": 7.879171643630794e-05, "loss": 0.2835354208946228, "step": 3018, "token_acc": 0.8931116389548693 }, { "epoch": 3.395950506186727, "grad_norm": 2.6807992458343506, "learning_rate": 7.877650745443915e-05, "loss": 0.35390806198120117, "step": 3019, "token_acc": 0.8946047678795483 }, { "epoch": 3.3970753655793025, "grad_norm": 2.1830084323883057, "learning_rate": 7.876129449022873e-05, "loss": 0.2919653356075287, "step": 3020, "token_acc": 0.9063786008230452 }, { "epoch": 3.3982002249718786, "grad_norm": 2.2343597412109375, "learning_rate": 7.874607754578196e-05, "loss": 0.3284420669078827, "step": 3021, "token_acc": 0.8981818181818182 }, { "epoch": 3.3993250843644542, "grad_norm": 2.3196308612823486, "learning_rate": 7.87308566232047e-05, "loss": 0.3395959436893463, "step": 3022, "token_acc": 0.8994413407821229 }, { "epoch": 3.4004499437570304, "grad_norm": 2.674551486968994, "learning_rate": 7.871563172460333e-05, "loss": 0.34179240465164185, "step": 3023, "token_acc": 0.8915816326530612 }, { "epoch": 3.4015748031496065, "grad_norm": 2.58516263961792, "learning_rate": 7.870040285208482e-05, "loss": 0.34244146943092346, "step": 3024, "token_acc": 0.8962053571428571 }, { "epoch": 3.402699662542182, "grad_norm": 2.525505781173706, "learning_rate": 7.868517000775668e-05, "loss": 0.36925196647644043, "step": 3025, "token_acc": 0.8883677298311444 }, { "epoch": 3.4038245219347583, "grad_norm": 2.012648820877075, "learning_rate": 7.866993319372695e-05, "loss": 0.2757297456264496, "step": 3026, "token_acc": 0.9003051881993896 }, { "epoch": 3.404949381327334, "grad_norm": 2.845410108566284, "learning_rate": 7.865469241210425e-05, "loss": 0.3981223702430725, "step": 3027, "token_acc": 0.8767942583732058 }, { "epoch": 3.40607424071991, "grad_norm": 2.612896680831909, "learning_rate": 7.86394476649977e-05, "loss": 0.3824135661125183, "step": 3028, "token_acc": 0.8918918918918919 }, { "epoch": 3.4071991001124857, "grad_norm": 2.966313600540161, "learning_rate": 7.862419895451703e-05, "loss": 0.25637465715408325, "step": 3029, "token_acc": 0.9206049149338374 }, { "epoch": 3.408323959505062, "grad_norm": 2.4612233638763428, "learning_rate": 7.860894628277247e-05, "loss": 0.31567639112472534, "step": 3030, "token_acc": 0.9057815845824411 }, { "epoch": 3.409448818897638, "grad_norm": 2.518545150756836, "learning_rate": 7.859368965187483e-05, "loss": 0.24778123199939728, "step": 3031, "token_acc": 0.9177018633540373 }, { "epoch": 3.4105736782902136, "grad_norm": 2.2968478202819824, "learning_rate": 7.857842906393543e-05, "loss": 0.28293734788894653, "step": 3032, "token_acc": 0.9123989218328841 }, { "epoch": 3.4116985376827897, "grad_norm": 2.673752546310425, "learning_rate": 7.856316452106619e-05, "loss": 0.3447033762931824, "step": 3033, "token_acc": 0.8892988929889298 }, { "epoch": 3.4128233970753654, "grad_norm": 2.417064905166626, "learning_rate": 7.854789602537954e-05, "loss": 0.23873379826545715, "step": 3034, "token_acc": 0.9185185185185185 }, { "epoch": 3.4139482564679415, "grad_norm": 2.597015142440796, "learning_rate": 7.853262357898847e-05, "loss": 0.3114691972732544, "step": 3035, "token_acc": 0.8974358974358975 }, { "epoch": 3.415073115860517, "grad_norm": 2.3613109588623047, "learning_rate": 7.85173471840065e-05, "loss": 0.36440902948379517, "step": 3036, "token_acc": 0.8882896764252697 }, { "epoch": 3.4161979752530933, "grad_norm": 2.4250640869140625, "learning_rate": 7.85020668425477e-05, "loss": 0.3545451760292053, "step": 3037, "token_acc": 0.8845780795344326 }, { "epoch": 3.4173228346456694, "grad_norm": 2.845659017562866, "learning_rate": 7.848678255672673e-05, "loss": 0.34659674763679504, "step": 3038, "token_acc": 0.888235294117647 }, { "epoch": 3.418447694038245, "grad_norm": 2.3281242847442627, "learning_rate": 7.847149432865875e-05, "loss": 0.2620457112789154, "step": 3039, "token_acc": 0.9055793991416309 }, { "epoch": 3.4195725534308212, "grad_norm": 2.8717386722564697, "learning_rate": 7.845620216045946e-05, "loss": 0.3030092120170593, "step": 3040, "token_acc": 0.8742236024844721 }, { "epoch": 3.420697412823397, "grad_norm": 2.0992002487182617, "learning_rate": 7.844090605424513e-05, "loss": 0.2356572449207306, "step": 3041, "token_acc": 0.9263050153531218 }, { "epoch": 3.421822272215973, "grad_norm": 3.3266940116882324, "learning_rate": 7.842560601213257e-05, "loss": 0.4059363007545471, "step": 3042, "token_acc": 0.8699731903485255 }, { "epoch": 3.4229471316085487, "grad_norm": 2.141369342803955, "learning_rate": 7.841030203623916e-05, "loss": 0.2633271813392639, "step": 3043, "token_acc": 0.9059929494712103 }, { "epoch": 3.424071991001125, "grad_norm": 2.41487717628479, "learning_rate": 7.839499412868274e-05, "loss": 0.39254647493362427, "step": 3044, "token_acc": 0.8768971332209107 }, { "epoch": 3.425196850393701, "grad_norm": 2.914464235305786, "learning_rate": 7.837968229158178e-05, "loss": 0.31208091974258423, "step": 3045, "token_acc": 0.8852201257861635 }, { "epoch": 3.4263217097862766, "grad_norm": 2.433777093887329, "learning_rate": 7.836436652705527e-05, "loss": 0.2825220227241516, "step": 3046, "token_acc": 0.9089900110987791 }, { "epoch": 3.4274465691788527, "grad_norm": 2.663774013519287, "learning_rate": 7.834904683722273e-05, "loss": 0.3562183380126953, "step": 3047, "token_acc": 0.8709302325581395 }, { "epoch": 3.4285714285714284, "grad_norm": 2.1024112701416016, "learning_rate": 7.833372322420425e-05, "loss": 0.29114484786987305, "step": 3048, "token_acc": 0.8984615384615384 }, { "epoch": 3.4296962879640045, "grad_norm": 2.672767400741577, "learning_rate": 7.83183956901204e-05, "loss": 0.2927924394607544, "step": 3049, "token_acc": 0.9057377049180327 }, { "epoch": 3.4308211473565806, "grad_norm": 2.916555643081665, "learning_rate": 7.830306423709237e-05, "loss": 0.41060972213745117, "step": 3050, "token_acc": 0.8904255319148936 }, { "epoch": 3.4319460067491563, "grad_norm": 2.8041012287139893, "learning_rate": 7.828772886724186e-05, "loss": 0.35495778918266296, "step": 3051, "token_acc": 0.8800904977375565 }, { "epoch": 3.4330708661417324, "grad_norm": 2.8466975688934326, "learning_rate": 7.82723895826911e-05, "loss": 0.3305234909057617, "step": 3052, "token_acc": 0.8922829581993569 }, { "epoch": 3.434195725534308, "grad_norm": 2.637568950653076, "learning_rate": 7.825704638556288e-05, "loss": 0.2754579186439514, "step": 3053, "token_acc": 0.8945868945868946 }, { "epoch": 3.435320584926884, "grad_norm": 2.7122366428375244, "learning_rate": 7.82416992779805e-05, "loss": 0.3452747166156769, "step": 3054, "token_acc": 0.8902900378310215 }, { "epoch": 3.4364454443194603, "grad_norm": 2.4218902587890625, "learning_rate": 7.822634826206786e-05, "loss": 0.23312854766845703, "step": 3055, "token_acc": 0.9171461449942463 }, { "epoch": 3.437570303712036, "grad_norm": 3.0488011837005615, "learning_rate": 7.821099333994934e-05, "loss": 0.41184931993484497, "step": 3056, "token_acc": 0.8728179551122195 }, { "epoch": 3.438695163104612, "grad_norm": 2.4587509632110596, "learning_rate": 7.819563451374991e-05, "loss": 0.253239244222641, "step": 3057, "token_acc": 0.9132706374085684 }, { "epoch": 3.4398200224971878, "grad_norm": 3.3932323455810547, "learning_rate": 7.818027178559505e-05, "loss": 0.4052833318710327, "step": 3058, "token_acc": 0.8683274021352313 }, { "epoch": 3.440944881889764, "grad_norm": 2.4944286346435547, "learning_rate": 7.816490515761076e-05, "loss": 0.2733941674232483, "step": 3059, "token_acc": 0.910757946210269 }, { "epoch": 3.4420697412823396, "grad_norm": 2.2727770805358887, "learning_rate": 7.814953463192365e-05, "loss": 0.29697543382644653, "step": 3060, "token_acc": 0.9010309278350516 }, { "epoch": 3.4431946006749157, "grad_norm": 2.7985849380493164, "learning_rate": 7.81341602106608e-05, "loss": 0.3989017605781555, "step": 3061, "token_acc": 0.8783943329397875 }, { "epoch": 3.444319460067492, "grad_norm": 2.6568830013275146, "learning_rate": 7.811878189594986e-05, "loss": 0.31904926896095276, "step": 3062, "token_acc": 0.8997821350762527 }, { "epoch": 3.4454443194600675, "grad_norm": 2.6288058757781982, "learning_rate": 7.8103399689919e-05, "loss": 0.26177868247032166, "step": 3063, "token_acc": 0.9200603318250377 }, { "epoch": 3.4465691788526436, "grad_norm": 2.2495205402374268, "learning_rate": 7.808801359469697e-05, "loss": 0.26604458689689636, "step": 3064, "token_acc": 0.9078947368421053 }, { "epoch": 3.4476940382452193, "grad_norm": 2.897972345352173, "learning_rate": 7.8072623612413e-05, "loss": 0.36638590693473816, "step": 3065, "token_acc": 0.8753117206982544 }, { "epoch": 3.4488188976377954, "grad_norm": 2.530266761779785, "learning_rate": 7.805722974519694e-05, "loss": 0.2961319386959076, "step": 3066, "token_acc": 0.8981958762886598 }, { "epoch": 3.449943757030371, "grad_norm": 2.5095155239105225, "learning_rate": 7.804183199517907e-05, "loss": 0.3605380058288574, "step": 3067, "token_acc": 0.887396694214876 }, { "epoch": 3.451068616422947, "grad_norm": 2.4327552318573, "learning_rate": 7.802643036449028e-05, "loss": 0.3251682221889496, "step": 3068, "token_acc": 0.8984088127294981 }, { "epoch": 3.4521934758155233, "grad_norm": 2.317866563796997, "learning_rate": 7.8011024855262e-05, "loss": 0.37064239382743835, "step": 3069, "token_acc": 0.873015873015873 }, { "epoch": 3.453318335208099, "grad_norm": 2.1652562618255615, "learning_rate": 7.799561546962615e-05, "loss": 0.37783175706863403, "step": 3070, "token_acc": 0.8921651221566975 }, { "epoch": 3.454443194600675, "grad_norm": 2.9366455078125, "learning_rate": 7.798020220971524e-05, "loss": 0.3611885905265808, "step": 3071, "token_acc": 0.8780841799709724 }, { "epoch": 3.4555680539932507, "grad_norm": 2.267909288406372, "learning_rate": 7.796478507766228e-05, "loss": 0.25797680020332336, "step": 3072, "token_acc": 0.9031531531531531 }, { "epoch": 3.456692913385827, "grad_norm": 2.0737802982330322, "learning_rate": 7.794936407560081e-05, "loss": 0.20484302937984467, "step": 3073, "token_acc": 0.9283236994219654 }, { "epoch": 3.4578177727784025, "grad_norm": 1.9852964878082275, "learning_rate": 7.793393920566494e-05, "loss": 0.3665611743927002, "step": 3074, "token_acc": 0.8785046728971962 }, { "epoch": 3.4589426321709786, "grad_norm": 2.7496423721313477, "learning_rate": 7.791851046998929e-05, "loss": 0.31658557057380676, "step": 3075, "token_acc": 0.9049479166666666 }, { "epoch": 3.4600674915635548, "grad_norm": 2.309345245361328, "learning_rate": 7.790307787070903e-05, "loss": 0.3063298463821411, "step": 3076, "token_acc": 0.8970873786407767 }, { "epoch": 3.4611923509561304, "grad_norm": 2.3893048763275146, "learning_rate": 7.788764140995983e-05, "loss": 0.3053242266178131, "step": 3077, "token_acc": 0.8981854838709677 }, { "epoch": 3.4623172103487065, "grad_norm": 2.7124531269073486, "learning_rate": 7.787220108987796e-05, "loss": 0.3804139196872711, "step": 3078, "token_acc": 0.8929765886287625 }, { "epoch": 3.463442069741282, "grad_norm": 2.723992347717285, "learning_rate": 7.785675691260016e-05, "loss": 0.31851112842559814, "step": 3079, "token_acc": 0.8960176991150443 }, { "epoch": 3.4645669291338583, "grad_norm": 2.040755033493042, "learning_rate": 7.784130888026374e-05, "loss": 0.3124226927757263, "step": 3080, "token_acc": 0.9027653880463872 }, { "epoch": 3.465691788526434, "grad_norm": 2.884425640106201, "learning_rate": 7.782585699500653e-05, "loss": 0.31880900263786316, "step": 3081, "token_acc": 0.9038461538461539 }, { "epoch": 3.46681664791901, "grad_norm": 2.7249855995178223, "learning_rate": 7.781040125896688e-05, "loss": 0.34285974502563477, "step": 3082, "token_acc": 0.8910891089108911 }, { "epoch": 3.4679415073115862, "grad_norm": 2.2030725479125977, "learning_rate": 7.779494167428371e-05, "loss": 0.28994929790496826, "step": 3083, "token_acc": 0.9040284360189573 }, { "epoch": 3.469066366704162, "grad_norm": 2.363352060317993, "learning_rate": 7.777947824309644e-05, "loss": 0.25987887382507324, "step": 3084, "token_acc": 0.9048128342245989 }, { "epoch": 3.470191226096738, "grad_norm": 2.67155122756958, "learning_rate": 7.776401096754502e-05, "loss": 0.35724395513534546, "step": 3085, "token_acc": 0.8870466321243523 }, { "epoch": 3.4713160854893137, "grad_norm": 2.618778944015503, "learning_rate": 7.774853984976998e-05, "loss": 0.312538206577301, "step": 3086, "token_acc": 0.8886509635974305 }, { "epoch": 3.47244094488189, "grad_norm": 2.452091932296753, "learning_rate": 7.77330648919123e-05, "loss": 0.3734896183013916, "step": 3087, "token_acc": 0.8859737638748738 }, { "epoch": 3.4735658042744655, "grad_norm": 2.657165288925171, "learning_rate": 7.771758609611359e-05, "loss": 0.31557655334472656, "step": 3088, "token_acc": 0.8946212952799122 }, { "epoch": 3.4746906636670416, "grad_norm": 2.512038469314575, "learning_rate": 7.77021034645159e-05, "loss": 0.31052112579345703, "step": 3089, "token_acc": 0.8876941457586619 }, { "epoch": 3.4758155230596177, "grad_norm": 2.2889394760131836, "learning_rate": 7.768661699926186e-05, "loss": 0.30667370557785034, "step": 3090, "token_acc": 0.9058597502401537 }, { "epoch": 3.4769403824521934, "grad_norm": 2.7640185356140137, "learning_rate": 7.767112670249463e-05, "loss": 0.34518828988075256, "step": 3091, "token_acc": 0.892904953145917 }, { "epoch": 3.4780652418447695, "grad_norm": 2.3593766689300537, "learning_rate": 7.765563257635789e-05, "loss": 0.3466023802757263, "step": 3092, "token_acc": 0.8888888888888888 }, { "epoch": 3.479190101237345, "grad_norm": 2.309577226638794, "learning_rate": 7.764013462299584e-05, "loss": 0.3356732130050659, "step": 3093, "token_acc": 0.8883720930232558 }, { "epoch": 3.4803149606299213, "grad_norm": 1.9814820289611816, "learning_rate": 7.762463284455324e-05, "loss": 0.25810477137565613, "step": 3094, "token_acc": 0.9049049049049049 }, { "epoch": 3.481439820022497, "grad_norm": 2.369051933288574, "learning_rate": 7.760912724317533e-05, "loss": 0.2520984411239624, "step": 3095, "token_acc": 0.927710843373494 }, { "epoch": 3.482564679415073, "grad_norm": 2.527315378189087, "learning_rate": 7.759361782100794e-05, "loss": 0.3397921025753021, "step": 3096, "token_acc": 0.8827655310621243 }, { "epoch": 3.483689538807649, "grad_norm": 2.571654796600342, "learning_rate": 7.757810458019739e-05, "loss": 0.3919515013694763, "step": 3097, "token_acc": 0.8595317725752508 }, { "epoch": 3.484814398200225, "grad_norm": 2.7502365112304688, "learning_rate": 7.756258752289051e-05, "loss": 0.4088655710220337, "step": 3098, "token_acc": 0.872 }, { "epoch": 3.485939257592801, "grad_norm": 2.923647165298462, "learning_rate": 7.754706665123471e-05, "loss": 0.32404887676239014, "step": 3099, "token_acc": 0.8777777777777778 }, { "epoch": 3.4870641169853767, "grad_norm": 2.7039430141448975, "learning_rate": 7.75315419673779e-05, "loss": 0.4010273814201355, "step": 3100, "token_acc": 0.8736383442265795 }, { "epoch": 3.4881889763779528, "grad_norm": 2.2127113342285156, "learning_rate": 7.751601347346852e-05, "loss": 0.30674654245376587, "step": 3101, "token_acc": 0.9055489964580874 }, { "epoch": 3.4893138357705284, "grad_norm": 2.856977939605713, "learning_rate": 7.750048117165555e-05, "loss": 0.4530438184738159, "step": 3102, "token_acc": 0.8701622971285893 }, { "epoch": 3.4904386951631046, "grad_norm": 2.4406826496124268, "learning_rate": 7.748494506408845e-05, "loss": 0.29898518323898315, "step": 3103, "token_acc": 0.8955582232893158 }, { "epoch": 3.4915635545556807, "grad_norm": 2.4689345359802246, "learning_rate": 7.746940515291728e-05, "loss": 0.2528733015060425, "step": 3104, "token_acc": 0.9118028534370947 }, { "epoch": 3.4926884139482564, "grad_norm": 2.4264073371887207, "learning_rate": 7.745386144029254e-05, "loss": 0.29271358251571655, "step": 3105, "token_acc": 0.9054916985951469 }, { "epoch": 3.4938132733408325, "grad_norm": 2.5660347938537598, "learning_rate": 7.743831392836533e-05, "loss": 0.36131787300109863, "step": 3106, "token_acc": 0.884102564102564 }, { "epoch": 3.494938132733408, "grad_norm": 2.538151264190674, "learning_rate": 7.742276261928726e-05, "loss": 0.3883085548877716, "step": 3107, "token_acc": 0.8759615384615385 }, { "epoch": 3.4960629921259843, "grad_norm": 2.3247241973876953, "learning_rate": 7.740720751521045e-05, "loss": 0.3827877640724182, "step": 3108, "token_acc": 0.8804523424878837 }, { "epoch": 3.49718785151856, "grad_norm": 2.1955933570861816, "learning_rate": 7.739164861828752e-05, "loss": 0.36449047923088074, "step": 3109, "token_acc": 0.8844221105527639 }, { "epoch": 3.498312710911136, "grad_norm": 2.410627841949463, "learning_rate": 7.737608593067168e-05, "loss": 0.28285884857177734, "step": 3110, "token_acc": 0.904833836858006 }, { "epoch": 3.499437570303712, "grad_norm": 2.259168863296509, "learning_rate": 7.736051945451661e-05, "loss": 0.27734825015068054, "step": 3111, "token_acc": 0.9071428571428571 }, { "epoch": 3.500562429696288, "grad_norm": 2.249708414077759, "learning_rate": 7.734494919197655e-05, "loss": 0.2815532684326172, "step": 3112, "token_acc": 0.8989010989010989 }, { "epoch": 3.501687289088864, "grad_norm": 2.3418779373168945, "learning_rate": 7.732937514520623e-05, "loss": 0.31926876306533813, "step": 3113, "token_acc": 0.8928164196123147 }, { "epoch": 3.50281214848144, "grad_norm": 2.501044988632202, "learning_rate": 7.731379731636093e-05, "loss": 0.4845641255378723, "step": 3114, "token_acc": 0.8616636528028933 }, { "epoch": 3.5039370078740157, "grad_norm": 2.6186368465423584, "learning_rate": 7.729821570759646e-05, "loss": 0.4218713045120239, "step": 3115, "token_acc": 0.8748824082784572 }, { "epoch": 3.5050618672665914, "grad_norm": 2.336580514907837, "learning_rate": 7.72826303210691e-05, "loss": 0.38309866189956665, "step": 3116, "token_acc": 0.8828058169375534 }, { "epoch": 3.5061867266591675, "grad_norm": 1.895223617553711, "learning_rate": 7.72670411589357e-05, "loss": 0.23530155420303345, "step": 3117, "token_acc": 0.923006416131989 }, { "epoch": 3.5073115860517436, "grad_norm": 2.092139959335327, "learning_rate": 7.725144822335367e-05, "loss": 0.27772584557533264, "step": 3118, "token_acc": 0.892960462873674 }, { "epoch": 3.5084364454443193, "grad_norm": 2.7519795894622803, "learning_rate": 7.723585151648082e-05, "loss": 0.39758217334747314, "step": 3119, "token_acc": 0.8931297709923665 }, { "epoch": 3.5095613048368954, "grad_norm": 2.906508684158325, "learning_rate": 7.722025104047561e-05, "loss": 0.36493006348609924, "step": 3120, "token_acc": 0.881638846737481 }, { "epoch": 3.5106861642294716, "grad_norm": 2.6174445152282715, "learning_rate": 7.720464679749696e-05, "loss": 0.29746878147125244, "step": 3121, "token_acc": 0.906158357771261 }, { "epoch": 3.5118110236220472, "grad_norm": 2.712203025817871, "learning_rate": 7.718903878970431e-05, "loss": 0.4077921211719513, "step": 3122, "token_acc": 0.8842105263157894 }, { "epoch": 3.512935883014623, "grad_norm": 2.732515335083008, "learning_rate": 7.717342701925766e-05, "loss": 0.3749541640281677, "step": 3123, "token_acc": 0.8810810810810811 }, { "epoch": 3.514060742407199, "grad_norm": 2.729619264602661, "learning_rate": 7.715781148831744e-05, "loss": 0.3947596549987793, "step": 3124, "token_acc": 0.8684782608695653 }, { "epoch": 3.515185601799775, "grad_norm": 2.469648599624634, "learning_rate": 7.71421921990447e-05, "loss": 0.3125728368759155, "step": 3125, "token_acc": 0.8997955010224948 }, { "epoch": 3.516310461192351, "grad_norm": 2.6966617107391357, "learning_rate": 7.712656915360099e-05, "loss": 0.3361613154411316, "step": 3126, "token_acc": 0.8727034120734908 }, { "epoch": 3.517435320584927, "grad_norm": 2.9299306869506836, "learning_rate": 7.711094235414834e-05, "loss": 0.38429296016693115, "step": 3127, "token_acc": 0.8701986754966887 }, { "epoch": 3.518560179977503, "grad_norm": 2.2523696422576904, "learning_rate": 7.709531180284932e-05, "loss": 0.3526330590248108, "step": 3128, "token_acc": 0.8824593128390597 }, { "epoch": 3.5196850393700787, "grad_norm": 3.205439329147339, "learning_rate": 7.707967750186703e-05, "loss": 0.4624493718147278, "step": 3129, "token_acc": 0.8667582417582418 }, { "epoch": 3.520809898762655, "grad_norm": 2.654865026473999, "learning_rate": 7.706403945336506e-05, "loss": 0.3466394543647766, "step": 3130, "token_acc": 0.8743654822335025 }, { "epoch": 3.5219347581552305, "grad_norm": 2.613619089126587, "learning_rate": 7.704839765950758e-05, "loss": 0.3154360055923462, "step": 3131, "token_acc": 0.8938714499252616 }, { "epoch": 3.5230596175478066, "grad_norm": 2.393279790878296, "learning_rate": 7.703275212245918e-05, "loss": 0.41143548488616943, "step": 3132, "token_acc": 0.8675429726996967 }, { "epoch": 3.5241844769403823, "grad_norm": 2.1142170429229736, "learning_rate": 7.701710284438508e-05, "loss": 0.3428662419319153, "step": 3133, "token_acc": 0.8942652329749103 }, { "epoch": 3.5253093363329584, "grad_norm": 2.580024242401123, "learning_rate": 7.700144982745093e-05, "loss": 0.4035598933696747, "step": 3134, "token_acc": 0.8818737270875764 }, { "epoch": 3.5264341957255345, "grad_norm": 2.24220609664917, "learning_rate": 7.698579307382292e-05, "loss": 0.3673822283744812, "step": 3135, "token_acc": 0.8794520547945206 }, { "epoch": 3.52755905511811, "grad_norm": 2.8419995307922363, "learning_rate": 7.69701325856678e-05, "loss": 0.3438084125518799, "step": 3136, "token_acc": 0.8852242744063324 }, { "epoch": 3.5286839145106863, "grad_norm": 2.8866095542907715, "learning_rate": 7.695446836515278e-05, "loss": 0.37085166573524475, "step": 3137, "token_acc": 0.8738839285714286 }, { "epoch": 3.529808773903262, "grad_norm": 2.6501882076263428, "learning_rate": 7.693880041444561e-05, "loss": 0.38639694452285767, "step": 3138, "token_acc": 0.8790322580645161 }, { "epoch": 3.530933633295838, "grad_norm": 2.3729054927825928, "learning_rate": 7.692312873571456e-05, "loss": 0.3089068531990051, "step": 3139, "token_acc": 0.9073514602215509 }, { "epoch": 3.5320584926884138, "grad_norm": 2.5976810455322266, "learning_rate": 7.690745333112843e-05, "loss": 0.34455180168151855, "step": 3140, "token_acc": 0.8918575063613231 }, { "epoch": 3.53318335208099, "grad_norm": 3.1278698444366455, "learning_rate": 7.689177420285651e-05, "loss": 0.3995509743690491, "step": 3141, "token_acc": 0.8728476821192053 }, { "epoch": 3.534308211473566, "grad_norm": 2.5296332836151123, "learning_rate": 7.68760913530686e-05, "loss": 0.32930588722229004, "step": 3142, "token_acc": 0.9009009009009009 }, { "epoch": 3.5354330708661417, "grad_norm": 2.140624761581421, "learning_rate": 7.686040478393501e-05, "loss": 0.29992321133613586, "step": 3143, "token_acc": 0.9011516314779271 }, { "epoch": 3.536557930258718, "grad_norm": 2.5126466751098633, "learning_rate": 7.684471449762663e-05, "loss": 0.29351234436035156, "step": 3144, "token_acc": 0.900990099009901 }, { "epoch": 3.5376827896512935, "grad_norm": 3.129603862762451, "learning_rate": 7.68290204963148e-05, "loss": 0.444684237241745, "step": 3145, "token_acc": 0.8584686774941995 }, { "epoch": 3.5388076490438696, "grad_norm": 2.6015920639038086, "learning_rate": 7.681332278217136e-05, "loss": 0.35393041372299194, "step": 3146, "token_acc": 0.8770413064361191 }, { "epoch": 3.5399325084364452, "grad_norm": 2.3251993656158447, "learning_rate": 7.679762135736876e-05, "loss": 0.3154848515987396, "step": 3147, "token_acc": 0.900497512437811 }, { "epoch": 3.5410573678290214, "grad_norm": 3.1897473335266113, "learning_rate": 7.678191622407982e-05, "loss": 0.4155825078487396, "step": 3148, "token_acc": 0.8516057585825028 }, { "epoch": 3.5421822272215975, "grad_norm": 2.476041793823242, "learning_rate": 7.676620738447802e-05, "loss": 0.33764660358428955, "step": 3149, "token_acc": 0.8919567827130852 }, { "epoch": 3.543307086614173, "grad_norm": 2.58550763130188, "learning_rate": 7.675049484073724e-05, "loss": 0.32459044456481934, "step": 3150, "token_acc": 0.8963254593175853 }, { "epoch": 3.5444319460067493, "grad_norm": 2.4717273712158203, "learning_rate": 7.673477859503195e-05, "loss": 0.33199021220207214, "step": 3151, "token_acc": 0.8957247132429614 }, { "epoch": 3.545556805399325, "grad_norm": 2.640833854675293, "learning_rate": 7.671905864953709e-05, "loss": 0.3400912284851074, "step": 3152, "token_acc": 0.8834720570749108 }, { "epoch": 3.546681664791901, "grad_norm": 3.072551727294922, "learning_rate": 7.670333500642809e-05, "loss": 0.35705745220184326, "step": 3153, "token_acc": 0.87890625 }, { "epoch": 3.5478065241844767, "grad_norm": 2.86651349067688, "learning_rate": 7.668760766788097e-05, "loss": 0.31768837571144104, "step": 3154, "token_acc": 0.8927613941018767 }, { "epoch": 3.548931383577053, "grad_norm": 2.6200735569000244, "learning_rate": 7.667187663607218e-05, "loss": 0.36769312620162964, "step": 3155, "token_acc": 0.8846153846153846 }, { "epoch": 3.550056242969629, "grad_norm": 2.6444690227508545, "learning_rate": 7.665614191317874e-05, "loss": 0.3262786269187927, "step": 3156, "token_acc": 0.893368010403121 }, { "epoch": 3.5511811023622046, "grad_norm": 2.558256149291992, "learning_rate": 7.664040350137814e-05, "loss": 0.42124444246292114, "step": 3157, "token_acc": 0.8701923076923077 }, { "epoch": 3.5523059617547807, "grad_norm": 2.5111286640167236, "learning_rate": 7.662466140284841e-05, "loss": 0.2774267792701721, "step": 3158, "token_acc": 0.8947368421052632 }, { "epoch": 3.5534308211473564, "grad_norm": 2.4639039039611816, "learning_rate": 7.660891561976808e-05, "loss": 0.3037674129009247, "step": 3159, "token_acc": 0.9059945504087193 }, { "epoch": 3.5545556805399325, "grad_norm": 2.544994592666626, "learning_rate": 7.659316615431618e-05, "loss": 0.3370357155799866, "step": 3160, "token_acc": 0.8852459016393442 }, { "epoch": 3.555680539932508, "grad_norm": 2.648860454559326, "learning_rate": 7.657741300867225e-05, "loss": 0.42874616384506226, "step": 3161, "token_acc": 0.869475847893114 }, { "epoch": 3.5568053993250843, "grad_norm": 3.0476622581481934, "learning_rate": 7.656165618501636e-05, "loss": 0.3000507354736328, "step": 3162, "token_acc": 0.8924418604651163 }, { "epoch": 3.5579302587176604, "grad_norm": 2.2679553031921387, "learning_rate": 7.654589568552906e-05, "loss": 0.24526330828666687, "step": 3163, "token_acc": 0.9106918238993711 }, { "epoch": 3.559055118110236, "grad_norm": 2.4790658950805664, "learning_rate": 7.653013151239143e-05, "loss": 0.385168194770813, "step": 3164, "token_acc": 0.8795656465942744 }, { "epoch": 3.5601799775028122, "grad_norm": 2.434011697769165, "learning_rate": 7.651436366778506e-05, "loss": 0.2573625445365906, "step": 3165, "token_acc": 0.9117647058823529 }, { "epoch": 3.5613048368953883, "grad_norm": 2.9705984592437744, "learning_rate": 7.649859215389205e-05, "loss": 0.3310781717300415, "step": 3166, "token_acc": 0.8883977900552487 }, { "epoch": 3.562429696287964, "grad_norm": 3.178499460220337, "learning_rate": 7.648281697289496e-05, "loss": 0.39130809903144836, "step": 3167, "token_acc": 0.8712522045855379 }, { "epoch": 3.5635545556805397, "grad_norm": 2.852877140045166, "learning_rate": 7.646703812697696e-05, "loss": 0.34173738956451416, "step": 3168, "token_acc": 0.8986232790988736 }, { "epoch": 3.564679415073116, "grad_norm": 2.408440113067627, "learning_rate": 7.645125561832158e-05, "loss": 0.32161226868629456, "step": 3169, "token_acc": 0.896 }, { "epoch": 3.565804274465692, "grad_norm": 2.5333986282348633, "learning_rate": 7.6435469449113e-05, "loss": 0.30451276898384094, "step": 3170, "token_acc": 0.8971830985915493 }, { "epoch": 3.5669291338582676, "grad_norm": 2.3365049362182617, "learning_rate": 7.641967962153582e-05, "loss": 0.36545461416244507, "step": 3171, "token_acc": 0.8822495606326889 }, { "epoch": 3.5680539932508437, "grad_norm": 3.063234329223633, "learning_rate": 7.64038861377752e-05, "loss": 0.3068082928657532, "step": 3172, "token_acc": 0.9058219178082192 }, { "epoch": 3.56917885264342, "grad_norm": 2.905421733856201, "learning_rate": 7.638808900001675e-05, "loss": 0.37672901153564453, "step": 3173, "token_acc": 0.8949232585596222 }, { "epoch": 3.5703037120359955, "grad_norm": 3.096357822418213, "learning_rate": 7.637228821044662e-05, "loss": 0.4271237850189209, "step": 3174, "token_acc": 0.8588888888888889 }, { "epoch": 3.571428571428571, "grad_norm": 2.392885684967041, "learning_rate": 7.635648377125148e-05, "loss": 0.23092815279960632, "step": 3175, "token_acc": 0.912621359223301 }, { "epoch": 3.5725534308211473, "grad_norm": 2.677726984024048, "learning_rate": 7.634067568461845e-05, "loss": 0.33063894510269165, "step": 3176, "token_acc": 0.896551724137931 }, { "epoch": 3.5736782902137234, "grad_norm": 2.7797672748565674, "learning_rate": 7.632486395273521e-05, "loss": 0.3357365131378174, "step": 3177, "token_acc": 0.8758782201405152 }, { "epoch": 3.574803149606299, "grad_norm": 2.099644899368286, "learning_rate": 7.630904857778993e-05, "loss": 0.25896161794662476, "step": 3178, "token_acc": 0.914179104477612 }, { "epoch": 3.575928008998875, "grad_norm": 2.6037445068359375, "learning_rate": 7.629322956197129e-05, "loss": 0.38199901580810547, "step": 3179, "token_acc": 0.8831417624521073 }, { "epoch": 3.5770528683914513, "grad_norm": 2.2919301986694336, "learning_rate": 7.627740690746843e-05, "loss": 0.37535619735717773, "step": 3180, "token_acc": 0.8780694326841659 }, { "epoch": 3.578177727784027, "grad_norm": 2.8656513690948486, "learning_rate": 7.626158061647103e-05, "loss": 0.3388063907623291, "step": 3181, "token_acc": 0.8877146631439894 }, { "epoch": 3.5793025871766027, "grad_norm": 2.757941246032715, "learning_rate": 7.624575069116929e-05, "loss": 0.3826406002044678, "step": 3182, "token_acc": 0.8734290843806104 }, { "epoch": 3.5804274465691788, "grad_norm": 2.4563965797424316, "learning_rate": 7.622991713375387e-05, "loss": 0.34703218936920166, "step": 3183, "token_acc": 0.8805668016194332 }, { "epoch": 3.581552305961755, "grad_norm": 3.2382328510284424, "learning_rate": 7.621407994641596e-05, "loss": 0.4122961461544037, "step": 3184, "token_acc": 0.8637015781922525 }, { "epoch": 3.5826771653543306, "grad_norm": 2.8983302116394043, "learning_rate": 7.619823913134727e-05, "loss": 0.4349757432937622, "step": 3185, "token_acc": 0.865315852205006 }, { "epoch": 3.5838020247469067, "grad_norm": 2.6538233757019043, "learning_rate": 7.618239469073995e-05, "loss": 0.3715450167655945, "step": 3186, "token_acc": 0.8991130820399114 }, { "epoch": 3.584926884139483, "grad_norm": 2.62107253074646, "learning_rate": 7.616654662678671e-05, "loss": 0.24312591552734375, "step": 3187, "token_acc": 0.9014267185473411 }, { "epoch": 3.5860517435320585, "grad_norm": 2.0262019634246826, "learning_rate": 7.615069494168076e-05, "loss": 0.3556632399559021, "step": 3188, "token_acc": 0.8808479532163743 }, { "epoch": 3.5871766029246346, "grad_norm": 2.1860134601593018, "learning_rate": 7.613483963761577e-05, "loss": 0.28765177726745605, "step": 3189, "token_acc": 0.902542372881356 }, { "epoch": 3.5883014623172103, "grad_norm": 2.68933367729187, "learning_rate": 7.611898071678593e-05, "loss": 0.35958653688430786, "step": 3190, "token_acc": 0.8892773892773893 }, { "epoch": 3.5894263217097864, "grad_norm": 2.2896065711975098, "learning_rate": 7.610311818138596e-05, "loss": 0.38264304399490356, "step": 3191, "token_acc": 0.8825088339222615 }, { "epoch": 3.590551181102362, "grad_norm": 2.509814739227295, "learning_rate": 7.608725203361103e-05, "loss": 0.34514614939689636, "step": 3192, "token_acc": 0.8923076923076924 }, { "epoch": 3.591676040494938, "grad_norm": 2.6991028785705566, "learning_rate": 7.607138227565682e-05, "loss": 0.30784106254577637, "step": 3193, "token_acc": 0.8972222222222223 }, { "epoch": 3.5928008998875143, "grad_norm": 2.7011537551879883, "learning_rate": 7.605550890971957e-05, "loss": 0.3321763873100281, "step": 3194, "token_acc": 0.8883435582822086 }, { "epoch": 3.59392575928009, "grad_norm": 3.5622010231018066, "learning_rate": 7.603963193799595e-05, "loss": 0.45353031158447266, "step": 3195, "token_acc": 0.8571428571428571 }, { "epoch": 3.595050618672666, "grad_norm": 2.139840841293335, "learning_rate": 7.602375136268316e-05, "loss": 0.33235830068588257, "step": 3196, "token_acc": 0.8920985556499575 }, { "epoch": 3.5961754780652417, "grad_norm": 2.6831817626953125, "learning_rate": 7.600786718597886e-05, "loss": 0.30668553709983826, "step": 3197, "token_acc": 0.8985507246376812 }, { "epoch": 3.597300337457818, "grad_norm": 2.3149056434631348, "learning_rate": 7.599197941008129e-05, "loss": 0.4240584373474121, "step": 3198, "token_acc": 0.8712273641851107 }, { "epoch": 3.5984251968503935, "grad_norm": 2.2905640602111816, "learning_rate": 7.59760880371891e-05, "loss": 0.3657910227775574, "step": 3199, "token_acc": 0.8964326812428078 }, { "epoch": 3.5995500562429696, "grad_norm": 2.4416396617889404, "learning_rate": 7.596019306950148e-05, "loss": 0.41123223304748535, "step": 3200, "token_acc": 0.877507919746568 }, { "epoch": 3.6006749156355458, "grad_norm": 2.577982187271118, "learning_rate": 7.594429450921814e-05, "loss": 0.38386356830596924, "step": 3201, "token_acc": 0.8842443729903537 }, { "epoch": 3.6017997750281214, "grad_norm": 2.9452602863311768, "learning_rate": 7.592839235853921e-05, "loss": 0.25862157344818115, "step": 3202, "token_acc": 0.9069767441860465 }, { "epoch": 3.6029246344206975, "grad_norm": 2.9984819889068604, "learning_rate": 7.591248661966541e-05, "loss": 0.4787669777870178, "step": 3203, "token_acc": 0.8501144164759725 }, { "epoch": 3.604049493813273, "grad_norm": 2.592266082763672, "learning_rate": 7.589657729479792e-05, "loss": 0.31346452236175537, "step": 3204, "token_acc": 0.9040735873850198 }, { "epoch": 3.6051743532058493, "grad_norm": 2.69718337059021, "learning_rate": 7.588066438613836e-05, "loss": 0.311576783657074, "step": 3205, "token_acc": 0.8900293255131965 }, { "epoch": 3.606299212598425, "grad_norm": 2.102815866470337, "learning_rate": 7.586474789588894e-05, "loss": 0.2586576044559479, "step": 3206, "token_acc": 0.9173228346456693 }, { "epoch": 3.607424071991001, "grad_norm": 2.277078628540039, "learning_rate": 7.584882782625231e-05, "loss": 0.3864375054836273, "step": 3207, "token_acc": 0.8690702087286527 }, { "epoch": 3.6085489313835772, "grad_norm": 2.5266149044036865, "learning_rate": 7.583290417943163e-05, "loss": 0.3811179995536804, "step": 3208, "token_acc": 0.883495145631068 }, { "epoch": 3.609673790776153, "grad_norm": 2.7068569660186768, "learning_rate": 7.581697695763052e-05, "loss": 0.3838005065917969, "step": 3209, "token_acc": 0.875531914893617 }, { "epoch": 3.610798650168729, "grad_norm": 2.438457727432251, "learning_rate": 7.580104616305319e-05, "loss": 0.3692927658557892, "step": 3210, "token_acc": 0.8961397058823529 }, { "epoch": 3.6119235095613047, "grad_norm": 3.2424118518829346, "learning_rate": 7.578511179790422e-05, "loss": 0.40403977036476135, "step": 3211, "token_acc": 0.8535911602209945 }, { "epoch": 3.613048368953881, "grad_norm": 2.579838752746582, "learning_rate": 7.576917386438876e-05, "loss": 0.34795770049095154, "step": 3212, "token_acc": 0.8842592592592593 }, { "epoch": 3.6141732283464565, "grad_norm": 2.5363314151763916, "learning_rate": 7.575323236471246e-05, "loss": 0.4431425631046295, "step": 3213, "token_acc": 0.845351867940921 }, { "epoch": 3.6152980877390326, "grad_norm": 2.560901165008545, "learning_rate": 7.573728730108143e-05, "loss": 0.3228455185890198, "step": 3214, "token_acc": 0.89171974522293 }, { "epoch": 3.6164229471316087, "grad_norm": 2.671781301498413, "learning_rate": 7.572133867570229e-05, "loss": 0.28959551453590393, "step": 3215, "token_acc": 0.903448275862069 }, { "epoch": 3.6175478065241844, "grad_norm": 2.120983123779297, "learning_rate": 7.570538649078212e-05, "loss": 0.3067529797554016, "step": 3216, "token_acc": 0.8974121996303143 }, { "epoch": 3.6186726659167605, "grad_norm": 2.9112539291381836, "learning_rate": 7.568943074852856e-05, "loss": 0.32273930311203003, "step": 3217, "token_acc": 0.8847305389221557 }, { "epoch": 3.619797525309336, "grad_norm": 2.4670138359069824, "learning_rate": 7.56734714511497e-05, "loss": 0.25956082344055176, "step": 3218, "token_acc": 0.9140811455847255 }, { "epoch": 3.6209223847019123, "grad_norm": 2.9440743923187256, "learning_rate": 7.56575086008541e-05, "loss": 0.2709891200065613, "step": 3219, "token_acc": 0.9220389805097451 }, { "epoch": 3.622047244094488, "grad_norm": 2.613365411758423, "learning_rate": 7.564154219985087e-05, "loss": 0.35093218088150024, "step": 3220, "token_acc": 0.8928210313447927 }, { "epoch": 3.623172103487064, "grad_norm": 3.1052722930908203, "learning_rate": 7.562557225034955e-05, "loss": 0.5328154563903809, "step": 3221, "token_acc": 0.8424908424908425 }, { "epoch": 3.62429696287964, "grad_norm": 2.493013381958008, "learning_rate": 7.56095987545602e-05, "loss": 0.3581679165363312, "step": 3222, "token_acc": 0.8843813387423936 }, { "epoch": 3.625421822272216, "grad_norm": 2.488661050796509, "learning_rate": 7.559362171469338e-05, "loss": 0.4193601608276367, "step": 3223, "token_acc": 0.8692515779981965 }, { "epoch": 3.626546681664792, "grad_norm": 2.5679304599761963, "learning_rate": 7.557764113296012e-05, "loss": 0.22676515579223633, "step": 3224, "token_acc": 0.9191419141914191 }, { "epoch": 3.6276715410573677, "grad_norm": 2.2598485946655273, "learning_rate": 7.556165701157197e-05, "loss": 0.2833307087421417, "step": 3225, "token_acc": 0.9222903885480572 }, { "epoch": 3.628796400449944, "grad_norm": 2.6099703311920166, "learning_rate": 7.554566935274094e-05, "loss": 0.3510811924934387, "step": 3226, "token_acc": 0.8918918918918919 }, { "epoch": 3.6299212598425195, "grad_norm": 2.176567554473877, "learning_rate": 7.552967815867955e-05, "loss": 0.32962360978126526, "step": 3227, "token_acc": 0.8964941569282137 }, { "epoch": 3.6310461192350956, "grad_norm": 2.2886345386505127, "learning_rate": 7.551368343160079e-05, "loss": 0.300891637802124, "step": 3228, "token_acc": 0.9063545150501672 }, { "epoch": 3.6321709786276717, "grad_norm": 2.8015353679656982, "learning_rate": 7.549768517371816e-05, "loss": 0.40085625648498535, "step": 3229, "token_acc": 0.8746113989637305 }, { "epoch": 3.6332958380202474, "grad_norm": 2.1467220783233643, "learning_rate": 7.548168338724561e-05, "loss": 0.3336816132068634, "step": 3230, "token_acc": 0.8906394810009268 }, { "epoch": 3.6344206974128235, "grad_norm": 2.567115545272827, "learning_rate": 7.546567807439761e-05, "loss": 0.2636217176914215, "step": 3231, "token_acc": 0.9077102803738317 }, { "epoch": 3.6355455568053996, "grad_norm": 2.8411567211151123, "learning_rate": 7.544966923738917e-05, "loss": 0.3786541819572449, "step": 3232, "token_acc": 0.8758256274768824 }, { "epoch": 3.6366704161979753, "grad_norm": 2.9068546295166016, "learning_rate": 7.543365687843565e-05, "loss": 0.352836549282074, "step": 3233, "token_acc": 0.8770270270270271 }, { "epoch": 3.637795275590551, "grad_norm": 2.880781650543213, "learning_rate": 7.541764099975303e-05, "loss": 0.3149893879890442, "step": 3234, "token_acc": 0.8860606060606061 }, { "epoch": 3.638920134983127, "grad_norm": 2.627984046936035, "learning_rate": 7.540162160355771e-05, "loss": 0.35974937677383423, "step": 3235, "token_acc": 0.887940234791889 }, { "epoch": 3.640044994375703, "grad_norm": 2.4464118480682373, "learning_rate": 7.538559869206658e-05, "loss": 0.32388943433761597, "step": 3236, "token_acc": 0.9028776978417267 }, { "epoch": 3.641169853768279, "grad_norm": 2.694512367248535, "learning_rate": 7.536957226749705e-05, "loss": 0.46426042914390564, "step": 3237, "token_acc": 0.8637803590285111 }, { "epoch": 3.642294713160855, "grad_norm": 2.6554393768310547, "learning_rate": 7.535354233206701e-05, "loss": 0.34183740615844727, "step": 3238, "token_acc": 0.8922363847045192 }, { "epoch": 3.643419572553431, "grad_norm": 2.7750489711761475, "learning_rate": 7.533750888799477e-05, "loss": 0.36558103561401367, "step": 3239, "token_acc": 0.8722860791826309 }, { "epoch": 3.6445444319460067, "grad_norm": 2.279172420501709, "learning_rate": 7.532147193749922e-05, "loss": 0.32443922758102417, "step": 3240, "token_acc": 0.888268156424581 }, { "epoch": 3.6456692913385824, "grad_norm": 2.7266578674316406, "learning_rate": 7.530543148279967e-05, "loss": 0.32815462350845337, "step": 3241, "token_acc": 0.8845108695652174 }, { "epoch": 3.6467941507311585, "grad_norm": 2.7017464637756348, "learning_rate": 7.528938752611595e-05, "loss": 0.3183969259262085, "step": 3242, "token_acc": 0.8877980364656382 }, { "epoch": 3.6479190101237347, "grad_norm": 2.336658477783203, "learning_rate": 7.527334006966836e-05, "loss": 0.28905805945396423, "step": 3243, "token_acc": 0.9194097616345063 }, { "epoch": 3.6490438695163103, "grad_norm": 2.3955976963043213, "learning_rate": 7.525728911567766e-05, "loss": 0.31542158126831055, "step": 3244, "token_acc": 0.8913813459268005 }, { "epoch": 3.6501687289088864, "grad_norm": 2.320495128631592, "learning_rate": 7.524123466636515e-05, "loss": 0.316032350063324, "step": 3245, "token_acc": 0.8942528735632184 }, { "epoch": 3.6512935883014626, "grad_norm": 2.452254295349121, "learning_rate": 7.522517672395257e-05, "loss": 0.29480379819869995, "step": 3246, "token_acc": 0.899641577060932 }, { "epoch": 3.6524184476940382, "grad_norm": 2.3840606212615967, "learning_rate": 7.520911529066217e-05, "loss": 0.3130316138267517, "step": 3247, "token_acc": 0.8980301274623407 }, { "epoch": 3.653543307086614, "grad_norm": 2.963371992111206, "learning_rate": 7.519305036871665e-05, "loss": 0.4334244132041931, "step": 3248, "token_acc": 0.8716763005780347 }, { "epoch": 3.65466816647919, "grad_norm": 2.684152364730835, "learning_rate": 7.517698196033923e-05, "loss": 0.26884791254997253, "step": 3249, "token_acc": 0.9143302180685359 }, { "epoch": 3.655793025871766, "grad_norm": 3.004262685775757, "learning_rate": 7.516091006775358e-05, "loss": 0.3572912812232971, "step": 3250, "token_acc": 0.8691588785046729 }, { "epoch": 3.656917885264342, "grad_norm": 2.8913536071777344, "learning_rate": 7.51448346931839e-05, "loss": 0.4298003911972046, "step": 3251, "token_acc": 0.866507747318236 }, { "epoch": 3.658042744656918, "grad_norm": 2.2786877155303955, "learning_rate": 7.512875583885476e-05, "loss": 0.32797715067863464, "step": 3252, "token_acc": 0.8881322957198443 }, { "epoch": 3.659167604049494, "grad_norm": 3.016805410385132, "learning_rate": 7.511267350699141e-05, "loss": 0.41987520456314087, "step": 3253, "token_acc": 0.8631138975966562 }, { "epoch": 3.6602924634420697, "grad_norm": 3.0842764377593994, "learning_rate": 7.509658769981935e-05, "loss": 0.30858516693115234, "step": 3254, "token_acc": 0.875 }, { "epoch": 3.661417322834646, "grad_norm": 2.471709728240967, "learning_rate": 7.508049841956475e-05, "loss": 0.265497088432312, "step": 3255, "token_acc": 0.9062937062937063 }, { "epoch": 3.6625421822272215, "grad_norm": 2.901207447052002, "learning_rate": 7.506440566845414e-05, "loss": 0.35442206263542175, "step": 3256, "token_acc": 0.8783068783068783 }, { "epoch": 3.6636670416197976, "grad_norm": 2.75688099861145, "learning_rate": 7.504830944871458e-05, "loss": 0.36775532364845276, "step": 3257, "token_acc": 0.8771551724137931 }, { "epoch": 3.6647919010123733, "grad_norm": 2.741954803466797, "learning_rate": 7.503220976257361e-05, "loss": 0.4454294443130493, "step": 3258, "token_acc": 0.8605128205128205 }, { "epoch": 3.6659167604049494, "grad_norm": 2.8952815532684326, "learning_rate": 7.501610661225926e-05, "loss": 0.38236379623413086, "step": 3259, "token_acc": 0.8796414852752881 }, { "epoch": 3.6670416197975255, "grad_norm": 2.9037795066833496, "learning_rate": 7.500000000000001e-05, "loss": 0.3007316589355469, "step": 3260, "token_acc": 0.884393063583815 }, { "epoch": 3.668166479190101, "grad_norm": 2.7145538330078125, "learning_rate": 7.498388992802481e-05, "loss": 0.33151260018348694, "step": 3261, "token_acc": 0.9024707412223667 }, { "epoch": 3.6692913385826773, "grad_norm": 2.379185438156128, "learning_rate": 7.496777639856316e-05, "loss": 0.29694560170173645, "step": 3262, "token_acc": 0.8891304347826087 }, { "epoch": 3.670416197975253, "grad_norm": 2.677476406097412, "learning_rate": 7.495165941384493e-05, "loss": 0.33122164011001587, "step": 3263, "token_acc": 0.8844011142061281 }, { "epoch": 3.671541057367829, "grad_norm": 2.6402769088745117, "learning_rate": 7.493553897610057e-05, "loss": 0.3207818865776062, "step": 3264, "token_acc": 0.8951434878587197 }, { "epoch": 3.6726659167604048, "grad_norm": 2.319094657897949, "learning_rate": 7.491941508756095e-05, "loss": 0.25208523869514465, "step": 3265, "token_acc": 0.9178255372945638 }, { "epoch": 3.673790776152981, "grad_norm": 2.6474902629852295, "learning_rate": 7.490328775045745e-05, "loss": 0.31298911571502686, "step": 3266, "token_acc": 0.900497512437811 }, { "epoch": 3.674915635545557, "grad_norm": 2.6214754581451416, "learning_rate": 7.488715696702189e-05, "loss": 0.3296423554420471, "step": 3267, "token_acc": 0.8958868894601543 }, { "epoch": 3.6760404949381327, "grad_norm": 2.5332705974578857, "learning_rate": 7.487102273948659e-05, "loss": 0.33033692836761475, "step": 3268, "token_acc": 0.8881199538638985 }, { "epoch": 3.677165354330709, "grad_norm": 2.363198757171631, "learning_rate": 7.485488507008433e-05, "loss": 0.35690411925315857, "step": 3269, "token_acc": 0.8792486583184258 }, { "epoch": 3.6782902137232845, "grad_norm": 2.5814898014068604, "learning_rate": 7.483874396104844e-05, "loss": 0.3029353618621826, "step": 3270, "token_acc": 0.8888888888888888 }, { "epoch": 3.6794150731158606, "grad_norm": 2.5075631141662598, "learning_rate": 7.482259941461259e-05, "loss": 0.3250587582588196, "step": 3271, "token_acc": 0.8878406708595388 }, { "epoch": 3.6805399325084363, "grad_norm": 2.6859092712402344, "learning_rate": 7.480645143301105e-05, "loss": 0.4017849266529083, "step": 3272, "token_acc": 0.8774038461538461 }, { "epoch": 3.6816647919010124, "grad_norm": 2.293003797531128, "learning_rate": 7.479030001847849e-05, "loss": 0.2817116677761078, "step": 3273, "token_acc": 0.9046321525885559 }, { "epoch": 3.6827896512935885, "grad_norm": 2.4585580825805664, "learning_rate": 7.477414517325012e-05, "loss": 0.27241408824920654, "step": 3274, "token_acc": 0.9098039215686274 }, { "epoch": 3.683914510686164, "grad_norm": 2.365406036376953, "learning_rate": 7.475798689956153e-05, "loss": 0.3296515941619873, "step": 3275, "token_acc": 0.8901220865704772 }, { "epoch": 3.6850393700787403, "grad_norm": 2.158036470413208, "learning_rate": 7.474182519964888e-05, "loss": 0.3091142177581787, "step": 3276, "token_acc": 0.8956611570247934 }, { "epoch": 3.686164229471316, "grad_norm": 2.5772013664245605, "learning_rate": 7.472566007574877e-05, "loss": 0.3900039494037628, "step": 3277, "token_acc": 0.864321608040201 }, { "epoch": 3.687289088863892, "grad_norm": 2.189011335372925, "learning_rate": 7.470949153009826e-05, "loss": 0.35453543066978455, "step": 3278, "token_acc": 0.8892988929889298 }, { "epoch": 3.6884139482564677, "grad_norm": 2.596928358078003, "learning_rate": 7.469331956493488e-05, "loss": 0.3836432695388794, "step": 3279, "token_acc": 0.8791801510248112 }, { "epoch": 3.689538807649044, "grad_norm": 2.5955400466918945, "learning_rate": 7.467714418249666e-05, "loss": 0.3476203978061676, "step": 3280, "token_acc": 0.8931818181818182 }, { "epoch": 3.69066366704162, "grad_norm": 2.8091158866882324, "learning_rate": 7.466096538502209e-05, "loss": 0.35183805227279663, "step": 3281, "token_acc": 0.886 }, { "epoch": 3.6917885264341956, "grad_norm": 1.952021598815918, "learning_rate": 7.464478317475012e-05, "loss": 0.19059520959854126, "step": 3282, "token_acc": 0.9415656008820287 }, { "epoch": 3.6929133858267718, "grad_norm": 2.6454825401306152, "learning_rate": 7.462859755392021e-05, "loss": 0.42484402656555176, "step": 3283, "token_acc": 0.8640776699029126 }, { "epoch": 3.6940382452193474, "grad_norm": 2.4435813426971436, "learning_rate": 7.461240852477223e-05, "loss": 0.2841171622276306, "step": 3284, "token_acc": 0.8970792767732962 }, { "epoch": 3.6951631046119235, "grad_norm": 2.326143980026245, "learning_rate": 7.45962160895466e-05, "loss": 0.3367239832878113, "step": 3285, "token_acc": 0.8870822041553749 }, { "epoch": 3.696287964004499, "grad_norm": 2.8688018321990967, "learning_rate": 7.458002025048413e-05, "loss": 0.4642949104309082, "step": 3286, "token_acc": 0.8537117903930131 }, { "epoch": 3.6974128233970753, "grad_norm": 2.7405788898468018, "learning_rate": 7.456382100982615e-05, "loss": 0.411314994096756, "step": 3287, "token_acc": 0.8676789587852495 }, { "epoch": 3.6985376827896514, "grad_norm": 2.6045114994049072, "learning_rate": 7.454761836981446e-05, "loss": 0.38373392820358276, "step": 3288, "token_acc": 0.8834841628959276 }, { "epoch": 3.699662542182227, "grad_norm": 2.8969314098358154, "learning_rate": 7.453141233269133e-05, "loss": 0.37326839566230774, "step": 3289, "token_acc": 0.8914835164835165 }, { "epoch": 3.7007874015748032, "grad_norm": 2.155118227005005, "learning_rate": 7.451520290069946e-05, "loss": 0.2787274122238159, "step": 3290, "token_acc": 0.9048507462686567 }, { "epoch": 3.7019122609673794, "grad_norm": 2.3752310276031494, "learning_rate": 7.449899007608209e-05, "loss": 0.3172364830970764, "step": 3291, "token_acc": 0.8949671772428884 }, { "epoch": 3.703037120359955, "grad_norm": 2.1676599979400635, "learning_rate": 7.448277386108285e-05, "loss": 0.331356406211853, "step": 3292, "token_acc": 0.9008620689655172 }, { "epoch": 3.7041619797525307, "grad_norm": 2.9168131351470947, "learning_rate": 7.446655425794591e-05, "loss": 0.3038160502910614, "step": 3293, "token_acc": 0.8934817170111288 }, { "epoch": 3.705286839145107, "grad_norm": 2.764822006225586, "learning_rate": 7.445033126891586e-05, "loss": 0.40975087881088257, "step": 3294, "token_acc": 0.8674540682414699 }, { "epoch": 3.706411698537683, "grad_norm": 2.4463295936584473, "learning_rate": 7.44341048962378e-05, "loss": 0.3265034258365631, "step": 3295, "token_acc": 0.9096612296110415 }, { "epoch": 3.7075365579302586, "grad_norm": 2.450193166732788, "learning_rate": 7.441787514215726e-05, "loss": 0.3856372833251953, "step": 3296, "token_acc": 0.8777393310265282 }, { "epoch": 3.7086614173228347, "grad_norm": 2.5960726737976074, "learning_rate": 7.440164200892026e-05, "loss": 0.37215477228164673, "step": 3297, "token_acc": 0.8776009791921665 }, { "epoch": 3.709786276715411, "grad_norm": 2.381479263305664, "learning_rate": 7.438540549877326e-05, "loss": 0.2925869822502136, "step": 3298, "token_acc": 0.9052496798975672 }, { "epoch": 3.7109111361079865, "grad_norm": 2.430850028991699, "learning_rate": 7.436916561396324e-05, "loss": 0.36340683698654175, "step": 3299, "token_acc": 0.8744541484716157 }, { "epoch": 3.712035995500562, "grad_norm": 2.192854166030884, "learning_rate": 7.43529223567376e-05, "loss": 0.2864559590816498, "step": 3300, "token_acc": 0.9005847953216374 }, { "epoch": 3.712035995500562, "eval_loss": 1.0689765214920044, "eval_runtime": 31.7402, "eval_samples_per_second": 25.299, "eval_steps_per_second": 3.182, "eval_token_acc": 0.7358721816134091, "step": 3300 } ], "logging_steps": 1, "max_steps": 8890, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 300, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 8.093092719833907e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }