{ "base": { "dir": "/ai/text-generation/models/MiniMaxAI_MiniMax-M2.5-3.0bpw-h6-exl3", "bpw": 3.018343639748571 }, "alts": [ { "dir": "/ai/text-generation/models/MiniMaxAI_MiniMax-M2.5-4.0bpw-h6-exl3", "bpw": 4.018129277476331 } ], "groups": [ { "idx": 0, "layers": [ "model.layers.0.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0002680603414773608, "dbits": 18874368 } ] }, { "idx": 1, "layers": [ "model.layers.0.self_attn.k_proj", "model.layers.0.self_attn.v_proj" ], "candidates": [ { "dkld": -0.028781108930706983, "dbits": 6291456 } ] }, { "idx": 2, "layers": [ "model.layers.0.self_attn.o_proj" ], "candidates": [ { "dkld": 0.013453960418701172, "dbits": 18874368 } ] }, { "idx": 3, "layers": [ "model.layers.0.block_sparse_moe.experts.0.w1", "model.layers.0.block_sparse_moe.experts.1.w1", "model.layers.0.block_sparse_moe.experts.2.w1", "model.layers.0.block_sparse_moe.experts.3.w1", "model.layers.0.block_sparse_moe.experts.4.w1", "model.layers.0.block_sparse_moe.experts.5.w1", "model.layers.0.block_sparse_moe.experts.6.w1", "model.layers.0.block_sparse_moe.experts.7.w1", "model.layers.0.block_sparse_moe.experts.8.w1", "model.layers.0.block_sparse_moe.experts.9.w1", "model.layers.0.block_sparse_moe.experts.10.w1", "model.layers.0.block_sparse_moe.experts.11.w1", "model.layers.0.block_sparse_moe.experts.12.w1", "model.layers.0.block_sparse_moe.experts.13.w1", "model.layers.0.block_sparse_moe.experts.14.w1", "model.layers.0.block_sparse_moe.experts.15.w1", "model.layers.0.block_sparse_moe.experts.16.w1", "model.layers.0.block_sparse_moe.experts.17.w1", "model.layers.0.block_sparse_moe.experts.18.w1", "model.layers.0.block_sparse_moe.experts.19.w1", "model.layers.0.block_sparse_moe.experts.20.w1", "model.layers.0.block_sparse_moe.experts.21.w1", "model.layers.0.block_sparse_moe.experts.22.w1", "model.layers.0.block_sparse_moe.experts.23.w1", "model.layers.0.block_sparse_moe.experts.24.w1", "model.layers.0.block_sparse_moe.experts.25.w1", "model.layers.0.block_sparse_moe.experts.26.w1", "model.layers.0.block_sparse_moe.experts.27.w1", "model.layers.0.block_sparse_moe.experts.28.w1", "model.layers.0.block_sparse_moe.experts.29.w1", "model.layers.0.block_sparse_moe.experts.30.w1", "model.layers.0.block_sparse_moe.experts.31.w1", "model.layers.0.block_sparse_moe.experts.32.w1", "model.layers.0.block_sparse_moe.experts.33.w1", "model.layers.0.block_sparse_moe.experts.34.w1", "model.layers.0.block_sparse_moe.experts.35.w1", "model.layers.0.block_sparse_moe.experts.36.w1", "model.layers.0.block_sparse_moe.experts.37.w1", "model.layers.0.block_sparse_moe.experts.38.w1", "model.layers.0.block_sparse_moe.experts.39.w1", "model.layers.0.block_sparse_moe.experts.40.w1", "model.layers.0.block_sparse_moe.experts.41.w1", "model.layers.0.block_sparse_moe.experts.42.w1", "model.layers.0.block_sparse_moe.experts.43.w1", "model.layers.0.block_sparse_moe.experts.44.w1", "model.layers.0.block_sparse_moe.experts.45.w1", "model.layers.0.block_sparse_moe.experts.46.w1", "model.layers.0.block_sparse_moe.experts.47.w1", "model.layers.0.block_sparse_moe.experts.48.w1", "model.layers.0.block_sparse_moe.experts.49.w1", "model.layers.0.block_sparse_moe.experts.50.w1", "model.layers.0.block_sparse_moe.experts.51.w1", "model.layers.0.block_sparse_moe.experts.52.w1", "model.layers.0.block_sparse_moe.experts.53.w1", "model.layers.0.block_sparse_moe.experts.54.w1", "model.layers.0.block_sparse_moe.experts.55.w1", "model.layers.0.block_sparse_moe.experts.56.w1", "model.layers.0.block_sparse_moe.experts.57.w1", "model.layers.0.block_sparse_moe.experts.58.w1", "model.layers.0.block_sparse_moe.experts.59.w1", "model.layers.0.block_sparse_moe.experts.60.w1", "model.layers.0.block_sparse_moe.experts.61.w1", "model.layers.0.block_sparse_moe.experts.62.w1", "model.layers.0.block_sparse_moe.experts.63.w1", "model.layers.0.block_sparse_moe.experts.64.w1", "model.layers.0.block_sparse_moe.experts.65.w1", "model.layers.0.block_sparse_moe.experts.66.w1", "model.layers.0.block_sparse_moe.experts.67.w1", "model.layers.0.block_sparse_moe.experts.68.w1", "model.layers.0.block_sparse_moe.experts.69.w1", "model.layers.0.block_sparse_moe.experts.70.w1", "model.layers.0.block_sparse_moe.experts.71.w1", "model.layers.0.block_sparse_moe.experts.72.w1", "model.layers.0.block_sparse_moe.experts.73.w1", "model.layers.0.block_sparse_moe.experts.74.w1", "model.layers.0.block_sparse_moe.experts.75.w1", "model.layers.0.block_sparse_moe.experts.76.w1", "model.layers.0.block_sparse_moe.experts.77.w1", "model.layers.0.block_sparse_moe.experts.78.w1", "model.layers.0.block_sparse_moe.experts.79.w1", "model.layers.0.block_sparse_moe.experts.80.w1", "model.layers.0.block_sparse_moe.experts.81.w1", "model.layers.0.block_sparse_moe.experts.82.w1", "model.layers.0.block_sparse_moe.experts.83.w1", "model.layers.0.block_sparse_moe.experts.84.w1", "model.layers.0.block_sparse_moe.experts.85.w1", "model.layers.0.block_sparse_moe.experts.86.w1", "model.layers.0.block_sparse_moe.experts.87.w1", "model.layers.0.block_sparse_moe.experts.88.w1", "model.layers.0.block_sparse_moe.experts.89.w1", "model.layers.0.block_sparse_moe.experts.90.w1", "model.layers.0.block_sparse_moe.experts.91.w1", "model.layers.0.block_sparse_moe.experts.92.w1", "model.layers.0.block_sparse_moe.experts.93.w1", "model.layers.0.block_sparse_moe.experts.94.w1", "model.layers.0.block_sparse_moe.experts.95.w1", "model.layers.0.block_sparse_moe.experts.96.w1", "model.layers.0.block_sparse_moe.experts.97.w1", "model.layers.0.block_sparse_moe.experts.98.w1", "model.layers.0.block_sparse_moe.experts.99.w1", "model.layers.0.block_sparse_moe.experts.100.w1", "model.layers.0.block_sparse_moe.experts.101.w1", "model.layers.0.block_sparse_moe.experts.102.w1", "model.layers.0.block_sparse_moe.experts.103.w1", "model.layers.0.block_sparse_moe.experts.104.w1", "model.layers.0.block_sparse_moe.experts.105.w1", "model.layers.0.block_sparse_moe.experts.106.w1", "model.layers.0.block_sparse_moe.experts.107.w1", "model.layers.0.block_sparse_moe.experts.108.w1", "model.layers.0.block_sparse_moe.experts.109.w1", "model.layers.0.block_sparse_moe.experts.110.w1", "model.layers.0.block_sparse_moe.experts.111.w1", "model.layers.0.block_sparse_moe.experts.112.w1", "model.layers.0.block_sparse_moe.experts.113.w1", "model.layers.0.block_sparse_moe.experts.114.w1", "model.layers.0.block_sparse_moe.experts.115.w1", "model.layers.0.block_sparse_moe.experts.116.w1", "model.layers.0.block_sparse_moe.experts.117.w1", "model.layers.0.block_sparse_moe.experts.118.w1", "model.layers.0.block_sparse_moe.experts.119.w1", "model.layers.0.block_sparse_moe.experts.120.w1", "model.layers.0.block_sparse_moe.experts.121.w1", "model.layers.0.block_sparse_moe.experts.122.w1", "model.layers.0.block_sparse_moe.experts.123.w1", "model.layers.0.block_sparse_moe.experts.124.w1", "model.layers.0.block_sparse_moe.experts.125.w1", "model.layers.0.block_sparse_moe.experts.126.w1", "model.layers.0.block_sparse_moe.experts.127.w1", "model.layers.0.block_sparse_moe.experts.128.w1", "model.layers.0.block_sparse_moe.experts.129.w1", "model.layers.0.block_sparse_moe.experts.130.w1", "model.layers.0.block_sparse_moe.experts.131.w1", "model.layers.0.block_sparse_moe.experts.132.w1", "model.layers.0.block_sparse_moe.experts.133.w1", "model.layers.0.block_sparse_moe.experts.134.w1", "model.layers.0.block_sparse_moe.experts.135.w1", "model.layers.0.block_sparse_moe.experts.136.w1", "model.layers.0.block_sparse_moe.experts.137.w1", "model.layers.0.block_sparse_moe.experts.138.w1", "model.layers.0.block_sparse_moe.experts.139.w1", "model.layers.0.block_sparse_moe.experts.140.w1", "model.layers.0.block_sparse_moe.experts.141.w1", "model.layers.0.block_sparse_moe.experts.142.w1", "model.layers.0.block_sparse_moe.experts.143.w1", "model.layers.0.block_sparse_moe.experts.144.w1", "model.layers.0.block_sparse_moe.experts.145.w1", "model.layers.0.block_sparse_moe.experts.146.w1", "model.layers.0.block_sparse_moe.experts.147.w1", "model.layers.0.block_sparse_moe.experts.148.w1", "model.layers.0.block_sparse_moe.experts.149.w1", "model.layers.0.block_sparse_moe.experts.150.w1", "model.layers.0.block_sparse_moe.experts.151.w1", "model.layers.0.block_sparse_moe.experts.152.w1", "model.layers.0.block_sparse_moe.experts.153.w1", "model.layers.0.block_sparse_moe.experts.154.w1", "model.layers.0.block_sparse_moe.experts.155.w1", "model.layers.0.block_sparse_moe.experts.156.w1", "model.layers.0.block_sparse_moe.experts.157.w1", "model.layers.0.block_sparse_moe.experts.158.w1", "model.layers.0.block_sparse_moe.experts.159.w1", "model.layers.0.block_sparse_moe.experts.160.w1", "model.layers.0.block_sparse_moe.experts.161.w1", "model.layers.0.block_sparse_moe.experts.162.w1", "model.layers.0.block_sparse_moe.experts.163.w1", "model.layers.0.block_sparse_moe.experts.164.w1", "model.layers.0.block_sparse_moe.experts.165.w1", "model.layers.0.block_sparse_moe.experts.166.w1", "model.layers.0.block_sparse_moe.experts.167.w1", "model.layers.0.block_sparse_moe.experts.168.w1", "model.layers.0.block_sparse_moe.experts.169.w1", "model.layers.0.block_sparse_moe.experts.170.w1", "model.layers.0.block_sparse_moe.experts.171.w1", "model.layers.0.block_sparse_moe.experts.172.w1", "model.layers.0.block_sparse_moe.experts.173.w1", "model.layers.0.block_sparse_moe.experts.174.w1", "model.layers.0.block_sparse_moe.experts.175.w1", "model.layers.0.block_sparse_moe.experts.176.w1", "model.layers.0.block_sparse_moe.experts.177.w1", "model.layers.0.block_sparse_moe.experts.178.w1", "model.layers.0.block_sparse_moe.experts.179.w1", "model.layers.0.block_sparse_moe.experts.180.w1", "model.layers.0.block_sparse_moe.experts.181.w1", "model.layers.0.block_sparse_moe.experts.182.w1", "model.layers.0.block_sparse_moe.experts.183.w1", "model.layers.0.block_sparse_moe.experts.184.w1", "model.layers.0.block_sparse_moe.experts.185.w1", "model.layers.0.block_sparse_moe.experts.186.w1", "model.layers.0.block_sparse_moe.experts.187.w1", "model.layers.0.block_sparse_moe.experts.188.w1", "model.layers.0.block_sparse_moe.experts.189.w1", "model.layers.0.block_sparse_moe.experts.190.w1", "model.layers.0.block_sparse_moe.experts.191.w1", "model.layers.0.block_sparse_moe.experts.192.w1", "model.layers.0.block_sparse_moe.experts.193.w1", "model.layers.0.block_sparse_moe.experts.194.w1", "model.layers.0.block_sparse_moe.experts.195.w1", "model.layers.0.block_sparse_moe.experts.196.w1", "model.layers.0.block_sparse_moe.experts.197.w1", "model.layers.0.block_sparse_moe.experts.198.w1", "model.layers.0.block_sparse_moe.experts.199.w1", "model.layers.0.block_sparse_moe.experts.200.w1", "model.layers.0.block_sparse_moe.experts.201.w1", "model.layers.0.block_sparse_moe.experts.202.w1", "model.layers.0.block_sparse_moe.experts.203.w1", "model.layers.0.block_sparse_moe.experts.204.w1", "model.layers.0.block_sparse_moe.experts.205.w1", "model.layers.0.block_sparse_moe.experts.206.w1", "model.layers.0.block_sparse_moe.experts.207.w1", "model.layers.0.block_sparse_moe.experts.208.w1", "model.layers.0.block_sparse_moe.experts.209.w1", "model.layers.0.block_sparse_moe.experts.210.w1", "model.layers.0.block_sparse_moe.experts.211.w1", "model.layers.0.block_sparse_moe.experts.212.w1", "model.layers.0.block_sparse_moe.experts.213.w1", "model.layers.0.block_sparse_moe.experts.214.w1", "model.layers.0.block_sparse_moe.experts.215.w1", "model.layers.0.block_sparse_moe.experts.216.w1", "model.layers.0.block_sparse_moe.experts.217.w1", "model.layers.0.block_sparse_moe.experts.218.w1", "model.layers.0.block_sparse_moe.experts.219.w1", "model.layers.0.block_sparse_moe.experts.220.w1", "model.layers.0.block_sparse_moe.experts.221.w1", "model.layers.0.block_sparse_moe.experts.222.w1", "model.layers.0.block_sparse_moe.experts.223.w1", "model.layers.0.block_sparse_moe.experts.224.w1", "model.layers.0.block_sparse_moe.experts.225.w1", "model.layers.0.block_sparse_moe.experts.226.w1", "model.layers.0.block_sparse_moe.experts.227.w1", "model.layers.0.block_sparse_moe.experts.228.w1", "model.layers.0.block_sparse_moe.experts.229.w1", "model.layers.0.block_sparse_moe.experts.230.w1", "model.layers.0.block_sparse_moe.experts.231.w1", "model.layers.0.block_sparse_moe.experts.232.w1", "model.layers.0.block_sparse_moe.experts.233.w1", "model.layers.0.block_sparse_moe.experts.234.w1", "model.layers.0.block_sparse_moe.experts.235.w1", "model.layers.0.block_sparse_moe.experts.236.w1", "model.layers.0.block_sparse_moe.experts.237.w1", "model.layers.0.block_sparse_moe.experts.238.w1", "model.layers.0.block_sparse_moe.experts.239.w1", "model.layers.0.block_sparse_moe.experts.240.w1", "model.layers.0.block_sparse_moe.experts.241.w1", "model.layers.0.block_sparse_moe.experts.242.w1", "model.layers.0.block_sparse_moe.experts.243.w1", "model.layers.0.block_sparse_moe.experts.244.w1", "model.layers.0.block_sparse_moe.experts.245.w1", "model.layers.0.block_sparse_moe.experts.246.w1", "model.layers.0.block_sparse_moe.experts.247.w1", "model.layers.0.block_sparse_moe.experts.248.w1", "model.layers.0.block_sparse_moe.experts.249.w1", "model.layers.0.block_sparse_moe.experts.250.w1", "model.layers.0.block_sparse_moe.experts.251.w1", "model.layers.0.block_sparse_moe.experts.252.w1", "model.layers.0.block_sparse_moe.experts.253.w1", "model.layers.0.block_sparse_moe.experts.254.w1", "model.layers.0.block_sparse_moe.experts.255.w1", "model.layers.0.block_sparse_moe.experts.0.w3", "model.layers.0.block_sparse_moe.experts.1.w3", "model.layers.0.block_sparse_moe.experts.2.w3", "model.layers.0.block_sparse_moe.experts.3.w3", "model.layers.0.block_sparse_moe.experts.4.w3", "model.layers.0.block_sparse_moe.experts.5.w3", "model.layers.0.block_sparse_moe.experts.6.w3", "model.layers.0.block_sparse_moe.experts.7.w3", "model.layers.0.block_sparse_moe.experts.8.w3", "model.layers.0.block_sparse_moe.experts.9.w3", "model.layers.0.block_sparse_moe.experts.10.w3", "model.layers.0.block_sparse_moe.experts.11.w3", "model.layers.0.block_sparse_moe.experts.12.w3", "model.layers.0.block_sparse_moe.experts.13.w3", "model.layers.0.block_sparse_moe.experts.14.w3", "model.layers.0.block_sparse_moe.experts.15.w3", "model.layers.0.block_sparse_moe.experts.16.w3", "model.layers.0.block_sparse_moe.experts.17.w3", "model.layers.0.block_sparse_moe.experts.18.w3", "model.layers.0.block_sparse_moe.experts.19.w3", "model.layers.0.block_sparse_moe.experts.20.w3", "model.layers.0.block_sparse_moe.experts.21.w3", "model.layers.0.block_sparse_moe.experts.22.w3", "model.layers.0.block_sparse_moe.experts.23.w3", "model.layers.0.block_sparse_moe.experts.24.w3", "model.layers.0.block_sparse_moe.experts.25.w3", "model.layers.0.block_sparse_moe.experts.26.w3", "model.layers.0.block_sparse_moe.experts.27.w3", "model.layers.0.block_sparse_moe.experts.28.w3", "model.layers.0.block_sparse_moe.experts.29.w3", "model.layers.0.block_sparse_moe.experts.30.w3", "model.layers.0.block_sparse_moe.experts.31.w3", "model.layers.0.block_sparse_moe.experts.32.w3", "model.layers.0.block_sparse_moe.experts.33.w3", "model.layers.0.block_sparse_moe.experts.34.w3", "model.layers.0.block_sparse_moe.experts.35.w3", "model.layers.0.block_sparse_moe.experts.36.w3", "model.layers.0.block_sparse_moe.experts.37.w3", "model.layers.0.block_sparse_moe.experts.38.w3", "model.layers.0.block_sparse_moe.experts.39.w3", "model.layers.0.block_sparse_moe.experts.40.w3", "model.layers.0.block_sparse_moe.experts.41.w3", "model.layers.0.block_sparse_moe.experts.42.w3", "model.layers.0.block_sparse_moe.experts.43.w3", "model.layers.0.block_sparse_moe.experts.44.w3", "model.layers.0.block_sparse_moe.experts.45.w3", "model.layers.0.block_sparse_moe.experts.46.w3", "model.layers.0.block_sparse_moe.experts.47.w3", "model.layers.0.block_sparse_moe.experts.48.w3", "model.layers.0.block_sparse_moe.experts.49.w3", "model.layers.0.block_sparse_moe.experts.50.w3", "model.layers.0.block_sparse_moe.experts.51.w3", "model.layers.0.block_sparse_moe.experts.52.w3", "model.layers.0.block_sparse_moe.experts.53.w3", "model.layers.0.block_sparse_moe.experts.54.w3", "model.layers.0.block_sparse_moe.experts.55.w3", "model.layers.0.block_sparse_moe.experts.56.w3", "model.layers.0.block_sparse_moe.experts.57.w3", "model.layers.0.block_sparse_moe.experts.58.w3", "model.layers.0.block_sparse_moe.experts.59.w3", "model.layers.0.block_sparse_moe.experts.60.w3", "model.layers.0.block_sparse_moe.experts.61.w3", "model.layers.0.block_sparse_moe.experts.62.w3", "model.layers.0.block_sparse_moe.experts.63.w3", "model.layers.0.block_sparse_moe.experts.64.w3", "model.layers.0.block_sparse_moe.experts.65.w3", "model.layers.0.block_sparse_moe.experts.66.w3", "model.layers.0.block_sparse_moe.experts.67.w3", "model.layers.0.block_sparse_moe.experts.68.w3", "model.layers.0.block_sparse_moe.experts.69.w3", "model.layers.0.block_sparse_moe.experts.70.w3", "model.layers.0.block_sparse_moe.experts.71.w3", "model.layers.0.block_sparse_moe.experts.72.w3", "model.layers.0.block_sparse_moe.experts.73.w3", "model.layers.0.block_sparse_moe.experts.74.w3", "model.layers.0.block_sparse_moe.experts.75.w3", "model.layers.0.block_sparse_moe.experts.76.w3", "model.layers.0.block_sparse_moe.experts.77.w3", "model.layers.0.block_sparse_moe.experts.78.w3", "model.layers.0.block_sparse_moe.experts.79.w3", "model.layers.0.block_sparse_moe.experts.80.w3", "model.layers.0.block_sparse_moe.experts.81.w3", "model.layers.0.block_sparse_moe.experts.82.w3", "model.layers.0.block_sparse_moe.experts.83.w3", "model.layers.0.block_sparse_moe.experts.84.w3", "model.layers.0.block_sparse_moe.experts.85.w3", "model.layers.0.block_sparse_moe.experts.86.w3", "model.layers.0.block_sparse_moe.experts.87.w3", "model.layers.0.block_sparse_moe.experts.88.w3", "model.layers.0.block_sparse_moe.experts.89.w3", "model.layers.0.block_sparse_moe.experts.90.w3", "model.layers.0.block_sparse_moe.experts.91.w3", "model.layers.0.block_sparse_moe.experts.92.w3", "model.layers.0.block_sparse_moe.experts.93.w3", "model.layers.0.block_sparse_moe.experts.94.w3", "model.layers.0.block_sparse_moe.experts.95.w3", "model.layers.0.block_sparse_moe.experts.96.w3", "model.layers.0.block_sparse_moe.experts.97.w3", "model.layers.0.block_sparse_moe.experts.98.w3", "model.layers.0.block_sparse_moe.experts.99.w3", "model.layers.0.block_sparse_moe.experts.100.w3", "model.layers.0.block_sparse_moe.experts.101.w3", "model.layers.0.block_sparse_moe.experts.102.w3", "model.layers.0.block_sparse_moe.experts.103.w3", "model.layers.0.block_sparse_moe.experts.104.w3", "model.layers.0.block_sparse_moe.experts.105.w3", "model.layers.0.block_sparse_moe.experts.106.w3", "model.layers.0.block_sparse_moe.experts.107.w3", "model.layers.0.block_sparse_moe.experts.108.w3", "model.layers.0.block_sparse_moe.experts.109.w3", "model.layers.0.block_sparse_moe.experts.110.w3", "model.layers.0.block_sparse_moe.experts.111.w3", "model.layers.0.block_sparse_moe.experts.112.w3", "model.layers.0.block_sparse_moe.experts.113.w3", "model.layers.0.block_sparse_moe.experts.114.w3", "model.layers.0.block_sparse_moe.experts.115.w3", "model.layers.0.block_sparse_moe.experts.116.w3", "model.layers.0.block_sparse_moe.experts.117.w3", "model.layers.0.block_sparse_moe.experts.118.w3", "model.layers.0.block_sparse_moe.experts.119.w3", "model.layers.0.block_sparse_moe.experts.120.w3", "model.layers.0.block_sparse_moe.experts.121.w3", "model.layers.0.block_sparse_moe.experts.122.w3", "model.layers.0.block_sparse_moe.experts.123.w3", "model.layers.0.block_sparse_moe.experts.124.w3", "model.layers.0.block_sparse_moe.experts.125.w3", "model.layers.0.block_sparse_moe.experts.126.w3", "model.layers.0.block_sparse_moe.experts.127.w3", "model.layers.0.block_sparse_moe.experts.128.w3", "model.layers.0.block_sparse_moe.experts.129.w3", "model.layers.0.block_sparse_moe.experts.130.w3", "model.layers.0.block_sparse_moe.experts.131.w3", "model.layers.0.block_sparse_moe.experts.132.w3", "model.layers.0.block_sparse_moe.experts.133.w3", "model.layers.0.block_sparse_moe.experts.134.w3", "model.layers.0.block_sparse_moe.experts.135.w3", "model.layers.0.block_sparse_moe.experts.136.w3", "model.layers.0.block_sparse_moe.experts.137.w3", "model.layers.0.block_sparse_moe.experts.138.w3", "model.layers.0.block_sparse_moe.experts.139.w3", "model.layers.0.block_sparse_moe.experts.140.w3", "model.layers.0.block_sparse_moe.experts.141.w3", "model.layers.0.block_sparse_moe.experts.142.w3", "model.layers.0.block_sparse_moe.experts.143.w3", "model.layers.0.block_sparse_moe.experts.144.w3", "model.layers.0.block_sparse_moe.experts.145.w3", "model.layers.0.block_sparse_moe.experts.146.w3", "model.layers.0.block_sparse_moe.experts.147.w3", "model.layers.0.block_sparse_moe.experts.148.w3", "model.layers.0.block_sparse_moe.experts.149.w3", "model.layers.0.block_sparse_moe.experts.150.w3", "model.layers.0.block_sparse_moe.experts.151.w3", "model.layers.0.block_sparse_moe.experts.152.w3", "model.layers.0.block_sparse_moe.experts.153.w3", "model.layers.0.block_sparse_moe.experts.154.w3", "model.layers.0.block_sparse_moe.experts.155.w3", "model.layers.0.block_sparse_moe.experts.156.w3", "model.layers.0.block_sparse_moe.experts.157.w3", "model.layers.0.block_sparse_moe.experts.158.w3", "model.layers.0.block_sparse_moe.experts.159.w3", "model.layers.0.block_sparse_moe.experts.160.w3", "model.layers.0.block_sparse_moe.experts.161.w3", "model.layers.0.block_sparse_moe.experts.162.w3", "model.layers.0.block_sparse_moe.experts.163.w3", "model.layers.0.block_sparse_moe.experts.164.w3", "model.layers.0.block_sparse_moe.experts.165.w3", "model.layers.0.block_sparse_moe.experts.166.w3", "model.layers.0.block_sparse_moe.experts.167.w3", "model.layers.0.block_sparse_moe.experts.168.w3", "model.layers.0.block_sparse_moe.experts.169.w3", "model.layers.0.block_sparse_moe.experts.170.w3", "model.layers.0.block_sparse_moe.experts.171.w3", "model.layers.0.block_sparse_moe.experts.172.w3", "model.layers.0.block_sparse_moe.experts.173.w3", "model.layers.0.block_sparse_moe.experts.174.w3", "model.layers.0.block_sparse_moe.experts.175.w3", "model.layers.0.block_sparse_moe.experts.176.w3", "model.layers.0.block_sparse_moe.experts.177.w3", "model.layers.0.block_sparse_moe.experts.178.w3", "model.layers.0.block_sparse_moe.experts.179.w3", "model.layers.0.block_sparse_moe.experts.180.w3", "model.layers.0.block_sparse_moe.experts.181.w3", "model.layers.0.block_sparse_moe.experts.182.w3", "model.layers.0.block_sparse_moe.experts.183.w3", "model.layers.0.block_sparse_moe.experts.184.w3", "model.layers.0.block_sparse_moe.experts.185.w3", "model.layers.0.block_sparse_moe.experts.186.w3", "model.layers.0.block_sparse_moe.experts.187.w3", "model.layers.0.block_sparse_moe.experts.188.w3", "model.layers.0.block_sparse_moe.experts.189.w3", "model.layers.0.block_sparse_moe.experts.190.w3", "model.layers.0.block_sparse_moe.experts.191.w3", "model.layers.0.block_sparse_moe.experts.192.w3", "model.layers.0.block_sparse_moe.experts.193.w3", "model.layers.0.block_sparse_moe.experts.194.w3", "model.layers.0.block_sparse_moe.experts.195.w3", "model.layers.0.block_sparse_moe.experts.196.w3", "model.layers.0.block_sparse_moe.experts.197.w3", "model.layers.0.block_sparse_moe.experts.198.w3", "model.layers.0.block_sparse_moe.experts.199.w3", "model.layers.0.block_sparse_moe.experts.200.w3", "model.layers.0.block_sparse_moe.experts.201.w3", "model.layers.0.block_sparse_moe.experts.202.w3", "model.layers.0.block_sparse_moe.experts.203.w3", "model.layers.0.block_sparse_moe.experts.204.w3", "model.layers.0.block_sparse_moe.experts.205.w3", "model.layers.0.block_sparse_moe.experts.206.w3", "model.layers.0.block_sparse_moe.experts.207.w3", "model.layers.0.block_sparse_moe.experts.208.w3", "model.layers.0.block_sparse_moe.experts.209.w3", "model.layers.0.block_sparse_moe.experts.210.w3", "model.layers.0.block_sparse_moe.experts.211.w3", "model.layers.0.block_sparse_moe.experts.212.w3", "model.layers.0.block_sparse_moe.experts.213.w3", "model.layers.0.block_sparse_moe.experts.214.w3", "model.layers.0.block_sparse_moe.experts.215.w3", "model.layers.0.block_sparse_moe.experts.216.w3", "model.layers.0.block_sparse_moe.experts.217.w3", "model.layers.0.block_sparse_moe.experts.218.w3", "model.layers.0.block_sparse_moe.experts.219.w3", "model.layers.0.block_sparse_moe.experts.220.w3", "model.layers.0.block_sparse_moe.experts.221.w3", "model.layers.0.block_sparse_moe.experts.222.w3", "model.layers.0.block_sparse_moe.experts.223.w3", "model.layers.0.block_sparse_moe.experts.224.w3", "model.layers.0.block_sparse_moe.experts.225.w3", "model.layers.0.block_sparse_moe.experts.226.w3", "model.layers.0.block_sparse_moe.experts.227.w3", "model.layers.0.block_sparse_moe.experts.228.w3", "model.layers.0.block_sparse_moe.experts.229.w3", "model.layers.0.block_sparse_moe.experts.230.w3", "model.layers.0.block_sparse_moe.experts.231.w3", "model.layers.0.block_sparse_moe.experts.232.w3", "model.layers.0.block_sparse_moe.experts.233.w3", "model.layers.0.block_sparse_moe.experts.234.w3", "model.layers.0.block_sparse_moe.experts.235.w3", "model.layers.0.block_sparse_moe.experts.236.w3", "model.layers.0.block_sparse_moe.experts.237.w3", "model.layers.0.block_sparse_moe.experts.238.w3", "model.layers.0.block_sparse_moe.experts.239.w3", "model.layers.0.block_sparse_moe.experts.240.w3", "model.layers.0.block_sparse_moe.experts.241.w3", "model.layers.0.block_sparse_moe.experts.242.w3", "model.layers.0.block_sparse_moe.experts.243.w3", "model.layers.0.block_sparse_moe.experts.244.w3", "model.layers.0.block_sparse_moe.experts.245.w3", "model.layers.0.block_sparse_moe.experts.246.w3", "model.layers.0.block_sparse_moe.experts.247.w3", "model.layers.0.block_sparse_moe.experts.248.w3", "model.layers.0.block_sparse_moe.experts.249.w3", "model.layers.0.block_sparse_moe.experts.250.w3", "model.layers.0.block_sparse_moe.experts.251.w3", "model.layers.0.block_sparse_moe.experts.252.w3", "model.layers.0.block_sparse_moe.experts.253.w3", "model.layers.0.block_sparse_moe.experts.254.w3", "model.layers.0.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.007523819804191589, "dbits": 2415919104 } ] }, { "idx": 4, "layers": [ "model.layers.0.block_sparse_moe.experts.0.w2", "model.layers.0.block_sparse_moe.experts.1.w2", "model.layers.0.block_sparse_moe.experts.2.w2", "model.layers.0.block_sparse_moe.experts.3.w2", "model.layers.0.block_sparse_moe.experts.4.w2", "model.layers.0.block_sparse_moe.experts.5.w2", "model.layers.0.block_sparse_moe.experts.6.w2", "model.layers.0.block_sparse_moe.experts.7.w2", "model.layers.0.block_sparse_moe.experts.8.w2", "model.layers.0.block_sparse_moe.experts.9.w2", "model.layers.0.block_sparse_moe.experts.10.w2", "model.layers.0.block_sparse_moe.experts.11.w2", "model.layers.0.block_sparse_moe.experts.12.w2", "model.layers.0.block_sparse_moe.experts.13.w2", "model.layers.0.block_sparse_moe.experts.14.w2", "model.layers.0.block_sparse_moe.experts.15.w2", "model.layers.0.block_sparse_moe.experts.16.w2", "model.layers.0.block_sparse_moe.experts.17.w2", "model.layers.0.block_sparse_moe.experts.18.w2", "model.layers.0.block_sparse_moe.experts.19.w2", "model.layers.0.block_sparse_moe.experts.20.w2", "model.layers.0.block_sparse_moe.experts.21.w2", "model.layers.0.block_sparse_moe.experts.22.w2", "model.layers.0.block_sparse_moe.experts.23.w2", "model.layers.0.block_sparse_moe.experts.24.w2", "model.layers.0.block_sparse_moe.experts.25.w2", "model.layers.0.block_sparse_moe.experts.26.w2", "model.layers.0.block_sparse_moe.experts.27.w2", "model.layers.0.block_sparse_moe.experts.28.w2", "model.layers.0.block_sparse_moe.experts.29.w2", "model.layers.0.block_sparse_moe.experts.30.w2", "model.layers.0.block_sparse_moe.experts.31.w2", "model.layers.0.block_sparse_moe.experts.32.w2", "model.layers.0.block_sparse_moe.experts.33.w2", "model.layers.0.block_sparse_moe.experts.34.w2", "model.layers.0.block_sparse_moe.experts.35.w2", "model.layers.0.block_sparse_moe.experts.36.w2", "model.layers.0.block_sparse_moe.experts.37.w2", "model.layers.0.block_sparse_moe.experts.38.w2", "model.layers.0.block_sparse_moe.experts.39.w2", "model.layers.0.block_sparse_moe.experts.40.w2", "model.layers.0.block_sparse_moe.experts.41.w2", "model.layers.0.block_sparse_moe.experts.42.w2", "model.layers.0.block_sparse_moe.experts.43.w2", "model.layers.0.block_sparse_moe.experts.44.w2", "model.layers.0.block_sparse_moe.experts.45.w2", "model.layers.0.block_sparse_moe.experts.46.w2", "model.layers.0.block_sparse_moe.experts.47.w2", "model.layers.0.block_sparse_moe.experts.48.w2", "model.layers.0.block_sparse_moe.experts.49.w2", "model.layers.0.block_sparse_moe.experts.50.w2", "model.layers.0.block_sparse_moe.experts.51.w2", "model.layers.0.block_sparse_moe.experts.52.w2", "model.layers.0.block_sparse_moe.experts.53.w2", "model.layers.0.block_sparse_moe.experts.54.w2", "model.layers.0.block_sparse_moe.experts.55.w2", "model.layers.0.block_sparse_moe.experts.56.w2", "model.layers.0.block_sparse_moe.experts.57.w2", "model.layers.0.block_sparse_moe.experts.58.w2", "model.layers.0.block_sparse_moe.experts.59.w2", "model.layers.0.block_sparse_moe.experts.60.w2", "model.layers.0.block_sparse_moe.experts.61.w2", "model.layers.0.block_sparse_moe.experts.62.w2", "model.layers.0.block_sparse_moe.experts.63.w2", "model.layers.0.block_sparse_moe.experts.64.w2", "model.layers.0.block_sparse_moe.experts.65.w2", "model.layers.0.block_sparse_moe.experts.66.w2", "model.layers.0.block_sparse_moe.experts.67.w2", "model.layers.0.block_sparse_moe.experts.68.w2", "model.layers.0.block_sparse_moe.experts.69.w2", "model.layers.0.block_sparse_moe.experts.70.w2", "model.layers.0.block_sparse_moe.experts.71.w2", "model.layers.0.block_sparse_moe.experts.72.w2", "model.layers.0.block_sparse_moe.experts.73.w2", "model.layers.0.block_sparse_moe.experts.74.w2", "model.layers.0.block_sparse_moe.experts.75.w2", "model.layers.0.block_sparse_moe.experts.76.w2", "model.layers.0.block_sparse_moe.experts.77.w2", "model.layers.0.block_sparse_moe.experts.78.w2", "model.layers.0.block_sparse_moe.experts.79.w2", "model.layers.0.block_sparse_moe.experts.80.w2", "model.layers.0.block_sparse_moe.experts.81.w2", "model.layers.0.block_sparse_moe.experts.82.w2", "model.layers.0.block_sparse_moe.experts.83.w2", "model.layers.0.block_sparse_moe.experts.84.w2", "model.layers.0.block_sparse_moe.experts.85.w2", "model.layers.0.block_sparse_moe.experts.86.w2", "model.layers.0.block_sparse_moe.experts.87.w2", "model.layers.0.block_sparse_moe.experts.88.w2", "model.layers.0.block_sparse_moe.experts.89.w2", "model.layers.0.block_sparse_moe.experts.90.w2", "model.layers.0.block_sparse_moe.experts.91.w2", "model.layers.0.block_sparse_moe.experts.92.w2", "model.layers.0.block_sparse_moe.experts.93.w2", "model.layers.0.block_sparse_moe.experts.94.w2", "model.layers.0.block_sparse_moe.experts.95.w2", "model.layers.0.block_sparse_moe.experts.96.w2", "model.layers.0.block_sparse_moe.experts.97.w2", "model.layers.0.block_sparse_moe.experts.98.w2", "model.layers.0.block_sparse_moe.experts.99.w2", "model.layers.0.block_sparse_moe.experts.100.w2", "model.layers.0.block_sparse_moe.experts.101.w2", "model.layers.0.block_sparse_moe.experts.102.w2", "model.layers.0.block_sparse_moe.experts.103.w2", "model.layers.0.block_sparse_moe.experts.104.w2", "model.layers.0.block_sparse_moe.experts.105.w2", "model.layers.0.block_sparse_moe.experts.106.w2", "model.layers.0.block_sparse_moe.experts.107.w2", "model.layers.0.block_sparse_moe.experts.108.w2", "model.layers.0.block_sparse_moe.experts.109.w2", "model.layers.0.block_sparse_moe.experts.110.w2", "model.layers.0.block_sparse_moe.experts.111.w2", "model.layers.0.block_sparse_moe.experts.112.w2", "model.layers.0.block_sparse_moe.experts.113.w2", "model.layers.0.block_sparse_moe.experts.114.w2", "model.layers.0.block_sparse_moe.experts.115.w2", "model.layers.0.block_sparse_moe.experts.116.w2", "model.layers.0.block_sparse_moe.experts.117.w2", "model.layers.0.block_sparse_moe.experts.118.w2", "model.layers.0.block_sparse_moe.experts.119.w2", "model.layers.0.block_sparse_moe.experts.120.w2", "model.layers.0.block_sparse_moe.experts.121.w2", "model.layers.0.block_sparse_moe.experts.122.w2", "model.layers.0.block_sparse_moe.experts.123.w2", "model.layers.0.block_sparse_moe.experts.124.w2", "model.layers.0.block_sparse_moe.experts.125.w2", "model.layers.0.block_sparse_moe.experts.126.w2", "model.layers.0.block_sparse_moe.experts.127.w2", "model.layers.0.block_sparse_moe.experts.128.w2", "model.layers.0.block_sparse_moe.experts.129.w2", "model.layers.0.block_sparse_moe.experts.130.w2", "model.layers.0.block_sparse_moe.experts.131.w2", "model.layers.0.block_sparse_moe.experts.132.w2", "model.layers.0.block_sparse_moe.experts.133.w2", "model.layers.0.block_sparse_moe.experts.134.w2", "model.layers.0.block_sparse_moe.experts.135.w2", "model.layers.0.block_sparse_moe.experts.136.w2", "model.layers.0.block_sparse_moe.experts.137.w2", "model.layers.0.block_sparse_moe.experts.138.w2", "model.layers.0.block_sparse_moe.experts.139.w2", "model.layers.0.block_sparse_moe.experts.140.w2", "model.layers.0.block_sparse_moe.experts.141.w2", "model.layers.0.block_sparse_moe.experts.142.w2", "model.layers.0.block_sparse_moe.experts.143.w2", "model.layers.0.block_sparse_moe.experts.144.w2", "model.layers.0.block_sparse_moe.experts.145.w2", "model.layers.0.block_sparse_moe.experts.146.w2", "model.layers.0.block_sparse_moe.experts.147.w2", "model.layers.0.block_sparse_moe.experts.148.w2", "model.layers.0.block_sparse_moe.experts.149.w2", "model.layers.0.block_sparse_moe.experts.150.w2", "model.layers.0.block_sparse_moe.experts.151.w2", "model.layers.0.block_sparse_moe.experts.152.w2", "model.layers.0.block_sparse_moe.experts.153.w2", "model.layers.0.block_sparse_moe.experts.154.w2", "model.layers.0.block_sparse_moe.experts.155.w2", "model.layers.0.block_sparse_moe.experts.156.w2", "model.layers.0.block_sparse_moe.experts.157.w2", "model.layers.0.block_sparse_moe.experts.158.w2", "model.layers.0.block_sparse_moe.experts.159.w2", "model.layers.0.block_sparse_moe.experts.160.w2", "model.layers.0.block_sparse_moe.experts.161.w2", "model.layers.0.block_sparse_moe.experts.162.w2", "model.layers.0.block_sparse_moe.experts.163.w2", "model.layers.0.block_sparse_moe.experts.164.w2", "model.layers.0.block_sparse_moe.experts.165.w2", "model.layers.0.block_sparse_moe.experts.166.w2", "model.layers.0.block_sparse_moe.experts.167.w2", "model.layers.0.block_sparse_moe.experts.168.w2", "model.layers.0.block_sparse_moe.experts.169.w2", "model.layers.0.block_sparse_moe.experts.170.w2", "model.layers.0.block_sparse_moe.experts.171.w2", "model.layers.0.block_sparse_moe.experts.172.w2", "model.layers.0.block_sparse_moe.experts.173.w2", "model.layers.0.block_sparse_moe.experts.174.w2", "model.layers.0.block_sparse_moe.experts.175.w2", "model.layers.0.block_sparse_moe.experts.176.w2", "model.layers.0.block_sparse_moe.experts.177.w2", "model.layers.0.block_sparse_moe.experts.178.w2", "model.layers.0.block_sparse_moe.experts.179.w2", "model.layers.0.block_sparse_moe.experts.180.w2", "model.layers.0.block_sparse_moe.experts.181.w2", "model.layers.0.block_sparse_moe.experts.182.w2", "model.layers.0.block_sparse_moe.experts.183.w2", "model.layers.0.block_sparse_moe.experts.184.w2", "model.layers.0.block_sparse_moe.experts.185.w2", "model.layers.0.block_sparse_moe.experts.186.w2", "model.layers.0.block_sparse_moe.experts.187.w2", "model.layers.0.block_sparse_moe.experts.188.w2", "model.layers.0.block_sparse_moe.experts.189.w2", "model.layers.0.block_sparse_moe.experts.190.w2", "model.layers.0.block_sparse_moe.experts.191.w2", "model.layers.0.block_sparse_moe.experts.192.w2", "model.layers.0.block_sparse_moe.experts.193.w2", "model.layers.0.block_sparse_moe.experts.194.w2", "model.layers.0.block_sparse_moe.experts.195.w2", "model.layers.0.block_sparse_moe.experts.196.w2", "model.layers.0.block_sparse_moe.experts.197.w2", "model.layers.0.block_sparse_moe.experts.198.w2", "model.layers.0.block_sparse_moe.experts.199.w2", "model.layers.0.block_sparse_moe.experts.200.w2", "model.layers.0.block_sparse_moe.experts.201.w2", "model.layers.0.block_sparse_moe.experts.202.w2", "model.layers.0.block_sparse_moe.experts.203.w2", "model.layers.0.block_sparse_moe.experts.204.w2", "model.layers.0.block_sparse_moe.experts.205.w2", "model.layers.0.block_sparse_moe.experts.206.w2", "model.layers.0.block_sparse_moe.experts.207.w2", "model.layers.0.block_sparse_moe.experts.208.w2", "model.layers.0.block_sparse_moe.experts.209.w2", "model.layers.0.block_sparse_moe.experts.210.w2", "model.layers.0.block_sparse_moe.experts.211.w2", "model.layers.0.block_sparse_moe.experts.212.w2", "model.layers.0.block_sparse_moe.experts.213.w2", "model.layers.0.block_sparse_moe.experts.214.w2", "model.layers.0.block_sparse_moe.experts.215.w2", "model.layers.0.block_sparse_moe.experts.216.w2", "model.layers.0.block_sparse_moe.experts.217.w2", "model.layers.0.block_sparse_moe.experts.218.w2", "model.layers.0.block_sparse_moe.experts.219.w2", "model.layers.0.block_sparse_moe.experts.220.w2", "model.layers.0.block_sparse_moe.experts.221.w2", "model.layers.0.block_sparse_moe.experts.222.w2", "model.layers.0.block_sparse_moe.experts.223.w2", "model.layers.0.block_sparse_moe.experts.224.w2", "model.layers.0.block_sparse_moe.experts.225.w2", "model.layers.0.block_sparse_moe.experts.226.w2", "model.layers.0.block_sparse_moe.experts.227.w2", "model.layers.0.block_sparse_moe.experts.228.w2", "model.layers.0.block_sparse_moe.experts.229.w2", "model.layers.0.block_sparse_moe.experts.230.w2", "model.layers.0.block_sparse_moe.experts.231.w2", "model.layers.0.block_sparse_moe.experts.232.w2", "model.layers.0.block_sparse_moe.experts.233.w2", "model.layers.0.block_sparse_moe.experts.234.w2", "model.layers.0.block_sparse_moe.experts.235.w2", "model.layers.0.block_sparse_moe.experts.236.w2", "model.layers.0.block_sparse_moe.experts.237.w2", "model.layers.0.block_sparse_moe.experts.238.w2", "model.layers.0.block_sparse_moe.experts.239.w2", "model.layers.0.block_sparse_moe.experts.240.w2", "model.layers.0.block_sparse_moe.experts.241.w2", "model.layers.0.block_sparse_moe.experts.242.w2", "model.layers.0.block_sparse_moe.experts.243.w2", "model.layers.0.block_sparse_moe.experts.244.w2", "model.layers.0.block_sparse_moe.experts.245.w2", "model.layers.0.block_sparse_moe.experts.246.w2", "model.layers.0.block_sparse_moe.experts.247.w2", "model.layers.0.block_sparse_moe.experts.248.w2", "model.layers.0.block_sparse_moe.experts.249.w2", "model.layers.0.block_sparse_moe.experts.250.w2", "model.layers.0.block_sparse_moe.experts.251.w2", "model.layers.0.block_sparse_moe.experts.252.w2", "model.layers.0.block_sparse_moe.experts.253.w2", "model.layers.0.block_sparse_moe.experts.254.w2", "model.layers.0.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.003925684466958013, "dbits": 1207959552 } ] }, { "idx": 5, "layers": [ "model.layers.1.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0010020848363637702, "dbits": 18874368 } ] }, { "idx": 6, "layers": [ "model.layers.1.self_attn.k_proj", "model.layers.1.self_attn.v_proj" ], "candidates": [ { "dkld": 0.00855485163629055, "dbits": 6291456 } ] }, { "idx": 7, "layers": [ "model.layers.1.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0017783161252736823, "dbits": 18874368 } ] }, { "idx": 8, "layers": [ "model.layers.1.block_sparse_moe.experts.0.w1", "model.layers.1.block_sparse_moe.experts.1.w1", "model.layers.1.block_sparse_moe.experts.2.w1", "model.layers.1.block_sparse_moe.experts.3.w1", "model.layers.1.block_sparse_moe.experts.4.w1", "model.layers.1.block_sparse_moe.experts.5.w1", "model.layers.1.block_sparse_moe.experts.6.w1", "model.layers.1.block_sparse_moe.experts.7.w1", "model.layers.1.block_sparse_moe.experts.8.w1", "model.layers.1.block_sparse_moe.experts.9.w1", "model.layers.1.block_sparse_moe.experts.10.w1", "model.layers.1.block_sparse_moe.experts.11.w1", "model.layers.1.block_sparse_moe.experts.12.w1", "model.layers.1.block_sparse_moe.experts.13.w1", "model.layers.1.block_sparse_moe.experts.14.w1", "model.layers.1.block_sparse_moe.experts.15.w1", "model.layers.1.block_sparse_moe.experts.16.w1", "model.layers.1.block_sparse_moe.experts.17.w1", "model.layers.1.block_sparse_moe.experts.18.w1", "model.layers.1.block_sparse_moe.experts.19.w1", "model.layers.1.block_sparse_moe.experts.20.w1", "model.layers.1.block_sparse_moe.experts.21.w1", "model.layers.1.block_sparse_moe.experts.22.w1", "model.layers.1.block_sparse_moe.experts.23.w1", "model.layers.1.block_sparse_moe.experts.24.w1", "model.layers.1.block_sparse_moe.experts.25.w1", "model.layers.1.block_sparse_moe.experts.26.w1", "model.layers.1.block_sparse_moe.experts.27.w1", "model.layers.1.block_sparse_moe.experts.28.w1", "model.layers.1.block_sparse_moe.experts.29.w1", "model.layers.1.block_sparse_moe.experts.30.w1", "model.layers.1.block_sparse_moe.experts.31.w1", "model.layers.1.block_sparse_moe.experts.32.w1", "model.layers.1.block_sparse_moe.experts.33.w1", "model.layers.1.block_sparse_moe.experts.34.w1", "model.layers.1.block_sparse_moe.experts.35.w1", "model.layers.1.block_sparse_moe.experts.36.w1", "model.layers.1.block_sparse_moe.experts.37.w1", "model.layers.1.block_sparse_moe.experts.38.w1", "model.layers.1.block_sparse_moe.experts.39.w1", "model.layers.1.block_sparse_moe.experts.40.w1", "model.layers.1.block_sparse_moe.experts.41.w1", "model.layers.1.block_sparse_moe.experts.42.w1", "model.layers.1.block_sparse_moe.experts.43.w1", "model.layers.1.block_sparse_moe.experts.44.w1", "model.layers.1.block_sparse_moe.experts.45.w1", "model.layers.1.block_sparse_moe.experts.46.w1", "model.layers.1.block_sparse_moe.experts.47.w1", "model.layers.1.block_sparse_moe.experts.48.w1", "model.layers.1.block_sparse_moe.experts.49.w1", "model.layers.1.block_sparse_moe.experts.50.w1", "model.layers.1.block_sparse_moe.experts.51.w1", "model.layers.1.block_sparse_moe.experts.52.w1", "model.layers.1.block_sparse_moe.experts.53.w1", "model.layers.1.block_sparse_moe.experts.54.w1", "model.layers.1.block_sparse_moe.experts.55.w1", "model.layers.1.block_sparse_moe.experts.56.w1", "model.layers.1.block_sparse_moe.experts.57.w1", "model.layers.1.block_sparse_moe.experts.58.w1", "model.layers.1.block_sparse_moe.experts.59.w1", "model.layers.1.block_sparse_moe.experts.60.w1", "model.layers.1.block_sparse_moe.experts.61.w1", "model.layers.1.block_sparse_moe.experts.62.w1", "model.layers.1.block_sparse_moe.experts.63.w1", "model.layers.1.block_sparse_moe.experts.64.w1", "model.layers.1.block_sparse_moe.experts.65.w1", "model.layers.1.block_sparse_moe.experts.66.w1", "model.layers.1.block_sparse_moe.experts.67.w1", "model.layers.1.block_sparse_moe.experts.68.w1", "model.layers.1.block_sparse_moe.experts.69.w1", "model.layers.1.block_sparse_moe.experts.70.w1", "model.layers.1.block_sparse_moe.experts.71.w1", "model.layers.1.block_sparse_moe.experts.72.w1", "model.layers.1.block_sparse_moe.experts.73.w1", "model.layers.1.block_sparse_moe.experts.74.w1", "model.layers.1.block_sparse_moe.experts.75.w1", "model.layers.1.block_sparse_moe.experts.76.w1", "model.layers.1.block_sparse_moe.experts.77.w1", "model.layers.1.block_sparse_moe.experts.78.w1", "model.layers.1.block_sparse_moe.experts.79.w1", "model.layers.1.block_sparse_moe.experts.80.w1", "model.layers.1.block_sparse_moe.experts.81.w1", "model.layers.1.block_sparse_moe.experts.82.w1", "model.layers.1.block_sparse_moe.experts.83.w1", "model.layers.1.block_sparse_moe.experts.84.w1", "model.layers.1.block_sparse_moe.experts.85.w1", "model.layers.1.block_sparse_moe.experts.86.w1", "model.layers.1.block_sparse_moe.experts.87.w1", "model.layers.1.block_sparse_moe.experts.88.w1", "model.layers.1.block_sparse_moe.experts.89.w1", "model.layers.1.block_sparse_moe.experts.90.w1", "model.layers.1.block_sparse_moe.experts.91.w1", "model.layers.1.block_sparse_moe.experts.92.w1", "model.layers.1.block_sparse_moe.experts.93.w1", "model.layers.1.block_sparse_moe.experts.94.w1", "model.layers.1.block_sparse_moe.experts.95.w1", "model.layers.1.block_sparse_moe.experts.96.w1", "model.layers.1.block_sparse_moe.experts.97.w1", "model.layers.1.block_sparse_moe.experts.98.w1", "model.layers.1.block_sparse_moe.experts.99.w1", "model.layers.1.block_sparse_moe.experts.100.w1", "model.layers.1.block_sparse_moe.experts.101.w1", "model.layers.1.block_sparse_moe.experts.102.w1", "model.layers.1.block_sparse_moe.experts.103.w1", "model.layers.1.block_sparse_moe.experts.104.w1", "model.layers.1.block_sparse_moe.experts.105.w1", "model.layers.1.block_sparse_moe.experts.106.w1", "model.layers.1.block_sparse_moe.experts.107.w1", "model.layers.1.block_sparse_moe.experts.108.w1", "model.layers.1.block_sparse_moe.experts.109.w1", "model.layers.1.block_sparse_moe.experts.110.w1", "model.layers.1.block_sparse_moe.experts.111.w1", "model.layers.1.block_sparse_moe.experts.112.w1", "model.layers.1.block_sparse_moe.experts.113.w1", "model.layers.1.block_sparse_moe.experts.114.w1", "model.layers.1.block_sparse_moe.experts.115.w1", "model.layers.1.block_sparse_moe.experts.116.w1", "model.layers.1.block_sparse_moe.experts.117.w1", "model.layers.1.block_sparse_moe.experts.118.w1", "model.layers.1.block_sparse_moe.experts.119.w1", "model.layers.1.block_sparse_moe.experts.120.w1", "model.layers.1.block_sparse_moe.experts.121.w1", "model.layers.1.block_sparse_moe.experts.122.w1", "model.layers.1.block_sparse_moe.experts.123.w1", "model.layers.1.block_sparse_moe.experts.124.w1", "model.layers.1.block_sparse_moe.experts.125.w1", "model.layers.1.block_sparse_moe.experts.126.w1", "model.layers.1.block_sparse_moe.experts.127.w1", "model.layers.1.block_sparse_moe.experts.128.w1", "model.layers.1.block_sparse_moe.experts.129.w1", "model.layers.1.block_sparse_moe.experts.130.w1", "model.layers.1.block_sparse_moe.experts.131.w1", "model.layers.1.block_sparse_moe.experts.132.w1", "model.layers.1.block_sparse_moe.experts.133.w1", "model.layers.1.block_sparse_moe.experts.134.w1", "model.layers.1.block_sparse_moe.experts.135.w1", "model.layers.1.block_sparse_moe.experts.136.w1", "model.layers.1.block_sparse_moe.experts.137.w1", "model.layers.1.block_sparse_moe.experts.138.w1", "model.layers.1.block_sparse_moe.experts.139.w1", "model.layers.1.block_sparse_moe.experts.140.w1", "model.layers.1.block_sparse_moe.experts.141.w1", "model.layers.1.block_sparse_moe.experts.142.w1", "model.layers.1.block_sparse_moe.experts.143.w1", "model.layers.1.block_sparse_moe.experts.144.w1", "model.layers.1.block_sparse_moe.experts.145.w1", "model.layers.1.block_sparse_moe.experts.146.w1", "model.layers.1.block_sparse_moe.experts.147.w1", "model.layers.1.block_sparse_moe.experts.148.w1", "model.layers.1.block_sparse_moe.experts.149.w1", "model.layers.1.block_sparse_moe.experts.150.w1", "model.layers.1.block_sparse_moe.experts.151.w1", "model.layers.1.block_sparse_moe.experts.152.w1", "model.layers.1.block_sparse_moe.experts.153.w1", "model.layers.1.block_sparse_moe.experts.154.w1", "model.layers.1.block_sparse_moe.experts.155.w1", "model.layers.1.block_sparse_moe.experts.156.w1", "model.layers.1.block_sparse_moe.experts.157.w1", "model.layers.1.block_sparse_moe.experts.158.w1", "model.layers.1.block_sparse_moe.experts.159.w1", "model.layers.1.block_sparse_moe.experts.160.w1", "model.layers.1.block_sparse_moe.experts.161.w1", "model.layers.1.block_sparse_moe.experts.162.w1", "model.layers.1.block_sparse_moe.experts.163.w1", "model.layers.1.block_sparse_moe.experts.164.w1", "model.layers.1.block_sparse_moe.experts.165.w1", "model.layers.1.block_sparse_moe.experts.166.w1", "model.layers.1.block_sparse_moe.experts.167.w1", "model.layers.1.block_sparse_moe.experts.168.w1", "model.layers.1.block_sparse_moe.experts.169.w1", "model.layers.1.block_sparse_moe.experts.170.w1", "model.layers.1.block_sparse_moe.experts.171.w1", "model.layers.1.block_sparse_moe.experts.172.w1", "model.layers.1.block_sparse_moe.experts.173.w1", "model.layers.1.block_sparse_moe.experts.174.w1", "model.layers.1.block_sparse_moe.experts.175.w1", "model.layers.1.block_sparse_moe.experts.176.w1", "model.layers.1.block_sparse_moe.experts.177.w1", "model.layers.1.block_sparse_moe.experts.178.w1", "model.layers.1.block_sparse_moe.experts.179.w1", "model.layers.1.block_sparse_moe.experts.180.w1", "model.layers.1.block_sparse_moe.experts.181.w1", "model.layers.1.block_sparse_moe.experts.182.w1", "model.layers.1.block_sparse_moe.experts.183.w1", "model.layers.1.block_sparse_moe.experts.184.w1", "model.layers.1.block_sparse_moe.experts.185.w1", "model.layers.1.block_sparse_moe.experts.186.w1", "model.layers.1.block_sparse_moe.experts.187.w1", "model.layers.1.block_sparse_moe.experts.188.w1", "model.layers.1.block_sparse_moe.experts.189.w1", "model.layers.1.block_sparse_moe.experts.190.w1", "model.layers.1.block_sparse_moe.experts.191.w1", "model.layers.1.block_sparse_moe.experts.192.w1", "model.layers.1.block_sparse_moe.experts.193.w1", "model.layers.1.block_sparse_moe.experts.194.w1", "model.layers.1.block_sparse_moe.experts.195.w1", "model.layers.1.block_sparse_moe.experts.196.w1", "model.layers.1.block_sparse_moe.experts.197.w1", "model.layers.1.block_sparse_moe.experts.198.w1", "model.layers.1.block_sparse_moe.experts.199.w1", "model.layers.1.block_sparse_moe.experts.200.w1", "model.layers.1.block_sparse_moe.experts.201.w1", "model.layers.1.block_sparse_moe.experts.202.w1", "model.layers.1.block_sparse_moe.experts.203.w1", "model.layers.1.block_sparse_moe.experts.204.w1", "model.layers.1.block_sparse_moe.experts.205.w1", "model.layers.1.block_sparse_moe.experts.206.w1", "model.layers.1.block_sparse_moe.experts.207.w1", "model.layers.1.block_sparse_moe.experts.208.w1", "model.layers.1.block_sparse_moe.experts.209.w1", "model.layers.1.block_sparse_moe.experts.210.w1", "model.layers.1.block_sparse_moe.experts.211.w1", "model.layers.1.block_sparse_moe.experts.212.w1", "model.layers.1.block_sparse_moe.experts.213.w1", "model.layers.1.block_sparse_moe.experts.214.w1", "model.layers.1.block_sparse_moe.experts.215.w1", "model.layers.1.block_sparse_moe.experts.216.w1", "model.layers.1.block_sparse_moe.experts.217.w1", "model.layers.1.block_sparse_moe.experts.218.w1", "model.layers.1.block_sparse_moe.experts.219.w1", "model.layers.1.block_sparse_moe.experts.220.w1", "model.layers.1.block_sparse_moe.experts.221.w1", "model.layers.1.block_sparse_moe.experts.222.w1", "model.layers.1.block_sparse_moe.experts.223.w1", "model.layers.1.block_sparse_moe.experts.224.w1", "model.layers.1.block_sparse_moe.experts.225.w1", "model.layers.1.block_sparse_moe.experts.226.w1", "model.layers.1.block_sparse_moe.experts.227.w1", "model.layers.1.block_sparse_moe.experts.228.w1", "model.layers.1.block_sparse_moe.experts.229.w1", "model.layers.1.block_sparse_moe.experts.230.w1", "model.layers.1.block_sparse_moe.experts.231.w1", "model.layers.1.block_sparse_moe.experts.232.w1", "model.layers.1.block_sparse_moe.experts.233.w1", "model.layers.1.block_sparse_moe.experts.234.w1", "model.layers.1.block_sparse_moe.experts.235.w1", "model.layers.1.block_sparse_moe.experts.236.w1", "model.layers.1.block_sparse_moe.experts.237.w1", "model.layers.1.block_sparse_moe.experts.238.w1", "model.layers.1.block_sparse_moe.experts.239.w1", "model.layers.1.block_sparse_moe.experts.240.w1", "model.layers.1.block_sparse_moe.experts.241.w1", "model.layers.1.block_sparse_moe.experts.242.w1", "model.layers.1.block_sparse_moe.experts.243.w1", "model.layers.1.block_sparse_moe.experts.244.w1", "model.layers.1.block_sparse_moe.experts.245.w1", "model.layers.1.block_sparse_moe.experts.246.w1", "model.layers.1.block_sparse_moe.experts.247.w1", "model.layers.1.block_sparse_moe.experts.248.w1", "model.layers.1.block_sparse_moe.experts.249.w1", "model.layers.1.block_sparse_moe.experts.250.w1", "model.layers.1.block_sparse_moe.experts.251.w1", "model.layers.1.block_sparse_moe.experts.252.w1", "model.layers.1.block_sparse_moe.experts.253.w1", "model.layers.1.block_sparse_moe.experts.254.w1", "model.layers.1.block_sparse_moe.experts.255.w1", "model.layers.1.block_sparse_moe.experts.0.w3", "model.layers.1.block_sparse_moe.experts.1.w3", "model.layers.1.block_sparse_moe.experts.2.w3", "model.layers.1.block_sparse_moe.experts.3.w3", "model.layers.1.block_sparse_moe.experts.4.w3", "model.layers.1.block_sparse_moe.experts.5.w3", "model.layers.1.block_sparse_moe.experts.6.w3", "model.layers.1.block_sparse_moe.experts.7.w3", "model.layers.1.block_sparse_moe.experts.8.w3", "model.layers.1.block_sparse_moe.experts.9.w3", "model.layers.1.block_sparse_moe.experts.10.w3", "model.layers.1.block_sparse_moe.experts.11.w3", "model.layers.1.block_sparse_moe.experts.12.w3", "model.layers.1.block_sparse_moe.experts.13.w3", "model.layers.1.block_sparse_moe.experts.14.w3", "model.layers.1.block_sparse_moe.experts.15.w3", "model.layers.1.block_sparse_moe.experts.16.w3", "model.layers.1.block_sparse_moe.experts.17.w3", "model.layers.1.block_sparse_moe.experts.18.w3", "model.layers.1.block_sparse_moe.experts.19.w3", "model.layers.1.block_sparse_moe.experts.20.w3", "model.layers.1.block_sparse_moe.experts.21.w3", "model.layers.1.block_sparse_moe.experts.22.w3", "model.layers.1.block_sparse_moe.experts.23.w3", "model.layers.1.block_sparse_moe.experts.24.w3", "model.layers.1.block_sparse_moe.experts.25.w3", "model.layers.1.block_sparse_moe.experts.26.w3", "model.layers.1.block_sparse_moe.experts.27.w3", "model.layers.1.block_sparse_moe.experts.28.w3", "model.layers.1.block_sparse_moe.experts.29.w3", "model.layers.1.block_sparse_moe.experts.30.w3", "model.layers.1.block_sparse_moe.experts.31.w3", "model.layers.1.block_sparse_moe.experts.32.w3", "model.layers.1.block_sparse_moe.experts.33.w3", "model.layers.1.block_sparse_moe.experts.34.w3", "model.layers.1.block_sparse_moe.experts.35.w3", "model.layers.1.block_sparse_moe.experts.36.w3", "model.layers.1.block_sparse_moe.experts.37.w3", "model.layers.1.block_sparse_moe.experts.38.w3", "model.layers.1.block_sparse_moe.experts.39.w3", "model.layers.1.block_sparse_moe.experts.40.w3", "model.layers.1.block_sparse_moe.experts.41.w3", "model.layers.1.block_sparse_moe.experts.42.w3", "model.layers.1.block_sparse_moe.experts.43.w3", "model.layers.1.block_sparse_moe.experts.44.w3", "model.layers.1.block_sparse_moe.experts.45.w3", "model.layers.1.block_sparse_moe.experts.46.w3", "model.layers.1.block_sparse_moe.experts.47.w3", "model.layers.1.block_sparse_moe.experts.48.w3", "model.layers.1.block_sparse_moe.experts.49.w3", "model.layers.1.block_sparse_moe.experts.50.w3", "model.layers.1.block_sparse_moe.experts.51.w3", "model.layers.1.block_sparse_moe.experts.52.w3", "model.layers.1.block_sparse_moe.experts.53.w3", "model.layers.1.block_sparse_moe.experts.54.w3", "model.layers.1.block_sparse_moe.experts.55.w3", "model.layers.1.block_sparse_moe.experts.56.w3", "model.layers.1.block_sparse_moe.experts.57.w3", "model.layers.1.block_sparse_moe.experts.58.w3", "model.layers.1.block_sparse_moe.experts.59.w3", "model.layers.1.block_sparse_moe.experts.60.w3", "model.layers.1.block_sparse_moe.experts.61.w3", "model.layers.1.block_sparse_moe.experts.62.w3", "model.layers.1.block_sparse_moe.experts.63.w3", "model.layers.1.block_sparse_moe.experts.64.w3", "model.layers.1.block_sparse_moe.experts.65.w3", "model.layers.1.block_sparse_moe.experts.66.w3", "model.layers.1.block_sparse_moe.experts.67.w3", "model.layers.1.block_sparse_moe.experts.68.w3", "model.layers.1.block_sparse_moe.experts.69.w3", "model.layers.1.block_sparse_moe.experts.70.w3", "model.layers.1.block_sparse_moe.experts.71.w3", "model.layers.1.block_sparse_moe.experts.72.w3", "model.layers.1.block_sparse_moe.experts.73.w3", "model.layers.1.block_sparse_moe.experts.74.w3", "model.layers.1.block_sparse_moe.experts.75.w3", "model.layers.1.block_sparse_moe.experts.76.w3", "model.layers.1.block_sparse_moe.experts.77.w3", "model.layers.1.block_sparse_moe.experts.78.w3", "model.layers.1.block_sparse_moe.experts.79.w3", "model.layers.1.block_sparse_moe.experts.80.w3", "model.layers.1.block_sparse_moe.experts.81.w3", "model.layers.1.block_sparse_moe.experts.82.w3", "model.layers.1.block_sparse_moe.experts.83.w3", "model.layers.1.block_sparse_moe.experts.84.w3", "model.layers.1.block_sparse_moe.experts.85.w3", "model.layers.1.block_sparse_moe.experts.86.w3", "model.layers.1.block_sparse_moe.experts.87.w3", "model.layers.1.block_sparse_moe.experts.88.w3", "model.layers.1.block_sparse_moe.experts.89.w3", "model.layers.1.block_sparse_moe.experts.90.w3", "model.layers.1.block_sparse_moe.experts.91.w3", "model.layers.1.block_sparse_moe.experts.92.w3", "model.layers.1.block_sparse_moe.experts.93.w3", "model.layers.1.block_sparse_moe.experts.94.w3", "model.layers.1.block_sparse_moe.experts.95.w3", "model.layers.1.block_sparse_moe.experts.96.w3", "model.layers.1.block_sparse_moe.experts.97.w3", "model.layers.1.block_sparse_moe.experts.98.w3", "model.layers.1.block_sparse_moe.experts.99.w3", "model.layers.1.block_sparse_moe.experts.100.w3", "model.layers.1.block_sparse_moe.experts.101.w3", "model.layers.1.block_sparse_moe.experts.102.w3", "model.layers.1.block_sparse_moe.experts.103.w3", "model.layers.1.block_sparse_moe.experts.104.w3", "model.layers.1.block_sparse_moe.experts.105.w3", "model.layers.1.block_sparse_moe.experts.106.w3", "model.layers.1.block_sparse_moe.experts.107.w3", "model.layers.1.block_sparse_moe.experts.108.w3", "model.layers.1.block_sparse_moe.experts.109.w3", "model.layers.1.block_sparse_moe.experts.110.w3", "model.layers.1.block_sparse_moe.experts.111.w3", "model.layers.1.block_sparse_moe.experts.112.w3", "model.layers.1.block_sparse_moe.experts.113.w3", "model.layers.1.block_sparse_moe.experts.114.w3", "model.layers.1.block_sparse_moe.experts.115.w3", "model.layers.1.block_sparse_moe.experts.116.w3", "model.layers.1.block_sparse_moe.experts.117.w3", "model.layers.1.block_sparse_moe.experts.118.w3", "model.layers.1.block_sparse_moe.experts.119.w3", "model.layers.1.block_sparse_moe.experts.120.w3", "model.layers.1.block_sparse_moe.experts.121.w3", "model.layers.1.block_sparse_moe.experts.122.w3", "model.layers.1.block_sparse_moe.experts.123.w3", "model.layers.1.block_sparse_moe.experts.124.w3", "model.layers.1.block_sparse_moe.experts.125.w3", "model.layers.1.block_sparse_moe.experts.126.w3", "model.layers.1.block_sparse_moe.experts.127.w3", "model.layers.1.block_sparse_moe.experts.128.w3", "model.layers.1.block_sparse_moe.experts.129.w3", "model.layers.1.block_sparse_moe.experts.130.w3", "model.layers.1.block_sparse_moe.experts.131.w3", "model.layers.1.block_sparse_moe.experts.132.w3", "model.layers.1.block_sparse_moe.experts.133.w3", "model.layers.1.block_sparse_moe.experts.134.w3", "model.layers.1.block_sparse_moe.experts.135.w3", "model.layers.1.block_sparse_moe.experts.136.w3", "model.layers.1.block_sparse_moe.experts.137.w3", "model.layers.1.block_sparse_moe.experts.138.w3", "model.layers.1.block_sparse_moe.experts.139.w3", "model.layers.1.block_sparse_moe.experts.140.w3", "model.layers.1.block_sparse_moe.experts.141.w3", "model.layers.1.block_sparse_moe.experts.142.w3", "model.layers.1.block_sparse_moe.experts.143.w3", "model.layers.1.block_sparse_moe.experts.144.w3", "model.layers.1.block_sparse_moe.experts.145.w3", "model.layers.1.block_sparse_moe.experts.146.w3", "model.layers.1.block_sparse_moe.experts.147.w3", "model.layers.1.block_sparse_moe.experts.148.w3", "model.layers.1.block_sparse_moe.experts.149.w3", "model.layers.1.block_sparse_moe.experts.150.w3", "model.layers.1.block_sparse_moe.experts.151.w3", "model.layers.1.block_sparse_moe.experts.152.w3", "model.layers.1.block_sparse_moe.experts.153.w3", "model.layers.1.block_sparse_moe.experts.154.w3", "model.layers.1.block_sparse_moe.experts.155.w3", "model.layers.1.block_sparse_moe.experts.156.w3", "model.layers.1.block_sparse_moe.experts.157.w3", "model.layers.1.block_sparse_moe.experts.158.w3", "model.layers.1.block_sparse_moe.experts.159.w3", "model.layers.1.block_sparse_moe.experts.160.w3", "model.layers.1.block_sparse_moe.experts.161.w3", "model.layers.1.block_sparse_moe.experts.162.w3", "model.layers.1.block_sparse_moe.experts.163.w3", "model.layers.1.block_sparse_moe.experts.164.w3", "model.layers.1.block_sparse_moe.experts.165.w3", "model.layers.1.block_sparse_moe.experts.166.w3", "model.layers.1.block_sparse_moe.experts.167.w3", "model.layers.1.block_sparse_moe.experts.168.w3", "model.layers.1.block_sparse_moe.experts.169.w3", "model.layers.1.block_sparse_moe.experts.170.w3", "model.layers.1.block_sparse_moe.experts.171.w3", "model.layers.1.block_sparse_moe.experts.172.w3", "model.layers.1.block_sparse_moe.experts.173.w3", "model.layers.1.block_sparse_moe.experts.174.w3", "model.layers.1.block_sparse_moe.experts.175.w3", "model.layers.1.block_sparse_moe.experts.176.w3", "model.layers.1.block_sparse_moe.experts.177.w3", "model.layers.1.block_sparse_moe.experts.178.w3", "model.layers.1.block_sparse_moe.experts.179.w3", "model.layers.1.block_sparse_moe.experts.180.w3", "model.layers.1.block_sparse_moe.experts.181.w3", "model.layers.1.block_sparse_moe.experts.182.w3", "model.layers.1.block_sparse_moe.experts.183.w3", "model.layers.1.block_sparse_moe.experts.184.w3", "model.layers.1.block_sparse_moe.experts.185.w3", "model.layers.1.block_sparse_moe.experts.186.w3", "model.layers.1.block_sparse_moe.experts.187.w3", "model.layers.1.block_sparse_moe.experts.188.w3", "model.layers.1.block_sparse_moe.experts.189.w3", "model.layers.1.block_sparse_moe.experts.190.w3", "model.layers.1.block_sparse_moe.experts.191.w3", "model.layers.1.block_sparse_moe.experts.192.w3", "model.layers.1.block_sparse_moe.experts.193.w3", "model.layers.1.block_sparse_moe.experts.194.w3", "model.layers.1.block_sparse_moe.experts.195.w3", "model.layers.1.block_sparse_moe.experts.196.w3", "model.layers.1.block_sparse_moe.experts.197.w3", "model.layers.1.block_sparse_moe.experts.198.w3", "model.layers.1.block_sparse_moe.experts.199.w3", "model.layers.1.block_sparse_moe.experts.200.w3", "model.layers.1.block_sparse_moe.experts.201.w3", "model.layers.1.block_sparse_moe.experts.202.w3", "model.layers.1.block_sparse_moe.experts.203.w3", "model.layers.1.block_sparse_moe.experts.204.w3", "model.layers.1.block_sparse_moe.experts.205.w3", "model.layers.1.block_sparse_moe.experts.206.w3", "model.layers.1.block_sparse_moe.experts.207.w3", "model.layers.1.block_sparse_moe.experts.208.w3", "model.layers.1.block_sparse_moe.experts.209.w3", "model.layers.1.block_sparse_moe.experts.210.w3", "model.layers.1.block_sparse_moe.experts.211.w3", "model.layers.1.block_sparse_moe.experts.212.w3", "model.layers.1.block_sparse_moe.experts.213.w3", "model.layers.1.block_sparse_moe.experts.214.w3", "model.layers.1.block_sparse_moe.experts.215.w3", "model.layers.1.block_sparse_moe.experts.216.w3", "model.layers.1.block_sparse_moe.experts.217.w3", "model.layers.1.block_sparse_moe.experts.218.w3", "model.layers.1.block_sparse_moe.experts.219.w3", "model.layers.1.block_sparse_moe.experts.220.w3", "model.layers.1.block_sparse_moe.experts.221.w3", "model.layers.1.block_sparse_moe.experts.222.w3", "model.layers.1.block_sparse_moe.experts.223.w3", "model.layers.1.block_sparse_moe.experts.224.w3", "model.layers.1.block_sparse_moe.experts.225.w3", "model.layers.1.block_sparse_moe.experts.226.w3", "model.layers.1.block_sparse_moe.experts.227.w3", "model.layers.1.block_sparse_moe.experts.228.w3", "model.layers.1.block_sparse_moe.experts.229.w3", "model.layers.1.block_sparse_moe.experts.230.w3", "model.layers.1.block_sparse_moe.experts.231.w3", "model.layers.1.block_sparse_moe.experts.232.w3", "model.layers.1.block_sparse_moe.experts.233.w3", "model.layers.1.block_sparse_moe.experts.234.w3", "model.layers.1.block_sparse_moe.experts.235.w3", "model.layers.1.block_sparse_moe.experts.236.w3", "model.layers.1.block_sparse_moe.experts.237.w3", "model.layers.1.block_sparse_moe.experts.238.w3", "model.layers.1.block_sparse_moe.experts.239.w3", "model.layers.1.block_sparse_moe.experts.240.w3", "model.layers.1.block_sparse_moe.experts.241.w3", "model.layers.1.block_sparse_moe.experts.242.w3", "model.layers.1.block_sparse_moe.experts.243.w3", "model.layers.1.block_sparse_moe.experts.244.w3", "model.layers.1.block_sparse_moe.experts.245.w3", "model.layers.1.block_sparse_moe.experts.246.w3", "model.layers.1.block_sparse_moe.experts.247.w3", "model.layers.1.block_sparse_moe.experts.248.w3", "model.layers.1.block_sparse_moe.experts.249.w3", "model.layers.1.block_sparse_moe.experts.250.w3", "model.layers.1.block_sparse_moe.experts.251.w3", "model.layers.1.block_sparse_moe.experts.252.w3", "model.layers.1.block_sparse_moe.experts.253.w3", "model.layers.1.block_sparse_moe.experts.254.w3", "model.layers.1.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.004398413747549068, "dbits": 2415919104 } ] }, { "idx": 9, "layers": [ "model.layers.1.block_sparse_moe.experts.0.w2", "model.layers.1.block_sparse_moe.experts.1.w2", "model.layers.1.block_sparse_moe.experts.2.w2", "model.layers.1.block_sparse_moe.experts.3.w2", "model.layers.1.block_sparse_moe.experts.4.w2", "model.layers.1.block_sparse_moe.experts.5.w2", "model.layers.1.block_sparse_moe.experts.6.w2", "model.layers.1.block_sparse_moe.experts.7.w2", "model.layers.1.block_sparse_moe.experts.8.w2", "model.layers.1.block_sparse_moe.experts.9.w2", "model.layers.1.block_sparse_moe.experts.10.w2", "model.layers.1.block_sparse_moe.experts.11.w2", "model.layers.1.block_sparse_moe.experts.12.w2", "model.layers.1.block_sparse_moe.experts.13.w2", "model.layers.1.block_sparse_moe.experts.14.w2", "model.layers.1.block_sparse_moe.experts.15.w2", "model.layers.1.block_sparse_moe.experts.16.w2", "model.layers.1.block_sparse_moe.experts.17.w2", "model.layers.1.block_sparse_moe.experts.18.w2", "model.layers.1.block_sparse_moe.experts.19.w2", "model.layers.1.block_sparse_moe.experts.20.w2", "model.layers.1.block_sparse_moe.experts.21.w2", "model.layers.1.block_sparse_moe.experts.22.w2", "model.layers.1.block_sparse_moe.experts.23.w2", "model.layers.1.block_sparse_moe.experts.24.w2", "model.layers.1.block_sparse_moe.experts.25.w2", "model.layers.1.block_sparse_moe.experts.26.w2", "model.layers.1.block_sparse_moe.experts.27.w2", "model.layers.1.block_sparse_moe.experts.28.w2", "model.layers.1.block_sparse_moe.experts.29.w2", "model.layers.1.block_sparse_moe.experts.30.w2", "model.layers.1.block_sparse_moe.experts.31.w2", "model.layers.1.block_sparse_moe.experts.32.w2", "model.layers.1.block_sparse_moe.experts.33.w2", "model.layers.1.block_sparse_moe.experts.34.w2", "model.layers.1.block_sparse_moe.experts.35.w2", "model.layers.1.block_sparse_moe.experts.36.w2", "model.layers.1.block_sparse_moe.experts.37.w2", "model.layers.1.block_sparse_moe.experts.38.w2", "model.layers.1.block_sparse_moe.experts.39.w2", "model.layers.1.block_sparse_moe.experts.40.w2", "model.layers.1.block_sparse_moe.experts.41.w2", "model.layers.1.block_sparse_moe.experts.42.w2", "model.layers.1.block_sparse_moe.experts.43.w2", "model.layers.1.block_sparse_moe.experts.44.w2", "model.layers.1.block_sparse_moe.experts.45.w2", "model.layers.1.block_sparse_moe.experts.46.w2", "model.layers.1.block_sparse_moe.experts.47.w2", "model.layers.1.block_sparse_moe.experts.48.w2", "model.layers.1.block_sparse_moe.experts.49.w2", "model.layers.1.block_sparse_moe.experts.50.w2", "model.layers.1.block_sparse_moe.experts.51.w2", "model.layers.1.block_sparse_moe.experts.52.w2", "model.layers.1.block_sparse_moe.experts.53.w2", "model.layers.1.block_sparse_moe.experts.54.w2", "model.layers.1.block_sparse_moe.experts.55.w2", "model.layers.1.block_sparse_moe.experts.56.w2", "model.layers.1.block_sparse_moe.experts.57.w2", "model.layers.1.block_sparse_moe.experts.58.w2", "model.layers.1.block_sparse_moe.experts.59.w2", "model.layers.1.block_sparse_moe.experts.60.w2", "model.layers.1.block_sparse_moe.experts.61.w2", "model.layers.1.block_sparse_moe.experts.62.w2", "model.layers.1.block_sparse_moe.experts.63.w2", "model.layers.1.block_sparse_moe.experts.64.w2", "model.layers.1.block_sparse_moe.experts.65.w2", "model.layers.1.block_sparse_moe.experts.66.w2", "model.layers.1.block_sparse_moe.experts.67.w2", "model.layers.1.block_sparse_moe.experts.68.w2", "model.layers.1.block_sparse_moe.experts.69.w2", "model.layers.1.block_sparse_moe.experts.70.w2", "model.layers.1.block_sparse_moe.experts.71.w2", "model.layers.1.block_sparse_moe.experts.72.w2", "model.layers.1.block_sparse_moe.experts.73.w2", "model.layers.1.block_sparse_moe.experts.74.w2", "model.layers.1.block_sparse_moe.experts.75.w2", "model.layers.1.block_sparse_moe.experts.76.w2", "model.layers.1.block_sparse_moe.experts.77.w2", "model.layers.1.block_sparse_moe.experts.78.w2", "model.layers.1.block_sparse_moe.experts.79.w2", "model.layers.1.block_sparse_moe.experts.80.w2", "model.layers.1.block_sparse_moe.experts.81.w2", "model.layers.1.block_sparse_moe.experts.82.w2", "model.layers.1.block_sparse_moe.experts.83.w2", "model.layers.1.block_sparse_moe.experts.84.w2", "model.layers.1.block_sparse_moe.experts.85.w2", "model.layers.1.block_sparse_moe.experts.86.w2", "model.layers.1.block_sparse_moe.experts.87.w2", "model.layers.1.block_sparse_moe.experts.88.w2", "model.layers.1.block_sparse_moe.experts.89.w2", "model.layers.1.block_sparse_moe.experts.90.w2", "model.layers.1.block_sparse_moe.experts.91.w2", "model.layers.1.block_sparse_moe.experts.92.w2", "model.layers.1.block_sparse_moe.experts.93.w2", "model.layers.1.block_sparse_moe.experts.94.w2", "model.layers.1.block_sparse_moe.experts.95.w2", "model.layers.1.block_sparse_moe.experts.96.w2", "model.layers.1.block_sparse_moe.experts.97.w2", "model.layers.1.block_sparse_moe.experts.98.w2", "model.layers.1.block_sparse_moe.experts.99.w2", "model.layers.1.block_sparse_moe.experts.100.w2", "model.layers.1.block_sparse_moe.experts.101.w2", "model.layers.1.block_sparse_moe.experts.102.w2", "model.layers.1.block_sparse_moe.experts.103.w2", "model.layers.1.block_sparse_moe.experts.104.w2", "model.layers.1.block_sparse_moe.experts.105.w2", "model.layers.1.block_sparse_moe.experts.106.w2", "model.layers.1.block_sparse_moe.experts.107.w2", "model.layers.1.block_sparse_moe.experts.108.w2", "model.layers.1.block_sparse_moe.experts.109.w2", "model.layers.1.block_sparse_moe.experts.110.w2", "model.layers.1.block_sparse_moe.experts.111.w2", "model.layers.1.block_sparse_moe.experts.112.w2", "model.layers.1.block_sparse_moe.experts.113.w2", "model.layers.1.block_sparse_moe.experts.114.w2", "model.layers.1.block_sparse_moe.experts.115.w2", "model.layers.1.block_sparse_moe.experts.116.w2", "model.layers.1.block_sparse_moe.experts.117.w2", "model.layers.1.block_sparse_moe.experts.118.w2", "model.layers.1.block_sparse_moe.experts.119.w2", "model.layers.1.block_sparse_moe.experts.120.w2", "model.layers.1.block_sparse_moe.experts.121.w2", "model.layers.1.block_sparse_moe.experts.122.w2", "model.layers.1.block_sparse_moe.experts.123.w2", "model.layers.1.block_sparse_moe.experts.124.w2", "model.layers.1.block_sparse_moe.experts.125.w2", "model.layers.1.block_sparse_moe.experts.126.w2", "model.layers.1.block_sparse_moe.experts.127.w2", "model.layers.1.block_sparse_moe.experts.128.w2", "model.layers.1.block_sparse_moe.experts.129.w2", "model.layers.1.block_sparse_moe.experts.130.w2", "model.layers.1.block_sparse_moe.experts.131.w2", "model.layers.1.block_sparse_moe.experts.132.w2", "model.layers.1.block_sparse_moe.experts.133.w2", "model.layers.1.block_sparse_moe.experts.134.w2", "model.layers.1.block_sparse_moe.experts.135.w2", "model.layers.1.block_sparse_moe.experts.136.w2", "model.layers.1.block_sparse_moe.experts.137.w2", "model.layers.1.block_sparse_moe.experts.138.w2", "model.layers.1.block_sparse_moe.experts.139.w2", "model.layers.1.block_sparse_moe.experts.140.w2", "model.layers.1.block_sparse_moe.experts.141.w2", "model.layers.1.block_sparse_moe.experts.142.w2", "model.layers.1.block_sparse_moe.experts.143.w2", "model.layers.1.block_sparse_moe.experts.144.w2", "model.layers.1.block_sparse_moe.experts.145.w2", "model.layers.1.block_sparse_moe.experts.146.w2", "model.layers.1.block_sparse_moe.experts.147.w2", "model.layers.1.block_sparse_moe.experts.148.w2", "model.layers.1.block_sparse_moe.experts.149.w2", "model.layers.1.block_sparse_moe.experts.150.w2", "model.layers.1.block_sparse_moe.experts.151.w2", "model.layers.1.block_sparse_moe.experts.152.w2", "model.layers.1.block_sparse_moe.experts.153.w2", "model.layers.1.block_sparse_moe.experts.154.w2", "model.layers.1.block_sparse_moe.experts.155.w2", "model.layers.1.block_sparse_moe.experts.156.w2", "model.layers.1.block_sparse_moe.experts.157.w2", "model.layers.1.block_sparse_moe.experts.158.w2", "model.layers.1.block_sparse_moe.experts.159.w2", "model.layers.1.block_sparse_moe.experts.160.w2", "model.layers.1.block_sparse_moe.experts.161.w2", "model.layers.1.block_sparse_moe.experts.162.w2", "model.layers.1.block_sparse_moe.experts.163.w2", "model.layers.1.block_sparse_moe.experts.164.w2", "model.layers.1.block_sparse_moe.experts.165.w2", "model.layers.1.block_sparse_moe.experts.166.w2", "model.layers.1.block_sparse_moe.experts.167.w2", "model.layers.1.block_sparse_moe.experts.168.w2", "model.layers.1.block_sparse_moe.experts.169.w2", "model.layers.1.block_sparse_moe.experts.170.w2", "model.layers.1.block_sparse_moe.experts.171.w2", "model.layers.1.block_sparse_moe.experts.172.w2", "model.layers.1.block_sparse_moe.experts.173.w2", "model.layers.1.block_sparse_moe.experts.174.w2", "model.layers.1.block_sparse_moe.experts.175.w2", "model.layers.1.block_sparse_moe.experts.176.w2", "model.layers.1.block_sparse_moe.experts.177.w2", "model.layers.1.block_sparse_moe.experts.178.w2", "model.layers.1.block_sparse_moe.experts.179.w2", "model.layers.1.block_sparse_moe.experts.180.w2", "model.layers.1.block_sparse_moe.experts.181.w2", "model.layers.1.block_sparse_moe.experts.182.w2", "model.layers.1.block_sparse_moe.experts.183.w2", "model.layers.1.block_sparse_moe.experts.184.w2", "model.layers.1.block_sparse_moe.experts.185.w2", "model.layers.1.block_sparse_moe.experts.186.w2", "model.layers.1.block_sparse_moe.experts.187.w2", "model.layers.1.block_sparse_moe.experts.188.w2", "model.layers.1.block_sparse_moe.experts.189.w2", "model.layers.1.block_sparse_moe.experts.190.w2", "model.layers.1.block_sparse_moe.experts.191.w2", "model.layers.1.block_sparse_moe.experts.192.w2", "model.layers.1.block_sparse_moe.experts.193.w2", "model.layers.1.block_sparse_moe.experts.194.w2", "model.layers.1.block_sparse_moe.experts.195.w2", "model.layers.1.block_sparse_moe.experts.196.w2", "model.layers.1.block_sparse_moe.experts.197.w2", "model.layers.1.block_sparse_moe.experts.198.w2", "model.layers.1.block_sparse_moe.experts.199.w2", "model.layers.1.block_sparse_moe.experts.200.w2", "model.layers.1.block_sparse_moe.experts.201.w2", "model.layers.1.block_sparse_moe.experts.202.w2", "model.layers.1.block_sparse_moe.experts.203.w2", "model.layers.1.block_sparse_moe.experts.204.w2", "model.layers.1.block_sparse_moe.experts.205.w2", "model.layers.1.block_sparse_moe.experts.206.w2", "model.layers.1.block_sparse_moe.experts.207.w2", "model.layers.1.block_sparse_moe.experts.208.w2", "model.layers.1.block_sparse_moe.experts.209.w2", "model.layers.1.block_sparse_moe.experts.210.w2", "model.layers.1.block_sparse_moe.experts.211.w2", "model.layers.1.block_sparse_moe.experts.212.w2", "model.layers.1.block_sparse_moe.experts.213.w2", "model.layers.1.block_sparse_moe.experts.214.w2", "model.layers.1.block_sparse_moe.experts.215.w2", "model.layers.1.block_sparse_moe.experts.216.w2", "model.layers.1.block_sparse_moe.experts.217.w2", "model.layers.1.block_sparse_moe.experts.218.w2", "model.layers.1.block_sparse_moe.experts.219.w2", "model.layers.1.block_sparse_moe.experts.220.w2", "model.layers.1.block_sparse_moe.experts.221.w2", "model.layers.1.block_sparse_moe.experts.222.w2", "model.layers.1.block_sparse_moe.experts.223.w2", "model.layers.1.block_sparse_moe.experts.224.w2", "model.layers.1.block_sparse_moe.experts.225.w2", "model.layers.1.block_sparse_moe.experts.226.w2", "model.layers.1.block_sparse_moe.experts.227.w2", "model.layers.1.block_sparse_moe.experts.228.w2", "model.layers.1.block_sparse_moe.experts.229.w2", "model.layers.1.block_sparse_moe.experts.230.w2", "model.layers.1.block_sparse_moe.experts.231.w2", "model.layers.1.block_sparse_moe.experts.232.w2", "model.layers.1.block_sparse_moe.experts.233.w2", "model.layers.1.block_sparse_moe.experts.234.w2", "model.layers.1.block_sparse_moe.experts.235.w2", "model.layers.1.block_sparse_moe.experts.236.w2", "model.layers.1.block_sparse_moe.experts.237.w2", "model.layers.1.block_sparse_moe.experts.238.w2", "model.layers.1.block_sparse_moe.experts.239.w2", "model.layers.1.block_sparse_moe.experts.240.w2", "model.layers.1.block_sparse_moe.experts.241.w2", "model.layers.1.block_sparse_moe.experts.242.w2", "model.layers.1.block_sparse_moe.experts.243.w2", "model.layers.1.block_sparse_moe.experts.244.w2", "model.layers.1.block_sparse_moe.experts.245.w2", "model.layers.1.block_sparse_moe.experts.246.w2", "model.layers.1.block_sparse_moe.experts.247.w2", "model.layers.1.block_sparse_moe.experts.248.w2", "model.layers.1.block_sparse_moe.experts.249.w2", "model.layers.1.block_sparse_moe.experts.250.w2", "model.layers.1.block_sparse_moe.experts.251.w2", "model.layers.1.block_sparse_moe.experts.252.w2", "model.layers.1.block_sparse_moe.experts.253.w2", "model.layers.1.block_sparse_moe.experts.254.w2", "model.layers.1.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.002662316337227799, "dbits": 1207959552 } ] }, { "idx": 10, "layers": [ "model.layers.2.self_attn.q_proj" ], "candidates": [ { "dkld": 3.47774475812912e-05, "dbits": 18874368 } ] }, { "idx": 11, "layers": [ "model.layers.2.self_attn.k_proj", "model.layers.2.self_attn.v_proj" ], "candidates": [ { "dkld": -0.02448230534791948, "dbits": 6291456 } ] }, { "idx": 12, "layers": [ "model.layers.2.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0014198325574398263, "dbits": 18874368 } ] }, { "idx": 13, "layers": [ "model.layers.2.block_sparse_moe.experts.0.w1", "model.layers.2.block_sparse_moe.experts.1.w1", "model.layers.2.block_sparse_moe.experts.2.w1", "model.layers.2.block_sparse_moe.experts.3.w1", "model.layers.2.block_sparse_moe.experts.4.w1", "model.layers.2.block_sparse_moe.experts.5.w1", "model.layers.2.block_sparse_moe.experts.6.w1", "model.layers.2.block_sparse_moe.experts.7.w1", "model.layers.2.block_sparse_moe.experts.8.w1", "model.layers.2.block_sparse_moe.experts.9.w1", "model.layers.2.block_sparse_moe.experts.10.w1", "model.layers.2.block_sparse_moe.experts.11.w1", "model.layers.2.block_sparse_moe.experts.12.w1", "model.layers.2.block_sparse_moe.experts.13.w1", "model.layers.2.block_sparse_moe.experts.14.w1", "model.layers.2.block_sparse_moe.experts.15.w1", "model.layers.2.block_sparse_moe.experts.16.w1", "model.layers.2.block_sparse_moe.experts.17.w1", "model.layers.2.block_sparse_moe.experts.18.w1", "model.layers.2.block_sparse_moe.experts.19.w1", "model.layers.2.block_sparse_moe.experts.20.w1", "model.layers.2.block_sparse_moe.experts.21.w1", "model.layers.2.block_sparse_moe.experts.22.w1", "model.layers.2.block_sparse_moe.experts.23.w1", "model.layers.2.block_sparse_moe.experts.24.w1", "model.layers.2.block_sparse_moe.experts.25.w1", "model.layers.2.block_sparse_moe.experts.26.w1", "model.layers.2.block_sparse_moe.experts.27.w1", "model.layers.2.block_sparse_moe.experts.28.w1", "model.layers.2.block_sparse_moe.experts.29.w1", "model.layers.2.block_sparse_moe.experts.30.w1", "model.layers.2.block_sparse_moe.experts.31.w1", "model.layers.2.block_sparse_moe.experts.32.w1", "model.layers.2.block_sparse_moe.experts.33.w1", "model.layers.2.block_sparse_moe.experts.34.w1", "model.layers.2.block_sparse_moe.experts.35.w1", "model.layers.2.block_sparse_moe.experts.36.w1", "model.layers.2.block_sparse_moe.experts.37.w1", "model.layers.2.block_sparse_moe.experts.38.w1", "model.layers.2.block_sparse_moe.experts.39.w1", "model.layers.2.block_sparse_moe.experts.40.w1", "model.layers.2.block_sparse_moe.experts.41.w1", "model.layers.2.block_sparse_moe.experts.42.w1", "model.layers.2.block_sparse_moe.experts.43.w1", "model.layers.2.block_sparse_moe.experts.44.w1", "model.layers.2.block_sparse_moe.experts.45.w1", "model.layers.2.block_sparse_moe.experts.46.w1", "model.layers.2.block_sparse_moe.experts.47.w1", "model.layers.2.block_sparse_moe.experts.48.w1", "model.layers.2.block_sparse_moe.experts.49.w1", "model.layers.2.block_sparse_moe.experts.50.w1", "model.layers.2.block_sparse_moe.experts.51.w1", "model.layers.2.block_sparse_moe.experts.52.w1", "model.layers.2.block_sparse_moe.experts.53.w1", "model.layers.2.block_sparse_moe.experts.54.w1", "model.layers.2.block_sparse_moe.experts.55.w1", "model.layers.2.block_sparse_moe.experts.56.w1", "model.layers.2.block_sparse_moe.experts.57.w1", "model.layers.2.block_sparse_moe.experts.58.w1", "model.layers.2.block_sparse_moe.experts.59.w1", "model.layers.2.block_sparse_moe.experts.60.w1", "model.layers.2.block_sparse_moe.experts.61.w1", "model.layers.2.block_sparse_moe.experts.62.w1", "model.layers.2.block_sparse_moe.experts.63.w1", "model.layers.2.block_sparse_moe.experts.64.w1", "model.layers.2.block_sparse_moe.experts.65.w1", "model.layers.2.block_sparse_moe.experts.66.w1", "model.layers.2.block_sparse_moe.experts.67.w1", "model.layers.2.block_sparse_moe.experts.68.w1", "model.layers.2.block_sparse_moe.experts.69.w1", "model.layers.2.block_sparse_moe.experts.70.w1", "model.layers.2.block_sparse_moe.experts.71.w1", "model.layers.2.block_sparse_moe.experts.72.w1", "model.layers.2.block_sparse_moe.experts.73.w1", "model.layers.2.block_sparse_moe.experts.74.w1", "model.layers.2.block_sparse_moe.experts.75.w1", "model.layers.2.block_sparse_moe.experts.76.w1", "model.layers.2.block_sparse_moe.experts.77.w1", "model.layers.2.block_sparse_moe.experts.78.w1", "model.layers.2.block_sparse_moe.experts.79.w1", "model.layers.2.block_sparse_moe.experts.80.w1", "model.layers.2.block_sparse_moe.experts.81.w1", "model.layers.2.block_sparse_moe.experts.82.w1", "model.layers.2.block_sparse_moe.experts.83.w1", "model.layers.2.block_sparse_moe.experts.84.w1", "model.layers.2.block_sparse_moe.experts.85.w1", "model.layers.2.block_sparse_moe.experts.86.w1", "model.layers.2.block_sparse_moe.experts.87.w1", "model.layers.2.block_sparse_moe.experts.88.w1", "model.layers.2.block_sparse_moe.experts.89.w1", "model.layers.2.block_sparse_moe.experts.90.w1", "model.layers.2.block_sparse_moe.experts.91.w1", "model.layers.2.block_sparse_moe.experts.92.w1", "model.layers.2.block_sparse_moe.experts.93.w1", "model.layers.2.block_sparse_moe.experts.94.w1", "model.layers.2.block_sparse_moe.experts.95.w1", "model.layers.2.block_sparse_moe.experts.96.w1", "model.layers.2.block_sparse_moe.experts.97.w1", "model.layers.2.block_sparse_moe.experts.98.w1", "model.layers.2.block_sparse_moe.experts.99.w1", "model.layers.2.block_sparse_moe.experts.100.w1", "model.layers.2.block_sparse_moe.experts.101.w1", "model.layers.2.block_sparse_moe.experts.102.w1", "model.layers.2.block_sparse_moe.experts.103.w1", "model.layers.2.block_sparse_moe.experts.104.w1", "model.layers.2.block_sparse_moe.experts.105.w1", "model.layers.2.block_sparse_moe.experts.106.w1", "model.layers.2.block_sparse_moe.experts.107.w1", "model.layers.2.block_sparse_moe.experts.108.w1", "model.layers.2.block_sparse_moe.experts.109.w1", "model.layers.2.block_sparse_moe.experts.110.w1", "model.layers.2.block_sparse_moe.experts.111.w1", "model.layers.2.block_sparse_moe.experts.112.w1", "model.layers.2.block_sparse_moe.experts.113.w1", "model.layers.2.block_sparse_moe.experts.114.w1", "model.layers.2.block_sparse_moe.experts.115.w1", "model.layers.2.block_sparse_moe.experts.116.w1", "model.layers.2.block_sparse_moe.experts.117.w1", "model.layers.2.block_sparse_moe.experts.118.w1", "model.layers.2.block_sparse_moe.experts.119.w1", "model.layers.2.block_sparse_moe.experts.120.w1", "model.layers.2.block_sparse_moe.experts.121.w1", "model.layers.2.block_sparse_moe.experts.122.w1", "model.layers.2.block_sparse_moe.experts.123.w1", "model.layers.2.block_sparse_moe.experts.124.w1", "model.layers.2.block_sparse_moe.experts.125.w1", "model.layers.2.block_sparse_moe.experts.126.w1", "model.layers.2.block_sparse_moe.experts.127.w1", "model.layers.2.block_sparse_moe.experts.128.w1", "model.layers.2.block_sparse_moe.experts.129.w1", "model.layers.2.block_sparse_moe.experts.130.w1", "model.layers.2.block_sparse_moe.experts.131.w1", "model.layers.2.block_sparse_moe.experts.132.w1", "model.layers.2.block_sparse_moe.experts.133.w1", "model.layers.2.block_sparse_moe.experts.134.w1", "model.layers.2.block_sparse_moe.experts.135.w1", "model.layers.2.block_sparse_moe.experts.136.w1", "model.layers.2.block_sparse_moe.experts.137.w1", "model.layers.2.block_sparse_moe.experts.138.w1", "model.layers.2.block_sparse_moe.experts.139.w1", "model.layers.2.block_sparse_moe.experts.140.w1", "model.layers.2.block_sparse_moe.experts.141.w1", "model.layers.2.block_sparse_moe.experts.142.w1", "model.layers.2.block_sparse_moe.experts.143.w1", "model.layers.2.block_sparse_moe.experts.144.w1", "model.layers.2.block_sparse_moe.experts.145.w1", "model.layers.2.block_sparse_moe.experts.146.w1", "model.layers.2.block_sparse_moe.experts.147.w1", "model.layers.2.block_sparse_moe.experts.148.w1", "model.layers.2.block_sparse_moe.experts.149.w1", "model.layers.2.block_sparse_moe.experts.150.w1", "model.layers.2.block_sparse_moe.experts.151.w1", "model.layers.2.block_sparse_moe.experts.152.w1", "model.layers.2.block_sparse_moe.experts.153.w1", "model.layers.2.block_sparse_moe.experts.154.w1", "model.layers.2.block_sparse_moe.experts.155.w1", "model.layers.2.block_sparse_moe.experts.156.w1", "model.layers.2.block_sparse_moe.experts.157.w1", "model.layers.2.block_sparse_moe.experts.158.w1", "model.layers.2.block_sparse_moe.experts.159.w1", "model.layers.2.block_sparse_moe.experts.160.w1", "model.layers.2.block_sparse_moe.experts.161.w1", "model.layers.2.block_sparse_moe.experts.162.w1", "model.layers.2.block_sparse_moe.experts.163.w1", "model.layers.2.block_sparse_moe.experts.164.w1", "model.layers.2.block_sparse_moe.experts.165.w1", "model.layers.2.block_sparse_moe.experts.166.w1", "model.layers.2.block_sparse_moe.experts.167.w1", "model.layers.2.block_sparse_moe.experts.168.w1", "model.layers.2.block_sparse_moe.experts.169.w1", "model.layers.2.block_sparse_moe.experts.170.w1", "model.layers.2.block_sparse_moe.experts.171.w1", "model.layers.2.block_sparse_moe.experts.172.w1", "model.layers.2.block_sparse_moe.experts.173.w1", "model.layers.2.block_sparse_moe.experts.174.w1", "model.layers.2.block_sparse_moe.experts.175.w1", "model.layers.2.block_sparse_moe.experts.176.w1", "model.layers.2.block_sparse_moe.experts.177.w1", "model.layers.2.block_sparse_moe.experts.178.w1", "model.layers.2.block_sparse_moe.experts.179.w1", "model.layers.2.block_sparse_moe.experts.180.w1", "model.layers.2.block_sparse_moe.experts.181.w1", "model.layers.2.block_sparse_moe.experts.182.w1", "model.layers.2.block_sparse_moe.experts.183.w1", "model.layers.2.block_sparse_moe.experts.184.w1", "model.layers.2.block_sparse_moe.experts.185.w1", "model.layers.2.block_sparse_moe.experts.186.w1", "model.layers.2.block_sparse_moe.experts.187.w1", "model.layers.2.block_sparse_moe.experts.188.w1", "model.layers.2.block_sparse_moe.experts.189.w1", "model.layers.2.block_sparse_moe.experts.190.w1", "model.layers.2.block_sparse_moe.experts.191.w1", "model.layers.2.block_sparse_moe.experts.192.w1", "model.layers.2.block_sparse_moe.experts.193.w1", "model.layers.2.block_sparse_moe.experts.194.w1", "model.layers.2.block_sparse_moe.experts.195.w1", "model.layers.2.block_sparse_moe.experts.196.w1", "model.layers.2.block_sparse_moe.experts.197.w1", "model.layers.2.block_sparse_moe.experts.198.w1", "model.layers.2.block_sparse_moe.experts.199.w1", "model.layers.2.block_sparse_moe.experts.200.w1", "model.layers.2.block_sparse_moe.experts.201.w1", "model.layers.2.block_sparse_moe.experts.202.w1", "model.layers.2.block_sparse_moe.experts.203.w1", "model.layers.2.block_sparse_moe.experts.204.w1", "model.layers.2.block_sparse_moe.experts.205.w1", "model.layers.2.block_sparse_moe.experts.206.w1", "model.layers.2.block_sparse_moe.experts.207.w1", "model.layers.2.block_sparse_moe.experts.208.w1", "model.layers.2.block_sparse_moe.experts.209.w1", "model.layers.2.block_sparse_moe.experts.210.w1", "model.layers.2.block_sparse_moe.experts.211.w1", "model.layers.2.block_sparse_moe.experts.212.w1", "model.layers.2.block_sparse_moe.experts.213.w1", "model.layers.2.block_sparse_moe.experts.214.w1", "model.layers.2.block_sparse_moe.experts.215.w1", "model.layers.2.block_sparse_moe.experts.216.w1", "model.layers.2.block_sparse_moe.experts.217.w1", "model.layers.2.block_sparse_moe.experts.218.w1", "model.layers.2.block_sparse_moe.experts.219.w1", "model.layers.2.block_sparse_moe.experts.220.w1", "model.layers.2.block_sparse_moe.experts.221.w1", "model.layers.2.block_sparse_moe.experts.222.w1", "model.layers.2.block_sparse_moe.experts.223.w1", "model.layers.2.block_sparse_moe.experts.224.w1", "model.layers.2.block_sparse_moe.experts.225.w1", "model.layers.2.block_sparse_moe.experts.226.w1", "model.layers.2.block_sparse_moe.experts.227.w1", "model.layers.2.block_sparse_moe.experts.228.w1", "model.layers.2.block_sparse_moe.experts.229.w1", "model.layers.2.block_sparse_moe.experts.230.w1", "model.layers.2.block_sparse_moe.experts.231.w1", "model.layers.2.block_sparse_moe.experts.232.w1", "model.layers.2.block_sparse_moe.experts.233.w1", "model.layers.2.block_sparse_moe.experts.234.w1", "model.layers.2.block_sparse_moe.experts.235.w1", "model.layers.2.block_sparse_moe.experts.236.w1", "model.layers.2.block_sparse_moe.experts.237.w1", "model.layers.2.block_sparse_moe.experts.238.w1", "model.layers.2.block_sparse_moe.experts.239.w1", "model.layers.2.block_sparse_moe.experts.240.w1", "model.layers.2.block_sparse_moe.experts.241.w1", "model.layers.2.block_sparse_moe.experts.242.w1", "model.layers.2.block_sparse_moe.experts.243.w1", "model.layers.2.block_sparse_moe.experts.244.w1", "model.layers.2.block_sparse_moe.experts.245.w1", "model.layers.2.block_sparse_moe.experts.246.w1", "model.layers.2.block_sparse_moe.experts.247.w1", "model.layers.2.block_sparse_moe.experts.248.w1", "model.layers.2.block_sparse_moe.experts.249.w1", "model.layers.2.block_sparse_moe.experts.250.w1", "model.layers.2.block_sparse_moe.experts.251.w1", "model.layers.2.block_sparse_moe.experts.252.w1", "model.layers.2.block_sparse_moe.experts.253.w1", "model.layers.2.block_sparse_moe.experts.254.w1", "model.layers.2.block_sparse_moe.experts.255.w1", "model.layers.2.block_sparse_moe.experts.0.w3", "model.layers.2.block_sparse_moe.experts.1.w3", "model.layers.2.block_sparse_moe.experts.2.w3", "model.layers.2.block_sparse_moe.experts.3.w3", "model.layers.2.block_sparse_moe.experts.4.w3", "model.layers.2.block_sparse_moe.experts.5.w3", "model.layers.2.block_sparse_moe.experts.6.w3", "model.layers.2.block_sparse_moe.experts.7.w3", "model.layers.2.block_sparse_moe.experts.8.w3", "model.layers.2.block_sparse_moe.experts.9.w3", "model.layers.2.block_sparse_moe.experts.10.w3", "model.layers.2.block_sparse_moe.experts.11.w3", "model.layers.2.block_sparse_moe.experts.12.w3", "model.layers.2.block_sparse_moe.experts.13.w3", "model.layers.2.block_sparse_moe.experts.14.w3", "model.layers.2.block_sparse_moe.experts.15.w3", "model.layers.2.block_sparse_moe.experts.16.w3", "model.layers.2.block_sparse_moe.experts.17.w3", "model.layers.2.block_sparse_moe.experts.18.w3", "model.layers.2.block_sparse_moe.experts.19.w3", "model.layers.2.block_sparse_moe.experts.20.w3", "model.layers.2.block_sparse_moe.experts.21.w3", "model.layers.2.block_sparse_moe.experts.22.w3", "model.layers.2.block_sparse_moe.experts.23.w3", "model.layers.2.block_sparse_moe.experts.24.w3", "model.layers.2.block_sparse_moe.experts.25.w3", "model.layers.2.block_sparse_moe.experts.26.w3", "model.layers.2.block_sparse_moe.experts.27.w3", "model.layers.2.block_sparse_moe.experts.28.w3", "model.layers.2.block_sparse_moe.experts.29.w3", "model.layers.2.block_sparse_moe.experts.30.w3", "model.layers.2.block_sparse_moe.experts.31.w3", "model.layers.2.block_sparse_moe.experts.32.w3", "model.layers.2.block_sparse_moe.experts.33.w3", "model.layers.2.block_sparse_moe.experts.34.w3", "model.layers.2.block_sparse_moe.experts.35.w3", "model.layers.2.block_sparse_moe.experts.36.w3", "model.layers.2.block_sparse_moe.experts.37.w3", "model.layers.2.block_sparse_moe.experts.38.w3", "model.layers.2.block_sparse_moe.experts.39.w3", "model.layers.2.block_sparse_moe.experts.40.w3", "model.layers.2.block_sparse_moe.experts.41.w3", "model.layers.2.block_sparse_moe.experts.42.w3", "model.layers.2.block_sparse_moe.experts.43.w3", "model.layers.2.block_sparse_moe.experts.44.w3", "model.layers.2.block_sparse_moe.experts.45.w3", "model.layers.2.block_sparse_moe.experts.46.w3", "model.layers.2.block_sparse_moe.experts.47.w3", "model.layers.2.block_sparse_moe.experts.48.w3", "model.layers.2.block_sparse_moe.experts.49.w3", "model.layers.2.block_sparse_moe.experts.50.w3", "model.layers.2.block_sparse_moe.experts.51.w3", "model.layers.2.block_sparse_moe.experts.52.w3", "model.layers.2.block_sparse_moe.experts.53.w3", "model.layers.2.block_sparse_moe.experts.54.w3", "model.layers.2.block_sparse_moe.experts.55.w3", "model.layers.2.block_sparse_moe.experts.56.w3", "model.layers.2.block_sparse_moe.experts.57.w3", "model.layers.2.block_sparse_moe.experts.58.w3", "model.layers.2.block_sparse_moe.experts.59.w3", "model.layers.2.block_sparse_moe.experts.60.w3", "model.layers.2.block_sparse_moe.experts.61.w3", "model.layers.2.block_sparse_moe.experts.62.w3", "model.layers.2.block_sparse_moe.experts.63.w3", "model.layers.2.block_sparse_moe.experts.64.w3", "model.layers.2.block_sparse_moe.experts.65.w3", "model.layers.2.block_sparse_moe.experts.66.w3", "model.layers.2.block_sparse_moe.experts.67.w3", "model.layers.2.block_sparse_moe.experts.68.w3", "model.layers.2.block_sparse_moe.experts.69.w3", "model.layers.2.block_sparse_moe.experts.70.w3", "model.layers.2.block_sparse_moe.experts.71.w3", "model.layers.2.block_sparse_moe.experts.72.w3", "model.layers.2.block_sparse_moe.experts.73.w3", "model.layers.2.block_sparse_moe.experts.74.w3", "model.layers.2.block_sparse_moe.experts.75.w3", "model.layers.2.block_sparse_moe.experts.76.w3", "model.layers.2.block_sparse_moe.experts.77.w3", "model.layers.2.block_sparse_moe.experts.78.w3", "model.layers.2.block_sparse_moe.experts.79.w3", "model.layers.2.block_sparse_moe.experts.80.w3", "model.layers.2.block_sparse_moe.experts.81.w3", "model.layers.2.block_sparse_moe.experts.82.w3", "model.layers.2.block_sparse_moe.experts.83.w3", "model.layers.2.block_sparse_moe.experts.84.w3", "model.layers.2.block_sparse_moe.experts.85.w3", "model.layers.2.block_sparse_moe.experts.86.w3", "model.layers.2.block_sparse_moe.experts.87.w3", "model.layers.2.block_sparse_moe.experts.88.w3", "model.layers.2.block_sparse_moe.experts.89.w3", "model.layers.2.block_sparse_moe.experts.90.w3", "model.layers.2.block_sparse_moe.experts.91.w3", "model.layers.2.block_sparse_moe.experts.92.w3", "model.layers.2.block_sparse_moe.experts.93.w3", "model.layers.2.block_sparse_moe.experts.94.w3", "model.layers.2.block_sparse_moe.experts.95.w3", "model.layers.2.block_sparse_moe.experts.96.w3", "model.layers.2.block_sparse_moe.experts.97.w3", "model.layers.2.block_sparse_moe.experts.98.w3", "model.layers.2.block_sparse_moe.experts.99.w3", "model.layers.2.block_sparse_moe.experts.100.w3", "model.layers.2.block_sparse_moe.experts.101.w3", "model.layers.2.block_sparse_moe.experts.102.w3", "model.layers.2.block_sparse_moe.experts.103.w3", "model.layers.2.block_sparse_moe.experts.104.w3", "model.layers.2.block_sparse_moe.experts.105.w3", "model.layers.2.block_sparse_moe.experts.106.w3", "model.layers.2.block_sparse_moe.experts.107.w3", "model.layers.2.block_sparse_moe.experts.108.w3", "model.layers.2.block_sparse_moe.experts.109.w3", "model.layers.2.block_sparse_moe.experts.110.w3", "model.layers.2.block_sparse_moe.experts.111.w3", "model.layers.2.block_sparse_moe.experts.112.w3", "model.layers.2.block_sparse_moe.experts.113.w3", "model.layers.2.block_sparse_moe.experts.114.w3", "model.layers.2.block_sparse_moe.experts.115.w3", "model.layers.2.block_sparse_moe.experts.116.w3", "model.layers.2.block_sparse_moe.experts.117.w3", "model.layers.2.block_sparse_moe.experts.118.w3", "model.layers.2.block_sparse_moe.experts.119.w3", "model.layers.2.block_sparse_moe.experts.120.w3", "model.layers.2.block_sparse_moe.experts.121.w3", "model.layers.2.block_sparse_moe.experts.122.w3", "model.layers.2.block_sparse_moe.experts.123.w3", "model.layers.2.block_sparse_moe.experts.124.w3", "model.layers.2.block_sparse_moe.experts.125.w3", "model.layers.2.block_sparse_moe.experts.126.w3", "model.layers.2.block_sparse_moe.experts.127.w3", "model.layers.2.block_sparse_moe.experts.128.w3", "model.layers.2.block_sparse_moe.experts.129.w3", "model.layers.2.block_sparse_moe.experts.130.w3", "model.layers.2.block_sparse_moe.experts.131.w3", "model.layers.2.block_sparse_moe.experts.132.w3", "model.layers.2.block_sparse_moe.experts.133.w3", "model.layers.2.block_sparse_moe.experts.134.w3", "model.layers.2.block_sparse_moe.experts.135.w3", "model.layers.2.block_sparse_moe.experts.136.w3", "model.layers.2.block_sparse_moe.experts.137.w3", "model.layers.2.block_sparse_moe.experts.138.w3", "model.layers.2.block_sparse_moe.experts.139.w3", "model.layers.2.block_sparse_moe.experts.140.w3", "model.layers.2.block_sparse_moe.experts.141.w3", "model.layers.2.block_sparse_moe.experts.142.w3", "model.layers.2.block_sparse_moe.experts.143.w3", "model.layers.2.block_sparse_moe.experts.144.w3", "model.layers.2.block_sparse_moe.experts.145.w3", "model.layers.2.block_sparse_moe.experts.146.w3", "model.layers.2.block_sparse_moe.experts.147.w3", "model.layers.2.block_sparse_moe.experts.148.w3", "model.layers.2.block_sparse_moe.experts.149.w3", "model.layers.2.block_sparse_moe.experts.150.w3", "model.layers.2.block_sparse_moe.experts.151.w3", "model.layers.2.block_sparse_moe.experts.152.w3", "model.layers.2.block_sparse_moe.experts.153.w3", "model.layers.2.block_sparse_moe.experts.154.w3", "model.layers.2.block_sparse_moe.experts.155.w3", "model.layers.2.block_sparse_moe.experts.156.w3", "model.layers.2.block_sparse_moe.experts.157.w3", "model.layers.2.block_sparse_moe.experts.158.w3", "model.layers.2.block_sparse_moe.experts.159.w3", "model.layers.2.block_sparse_moe.experts.160.w3", "model.layers.2.block_sparse_moe.experts.161.w3", "model.layers.2.block_sparse_moe.experts.162.w3", "model.layers.2.block_sparse_moe.experts.163.w3", "model.layers.2.block_sparse_moe.experts.164.w3", "model.layers.2.block_sparse_moe.experts.165.w3", "model.layers.2.block_sparse_moe.experts.166.w3", "model.layers.2.block_sparse_moe.experts.167.w3", "model.layers.2.block_sparse_moe.experts.168.w3", "model.layers.2.block_sparse_moe.experts.169.w3", "model.layers.2.block_sparse_moe.experts.170.w3", "model.layers.2.block_sparse_moe.experts.171.w3", "model.layers.2.block_sparse_moe.experts.172.w3", "model.layers.2.block_sparse_moe.experts.173.w3", "model.layers.2.block_sparse_moe.experts.174.w3", "model.layers.2.block_sparse_moe.experts.175.w3", "model.layers.2.block_sparse_moe.experts.176.w3", "model.layers.2.block_sparse_moe.experts.177.w3", "model.layers.2.block_sparse_moe.experts.178.w3", "model.layers.2.block_sparse_moe.experts.179.w3", "model.layers.2.block_sparse_moe.experts.180.w3", "model.layers.2.block_sparse_moe.experts.181.w3", "model.layers.2.block_sparse_moe.experts.182.w3", "model.layers.2.block_sparse_moe.experts.183.w3", "model.layers.2.block_sparse_moe.experts.184.w3", "model.layers.2.block_sparse_moe.experts.185.w3", "model.layers.2.block_sparse_moe.experts.186.w3", "model.layers.2.block_sparse_moe.experts.187.w3", "model.layers.2.block_sparse_moe.experts.188.w3", "model.layers.2.block_sparse_moe.experts.189.w3", "model.layers.2.block_sparse_moe.experts.190.w3", "model.layers.2.block_sparse_moe.experts.191.w3", "model.layers.2.block_sparse_moe.experts.192.w3", "model.layers.2.block_sparse_moe.experts.193.w3", "model.layers.2.block_sparse_moe.experts.194.w3", "model.layers.2.block_sparse_moe.experts.195.w3", "model.layers.2.block_sparse_moe.experts.196.w3", "model.layers.2.block_sparse_moe.experts.197.w3", "model.layers.2.block_sparse_moe.experts.198.w3", "model.layers.2.block_sparse_moe.experts.199.w3", "model.layers.2.block_sparse_moe.experts.200.w3", "model.layers.2.block_sparse_moe.experts.201.w3", "model.layers.2.block_sparse_moe.experts.202.w3", "model.layers.2.block_sparse_moe.experts.203.w3", "model.layers.2.block_sparse_moe.experts.204.w3", "model.layers.2.block_sparse_moe.experts.205.w3", "model.layers.2.block_sparse_moe.experts.206.w3", "model.layers.2.block_sparse_moe.experts.207.w3", "model.layers.2.block_sparse_moe.experts.208.w3", "model.layers.2.block_sparse_moe.experts.209.w3", "model.layers.2.block_sparse_moe.experts.210.w3", "model.layers.2.block_sparse_moe.experts.211.w3", "model.layers.2.block_sparse_moe.experts.212.w3", "model.layers.2.block_sparse_moe.experts.213.w3", "model.layers.2.block_sparse_moe.experts.214.w3", "model.layers.2.block_sparse_moe.experts.215.w3", "model.layers.2.block_sparse_moe.experts.216.w3", "model.layers.2.block_sparse_moe.experts.217.w3", "model.layers.2.block_sparse_moe.experts.218.w3", "model.layers.2.block_sparse_moe.experts.219.w3", "model.layers.2.block_sparse_moe.experts.220.w3", "model.layers.2.block_sparse_moe.experts.221.w3", "model.layers.2.block_sparse_moe.experts.222.w3", "model.layers.2.block_sparse_moe.experts.223.w3", "model.layers.2.block_sparse_moe.experts.224.w3", "model.layers.2.block_sparse_moe.experts.225.w3", "model.layers.2.block_sparse_moe.experts.226.w3", "model.layers.2.block_sparse_moe.experts.227.w3", "model.layers.2.block_sparse_moe.experts.228.w3", "model.layers.2.block_sparse_moe.experts.229.w3", "model.layers.2.block_sparse_moe.experts.230.w3", "model.layers.2.block_sparse_moe.experts.231.w3", "model.layers.2.block_sparse_moe.experts.232.w3", "model.layers.2.block_sparse_moe.experts.233.w3", "model.layers.2.block_sparse_moe.experts.234.w3", "model.layers.2.block_sparse_moe.experts.235.w3", "model.layers.2.block_sparse_moe.experts.236.w3", "model.layers.2.block_sparse_moe.experts.237.w3", "model.layers.2.block_sparse_moe.experts.238.w3", "model.layers.2.block_sparse_moe.experts.239.w3", "model.layers.2.block_sparse_moe.experts.240.w3", "model.layers.2.block_sparse_moe.experts.241.w3", "model.layers.2.block_sparse_moe.experts.242.w3", "model.layers.2.block_sparse_moe.experts.243.w3", "model.layers.2.block_sparse_moe.experts.244.w3", "model.layers.2.block_sparse_moe.experts.245.w3", "model.layers.2.block_sparse_moe.experts.246.w3", "model.layers.2.block_sparse_moe.experts.247.w3", "model.layers.2.block_sparse_moe.experts.248.w3", "model.layers.2.block_sparse_moe.experts.249.w3", "model.layers.2.block_sparse_moe.experts.250.w3", "model.layers.2.block_sparse_moe.experts.251.w3", "model.layers.2.block_sparse_moe.experts.252.w3", "model.layers.2.block_sparse_moe.experts.253.w3", "model.layers.2.block_sparse_moe.experts.254.w3", "model.layers.2.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.003173669427633252, "dbits": 2415919104 } ] }, { "idx": 14, "layers": [ "model.layers.2.block_sparse_moe.experts.0.w2", "model.layers.2.block_sparse_moe.experts.1.w2", "model.layers.2.block_sparse_moe.experts.2.w2", "model.layers.2.block_sparse_moe.experts.3.w2", "model.layers.2.block_sparse_moe.experts.4.w2", "model.layers.2.block_sparse_moe.experts.5.w2", "model.layers.2.block_sparse_moe.experts.6.w2", "model.layers.2.block_sparse_moe.experts.7.w2", "model.layers.2.block_sparse_moe.experts.8.w2", "model.layers.2.block_sparse_moe.experts.9.w2", "model.layers.2.block_sparse_moe.experts.10.w2", "model.layers.2.block_sparse_moe.experts.11.w2", "model.layers.2.block_sparse_moe.experts.12.w2", "model.layers.2.block_sparse_moe.experts.13.w2", "model.layers.2.block_sparse_moe.experts.14.w2", "model.layers.2.block_sparse_moe.experts.15.w2", "model.layers.2.block_sparse_moe.experts.16.w2", "model.layers.2.block_sparse_moe.experts.17.w2", "model.layers.2.block_sparse_moe.experts.18.w2", "model.layers.2.block_sparse_moe.experts.19.w2", "model.layers.2.block_sparse_moe.experts.20.w2", "model.layers.2.block_sparse_moe.experts.21.w2", "model.layers.2.block_sparse_moe.experts.22.w2", "model.layers.2.block_sparse_moe.experts.23.w2", "model.layers.2.block_sparse_moe.experts.24.w2", "model.layers.2.block_sparse_moe.experts.25.w2", "model.layers.2.block_sparse_moe.experts.26.w2", "model.layers.2.block_sparse_moe.experts.27.w2", "model.layers.2.block_sparse_moe.experts.28.w2", "model.layers.2.block_sparse_moe.experts.29.w2", "model.layers.2.block_sparse_moe.experts.30.w2", "model.layers.2.block_sparse_moe.experts.31.w2", "model.layers.2.block_sparse_moe.experts.32.w2", "model.layers.2.block_sparse_moe.experts.33.w2", "model.layers.2.block_sparse_moe.experts.34.w2", "model.layers.2.block_sparse_moe.experts.35.w2", "model.layers.2.block_sparse_moe.experts.36.w2", "model.layers.2.block_sparse_moe.experts.37.w2", "model.layers.2.block_sparse_moe.experts.38.w2", "model.layers.2.block_sparse_moe.experts.39.w2", "model.layers.2.block_sparse_moe.experts.40.w2", "model.layers.2.block_sparse_moe.experts.41.w2", "model.layers.2.block_sparse_moe.experts.42.w2", "model.layers.2.block_sparse_moe.experts.43.w2", "model.layers.2.block_sparse_moe.experts.44.w2", "model.layers.2.block_sparse_moe.experts.45.w2", "model.layers.2.block_sparse_moe.experts.46.w2", "model.layers.2.block_sparse_moe.experts.47.w2", "model.layers.2.block_sparse_moe.experts.48.w2", "model.layers.2.block_sparse_moe.experts.49.w2", "model.layers.2.block_sparse_moe.experts.50.w2", "model.layers.2.block_sparse_moe.experts.51.w2", "model.layers.2.block_sparse_moe.experts.52.w2", "model.layers.2.block_sparse_moe.experts.53.w2", "model.layers.2.block_sparse_moe.experts.54.w2", "model.layers.2.block_sparse_moe.experts.55.w2", "model.layers.2.block_sparse_moe.experts.56.w2", "model.layers.2.block_sparse_moe.experts.57.w2", "model.layers.2.block_sparse_moe.experts.58.w2", "model.layers.2.block_sparse_moe.experts.59.w2", "model.layers.2.block_sparse_moe.experts.60.w2", "model.layers.2.block_sparse_moe.experts.61.w2", "model.layers.2.block_sparse_moe.experts.62.w2", "model.layers.2.block_sparse_moe.experts.63.w2", "model.layers.2.block_sparse_moe.experts.64.w2", "model.layers.2.block_sparse_moe.experts.65.w2", "model.layers.2.block_sparse_moe.experts.66.w2", "model.layers.2.block_sparse_moe.experts.67.w2", "model.layers.2.block_sparse_moe.experts.68.w2", "model.layers.2.block_sparse_moe.experts.69.w2", "model.layers.2.block_sparse_moe.experts.70.w2", "model.layers.2.block_sparse_moe.experts.71.w2", "model.layers.2.block_sparse_moe.experts.72.w2", "model.layers.2.block_sparse_moe.experts.73.w2", "model.layers.2.block_sparse_moe.experts.74.w2", "model.layers.2.block_sparse_moe.experts.75.w2", "model.layers.2.block_sparse_moe.experts.76.w2", "model.layers.2.block_sparse_moe.experts.77.w2", "model.layers.2.block_sparse_moe.experts.78.w2", "model.layers.2.block_sparse_moe.experts.79.w2", "model.layers.2.block_sparse_moe.experts.80.w2", "model.layers.2.block_sparse_moe.experts.81.w2", "model.layers.2.block_sparse_moe.experts.82.w2", "model.layers.2.block_sparse_moe.experts.83.w2", "model.layers.2.block_sparse_moe.experts.84.w2", "model.layers.2.block_sparse_moe.experts.85.w2", "model.layers.2.block_sparse_moe.experts.86.w2", "model.layers.2.block_sparse_moe.experts.87.w2", "model.layers.2.block_sparse_moe.experts.88.w2", "model.layers.2.block_sparse_moe.experts.89.w2", "model.layers.2.block_sparse_moe.experts.90.w2", "model.layers.2.block_sparse_moe.experts.91.w2", "model.layers.2.block_sparse_moe.experts.92.w2", "model.layers.2.block_sparse_moe.experts.93.w2", "model.layers.2.block_sparse_moe.experts.94.w2", "model.layers.2.block_sparse_moe.experts.95.w2", "model.layers.2.block_sparse_moe.experts.96.w2", "model.layers.2.block_sparse_moe.experts.97.w2", "model.layers.2.block_sparse_moe.experts.98.w2", "model.layers.2.block_sparse_moe.experts.99.w2", "model.layers.2.block_sparse_moe.experts.100.w2", "model.layers.2.block_sparse_moe.experts.101.w2", "model.layers.2.block_sparse_moe.experts.102.w2", "model.layers.2.block_sparse_moe.experts.103.w2", "model.layers.2.block_sparse_moe.experts.104.w2", "model.layers.2.block_sparse_moe.experts.105.w2", "model.layers.2.block_sparse_moe.experts.106.w2", "model.layers.2.block_sparse_moe.experts.107.w2", "model.layers.2.block_sparse_moe.experts.108.w2", "model.layers.2.block_sparse_moe.experts.109.w2", "model.layers.2.block_sparse_moe.experts.110.w2", "model.layers.2.block_sparse_moe.experts.111.w2", "model.layers.2.block_sparse_moe.experts.112.w2", "model.layers.2.block_sparse_moe.experts.113.w2", "model.layers.2.block_sparse_moe.experts.114.w2", "model.layers.2.block_sparse_moe.experts.115.w2", "model.layers.2.block_sparse_moe.experts.116.w2", "model.layers.2.block_sparse_moe.experts.117.w2", "model.layers.2.block_sparse_moe.experts.118.w2", "model.layers.2.block_sparse_moe.experts.119.w2", "model.layers.2.block_sparse_moe.experts.120.w2", "model.layers.2.block_sparse_moe.experts.121.w2", "model.layers.2.block_sparse_moe.experts.122.w2", "model.layers.2.block_sparse_moe.experts.123.w2", "model.layers.2.block_sparse_moe.experts.124.w2", "model.layers.2.block_sparse_moe.experts.125.w2", "model.layers.2.block_sparse_moe.experts.126.w2", "model.layers.2.block_sparse_moe.experts.127.w2", "model.layers.2.block_sparse_moe.experts.128.w2", "model.layers.2.block_sparse_moe.experts.129.w2", "model.layers.2.block_sparse_moe.experts.130.w2", "model.layers.2.block_sparse_moe.experts.131.w2", "model.layers.2.block_sparse_moe.experts.132.w2", "model.layers.2.block_sparse_moe.experts.133.w2", "model.layers.2.block_sparse_moe.experts.134.w2", "model.layers.2.block_sparse_moe.experts.135.w2", "model.layers.2.block_sparse_moe.experts.136.w2", "model.layers.2.block_sparse_moe.experts.137.w2", "model.layers.2.block_sparse_moe.experts.138.w2", "model.layers.2.block_sparse_moe.experts.139.w2", "model.layers.2.block_sparse_moe.experts.140.w2", "model.layers.2.block_sparse_moe.experts.141.w2", "model.layers.2.block_sparse_moe.experts.142.w2", "model.layers.2.block_sparse_moe.experts.143.w2", "model.layers.2.block_sparse_moe.experts.144.w2", "model.layers.2.block_sparse_moe.experts.145.w2", "model.layers.2.block_sparse_moe.experts.146.w2", "model.layers.2.block_sparse_moe.experts.147.w2", "model.layers.2.block_sparse_moe.experts.148.w2", "model.layers.2.block_sparse_moe.experts.149.w2", "model.layers.2.block_sparse_moe.experts.150.w2", "model.layers.2.block_sparse_moe.experts.151.w2", "model.layers.2.block_sparse_moe.experts.152.w2", "model.layers.2.block_sparse_moe.experts.153.w2", "model.layers.2.block_sparse_moe.experts.154.w2", "model.layers.2.block_sparse_moe.experts.155.w2", "model.layers.2.block_sparse_moe.experts.156.w2", "model.layers.2.block_sparse_moe.experts.157.w2", "model.layers.2.block_sparse_moe.experts.158.w2", "model.layers.2.block_sparse_moe.experts.159.w2", "model.layers.2.block_sparse_moe.experts.160.w2", "model.layers.2.block_sparse_moe.experts.161.w2", "model.layers.2.block_sparse_moe.experts.162.w2", "model.layers.2.block_sparse_moe.experts.163.w2", "model.layers.2.block_sparse_moe.experts.164.w2", "model.layers.2.block_sparse_moe.experts.165.w2", "model.layers.2.block_sparse_moe.experts.166.w2", "model.layers.2.block_sparse_moe.experts.167.w2", "model.layers.2.block_sparse_moe.experts.168.w2", "model.layers.2.block_sparse_moe.experts.169.w2", "model.layers.2.block_sparse_moe.experts.170.w2", "model.layers.2.block_sparse_moe.experts.171.w2", "model.layers.2.block_sparse_moe.experts.172.w2", "model.layers.2.block_sparse_moe.experts.173.w2", "model.layers.2.block_sparse_moe.experts.174.w2", "model.layers.2.block_sparse_moe.experts.175.w2", "model.layers.2.block_sparse_moe.experts.176.w2", "model.layers.2.block_sparse_moe.experts.177.w2", "model.layers.2.block_sparse_moe.experts.178.w2", "model.layers.2.block_sparse_moe.experts.179.w2", "model.layers.2.block_sparse_moe.experts.180.w2", "model.layers.2.block_sparse_moe.experts.181.w2", "model.layers.2.block_sparse_moe.experts.182.w2", "model.layers.2.block_sparse_moe.experts.183.w2", "model.layers.2.block_sparse_moe.experts.184.w2", "model.layers.2.block_sparse_moe.experts.185.w2", "model.layers.2.block_sparse_moe.experts.186.w2", "model.layers.2.block_sparse_moe.experts.187.w2", "model.layers.2.block_sparse_moe.experts.188.w2", "model.layers.2.block_sparse_moe.experts.189.w2", "model.layers.2.block_sparse_moe.experts.190.w2", "model.layers.2.block_sparse_moe.experts.191.w2", "model.layers.2.block_sparse_moe.experts.192.w2", "model.layers.2.block_sparse_moe.experts.193.w2", "model.layers.2.block_sparse_moe.experts.194.w2", "model.layers.2.block_sparse_moe.experts.195.w2", "model.layers.2.block_sparse_moe.experts.196.w2", "model.layers.2.block_sparse_moe.experts.197.w2", "model.layers.2.block_sparse_moe.experts.198.w2", "model.layers.2.block_sparse_moe.experts.199.w2", "model.layers.2.block_sparse_moe.experts.200.w2", "model.layers.2.block_sparse_moe.experts.201.w2", "model.layers.2.block_sparse_moe.experts.202.w2", "model.layers.2.block_sparse_moe.experts.203.w2", "model.layers.2.block_sparse_moe.experts.204.w2", "model.layers.2.block_sparse_moe.experts.205.w2", "model.layers.2.block_sparse_moe.experts.206.w2", "model.layers.2.block_sparse_moe.experts.207.w2", "model.layers.2.block_sparse_moe.experts.208.w2", "model.layers.2.block_sparse_moe.experts.209.w2", "model.layers.2.block_sparse_moe.experts.210.w2", "model.layers.2.block_sparse_moe.experts.211.w2", "model.layers.2.block_sparse_moe.experts.212.w2", "model.layers.2.block_sparse_moe.experts.213.w2", "model.layers.2.block_sparse_moe.experts.214.w2", "model.layers.2.block_sparse_moe.experts.215.w2", "model.layers.2.block_sparse_moe.experts.216.w2", "model.layers.2.block_sparse_moe.experts.217.w2", "model.layers.2.block_sparse_moe.experts.218.w2", "model.layers.2.block_sparse_moe.experts.219.w2", "model.layers.2.block_sparse_moe.experts.220.w2", "model.layers.2.block_sparse_moe.experts.221.w2", "model.layers.2.block_sparse_moe.experts.222.w2", "model.layers.2.block_sparse_moe.experts.223.w2", "model.layers.2.block_sparse_moe.experts.224.w2", "model.layers.2.block_sparse_moe.experts.225.w2", "model.layers.2.block_sparse_moe.experts.226.w2", "model.layers.2.block_sparse_moe.experts.227.w2", "model.layers.2.block_sparse_moe.experts.228.w2", "model.layers.2.block_sparse_moe.experts.229.w2", "model.layers.2.block_sparse_moe.experts.230.w2", "model.layers.2.block_sparse_moe.experts.231.w2", "model.layers.2.block_sparse_moe.experts.232.w2", "model.layers.2.block_sparse_moe.experts.233.w2", "model.layers.2.block_sparse_moe.experts.234.w2", "model.layers.2.block_sparse_moe.experts.235.w2", "model.layers.2.block_sparse_moe.experts.236.w2", "model.layers.2.block_sparse_moe.experts.237.w2", "model.layers.2.block_sparse_moe.experts.238.w2", "model.layers.2.block_sparse_moe.experts.239.w2", "model.layers.2.block_sparse_moe.experts.240.w2", "model.layers.2.block_sparse_moe.experts.241.w2", "model.layers.2.block_sparse_moe.experts.242.w2", "model.layers.2.block_sparse_moe.experts.243.w2", "model.layers.2.block_sparse_moe.experts.244.w2", "model.layers.2.block_sparse_moe.experts.245.w2", "model.layers.2.block_sparse_moe.experts.246.w2", "model.layers.2.block_sparse_moe.experts.247.w2", "model.layers.2.block_sparse_moe.experts.248.w2", "model.layers.2.block_sparse_moe.experts.249.w2", "model.layers.2.block_sparse_moe.experts.250.w2", "model.layers.2.block_sparse_moe.experts.251.w2", "model.layers.2.block_sparse_moe.experts.252.w2", "model.layers.2.block_sparse_moe.experts.253.w2", "model.layers.2.block_sparse_moe.experts.254.w2", "model.layers.2.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.0012847084552049526, "dbits": 1207959552 } ] }, { "idx": 15, "layers": [ "model.layers.3.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0013625189661979342, "dbits": 18874368 } ] }, { "idx": 16, "layers": [ "model.layers.3.self_attn.k_proj", "model.layers.3.self_attn.v_proj" ], "candidates": [ { "dkld": -3.677047789096832e-05, "dbits": 6291456 } ] }, { "idx": 17, "layers": [ "model.layers.3.self_attn.o_proj" ], "candidates": [ { "dkld": -0.00649765841662886, "dbits": 18874368 } ] }, { "idx": 18, "layers": [ "model.layers.3.block_sparse_moe.experts.0.w1", "model.layers.3.block_sparse_moe.experts.1.w1", "model.layers.3.block_sparse_moe.experts.2.w1", "model.layers.3.block_sparse_moe.experts.3.w1", "model.layers.3.block_sparse_moe.experts.4.w1", "model.layers.3.block_sparse_moe.experts.5.w1", "model.layers.3.block_sparse_moe.experts.6.w1", "model.layers.3.block_sparse_moe.experts.7.w1", "model.layers.3.block_sparse_moe.experts.8.w1", "model.layers.3.block_sparse_moe.experts.9.w1", "model.layers.3.block_sparse_moe.experts.10.w1", "model.layers.3.block_sparse_moe.experts.11.w1", "model.layers.3.block_sparse_moe.experts.12.w1", "model.layers.3.block_sparse_moe.experts.13.w1", "model.layers.3.block_sparse_moe.experts.14.w1", "model.layers.3.block_sparse_moe.experts.15.w1", "model.layers.3.block_sparse_moe.experts.16.w1", "model.layers.3.block_sparse_moe.experts.17.w1", "model.layers.3.block_sparse_moe.experts.18.w1", "model.layers.3.block_sparse_moe.experts.19.w1", "model.layers.3.block_sparse_moe.experts.20.w1", "model.layers.3.block_sparse_moe.experts.21.w1", "model.layers.3.block_sparse_moe.experts.22.w1", "model.layers.3.block_sparse_moe.experts.23.w1", "model.layers.3.block_sparse_moe.experts.24.w1", "model.layers.3.block_sparse_moe.experts.25.w1", "model.layers.3.block_sparse_moe.experts.26.w1", "model.layers.3.block_sparse_moe.experts.27.w1", "model.layers.3.block_sparse_moe.experts.28.w1", "model.layers.3.block_sparse_moe.experts.29.w1", "model.layers.3.block_sparse_moe.experts.30.w1", "model.layers.3.block_sparse_moe.experts.31.w1", "model.layers.3.block_sparse_moe.experts.32.w1", "model.layers.3.block_sparse_moe.experts.33.w1", "model.layers.3.block_sparse_moe.experts.34.w1", "model.layers.3.block_sparse_moe.experts.35.w1", "model.layers.3.block_sparse_moe.experts.36.w1", "model.layers.3.block_sparse_moe.experts.37.w1", "model.layers.3.block_sparse_moe.experts.38.w1", "model.layers.3.block_sparse_moe.experts.39.w1", "model.layers.3.block_sparse_moe.experts.40.w1", "model.layers.3.block_sparse_moe.experts.41.w1", "model.layers.3.block_sparse_moe.experts.42.w1", "model.layers.3.block_sparse_moe.experts.43.w1", "model.layers.3.block_sparse_moe.experts.44.w1", "model.layers.3.block_sparse_moe.experts.45.w1", "model.layers.3.block_sparse_moe.experts.46.w1", "model.layers.3.block_sparse_moe.experts.47.w1", "model.layers.3.block_sparse_moe.experts.48.w1", "model.layers.3.block_sparse_moe.experts.49.w1", "model.layers.3.block_sparse_moe.experts.50.w1", "model.layers.3.block_sparse_moe.experts.51.w1", "model.layers.3.block_sparse_moe.experts.52.w1", "model.layers.3.block_sparse_moe.experts.53.w1", "model.layers.3.block_sparse_moe.experts.54.w1", "model.layers.3.block_sparse_moe.experts.55.w1", "model.layers.3.block_sparse_moe.experts.56.w1", "model.layers.3.block_sparse_moe.experts.57.w1", "model.layers.3.block_sparse_moe.experts.58.w1", "model.layers.3.block_sparse_moe.experts.59.w1", "model.layers.3.block_sparse_moe.experts.60.w1", "model.layers.3.block_sparse_moe.experts.61.w1", "model.layers.3.block_sparse_moe.experts.62.w1", "model.layers.3.block_sparse_moe.experts.63.w1", "model.layers.3.block_sparse_moe.experts.64.w1", "model.layers.3.block_sparse_moe.experts.65.w1", "model.layers.3.block_sparse_moe.experts.66.w1", "model.layers.3.block_sparse_moe.experts.67.w1", "model.layers.3.block_sparse_moe.experts.68.w1", "model.layers.3.block_sparse_moe.experts.69.w1", "model.layers.3.block_sparse_moe.experts.70.w1", "model.layers.3.block_sparse_moe.experts.71.w1", "model.layers.3.block_sparse_moe.experts.72.w1", "model.layers.3.block_sparse_moe.experts.73.w1", "model.layers.3.block_sparse_moe.experts.74.w1", "model.layers.3.block_sparse_moe.experts.75.w1", "model.layers.3.block_sparse_moe.experts.76.w1", "model.layers.3.block_sparse_moe.experts.77.w1", "model.layers.3.block_sparse_moe.experts.78.w1", "model.layers.3.block_sparse_moe.experts.79.w1", "model.layers.3.block_sparse_moe.experts.80.w1", "model.layers.3.block_sparse_moe.experts.81.w1", "model.layers.3.block_sparse_moe.experts.82.w1", "model.layers.3.block_sparse_moe.experts.83.w1", "model.layers.3.block_sparse_moe.experts.84.w1", "model.layers.3.block_sparse_moe.experts.85.w1", "model.layers.3.block_sparse_moe.experts.86.w1", "model.layers.3.block_sparse_moe.experts.87.w1", "model.layers.3.block_sparse_moe.experts.88.w1", "model.layers.3.block_sparse_moe.experts.89.w1", "model.layers.3.block_sparse_moe.experts.90.w1", "model.layers.3.block_sparse_moe.experts.91.w1", "model.layers.3.block_sparse_moe.experts.92.w1", "model.layers.3.block_sparse_moe.experts.93.w1", "model.layers.3.block_sparse_moe.experts.94.w1", "model.layers.3.block_sparse_moe.experts.95.w1", "model.layers.3.block_sparse_moe.experts.96.w1", "model.layers.3.block_sparse_moe.experts.97.w1", "model.layers.3.block_sparse_moe.experts.98.w1", "model.layers.3.block_sparse_moe.experts.99.w1", "model.layers.3.block_sparse_moe.experts.100.w1", "model.layers.3.block_sparse_moe.experts.101.w1", "model.layers.3.block_sparse_moe.experts.102.w1", "model.layers.3.block_sparse_moe.experts.103.w1", "model.layers.3.block_sparse_moe.experts.104.w1", "model.layers.3.block_sparse_moe.experts.105.w1", "model.layers.3.block_sparse_moe.experts.106.w1", "model.layers.3.block_sparse_moe.experts.107.w1", "model.layers.3.block_sparse_moe.experts.108.w1", "model.layers.3.block_sparse_moe.experts.109.w1", "model.layers.3.block_sparse_moe.experts.110.w1", "model.layers.3.block_sparse_moe.experts.111.w1", "model.layers.3.block_sparse_moe.experts.112.w1", "model.layers.3.block_sparse_moe.experts.113.w1", "model.layers.3.block_sparse_moe.experts.114.w1", "model.layers.3.block_sparse_moe.experts.115.w1", "model.layers.3.block_sparse_moe.experts.116.w1", "model.layers.3.block_sparse_moe.experts.117.w1", "model.layers.3.block_sparse_moe.experts.118.w1", "model.layers.3.block_sparse_moe.experts.119.w1", "model.layers.3.block_sparse_moe.experts.120.w1", "model.layers.3.block_sparse_moe.experts.121.w1", "model.layers.3.block_sparse_moe.experts.122.w1", "model.layers.3.block_sparse_moe.experts.123.w1", "model.layers.3.block_sparse_moe.experts.124.w1", "model.layers.3.block_sparse_moe.experts.125.w1", "model.layers.3.block_sparse_moe.experts.126.w1", "model.layers.3.block_sparse_moe.experts.127.w1", "model.layers.3.block_sparse_moe.experts.128.w1", "model.layers.3.block_sparse_moe.experts.129.w1", "model.layers.3.block_sparse_moe.experts.130.w1", "model.layers.3.block_sparse_moe.experts.131.w1", "model.layers.3.block_sparse_moe.experts.132.w1", "model.layers.3.block_sparse_moe.experts.133.w1", "model.layers.3.block_sparse_moe.experts.134.w1", "model.layers.3.block_sparse_moe.experts.135.w1", "model.layers.3.block_sparse_moe.experts.136.w1", "model.layers.3.block_sparse_moe.experts.137.w1", "model.layers.3.block_sparse_moe.experts.138.w1", "model.layers.3.block_sparse_moe.experts.139.w1", "model.layers.3.block_sparse_moe.experts.140.w1", "model.layers.3.block_sparse_moe.experts.141.w1", "model.layers.3.block_sparse_moe.experts.142.w1", "model.layers.3.block_sparse_moe.experts.143.w1", "model.layers.3.block_sparse_moe.experts.144.w1", "model.layers.3.block_sparse_moe.experts.145.w1", "model.layers.3.block_sparse_moe.experts.146.w1", "model.layers.3.block_sparse_moe.experts.147.w1", "model.layers.3.block_sparse_moe.experts.148.w1", "model.layers.3.block_sparse_moe.experts.149.w1", "model.layers.3.block_sparse_moe.experts.150.w1", "model.layers.3.block_sparse_moe.experts.151.w1", "model.layers.3.block_sparse_moe.experts.152.w1", "model.layers.3.block_sparse_moe.experts.153.w1", "model.layers.3.block_sparse_moe.experts.154.w1", "model.layers.3.block_sparse_moe.experts.155.w1", "model.layers.3.block_sparse_moe.experts.156.w1", "model.layers.3.block_sparse_moe.experts.157.w1", "model.layers.3.block_sparse_moe.experts.158.w1", "model.layers.3.block_sparse_moe.experts.159.w1", "model.layers.3.block_sparse_moe.experts.160.w1", "model.layers.3.block_sparse_moe.experts.161.w1", "model.layers.3.block_sparse_moe.experts.162.w1", "model.layers.3.block_sparse_moe.experts.163.w1", "model.layers.3.block_sparse_moe.experts.164.w1", "model.layers.3.block_sparse_moe.experts.165.w1", "model.layers.3.block_sparse_moe.experts.166.w1", "model.layers.3.block_sparse_moe.experts.167.w1", "model.layers.3.block_sparse_moe.experts.168.w1", "model.layers.3.block_sparse_moe.experts.169.w1", "model.layers.3.block_sparse_moe.experts.170.w1", "model.layers.3.block_sparse_moe.experts.171.w1", "model.layers.3.block_sparse_moe.experts.172.w1", "model.layers.3.block_sparse_moe.experts.173.w1", "model.layers.3.block_sparse_moe.experts.174.w1", "model.layers.3.block_sparse_moe.experts.175.w1", "model.layers.3.block_sparse_moe.experts.176.w1", "model.layers.3.block_sparse_moe.experts.177.w1", "model.layers.3.block_sparse_moe.experts.178.w1", "model.layers.3.block_sparse_moe.experts.179.w1", "model.layers.3.block_sparse_moe.experts.180.w1", "model.layers.3.block_sparse_moe.experts.181.w1", "model.layers.3.block_sparse_moe.experts.182.w1", "model.layers.3.block_sparse_moe.experts.183.w1", "model.layers.3.block_sparse_moe.experts.184.w1", "model.layers.3.block_sparse_moe.experts.185.w1", "model.layers.3.block_sparse_moe.experts.186.w1", "model.layers.3.block_sparse_moe.experts.187.w1", "model.layers.3.block_sparse_moe.experts.188.w1", "model.layers.3.block_sparse_moe.experts.189.w1", "model.layers.3.block_sparse_moe.experts.190.w1", "model.layers.3.block_sparse_moe.experts.191.w1", "model.layers.3.block_sparse_moe.experts.192.w1", "model.layers.3.block_sparse_moe.experts.193.w1", "model.layers.3.block_sparse_moe.experts.194.w1", "model.layers.3.block_sparse_moe.experts.195.w1", "model.layers.3.block_sparse_moe.experts.196.w1", "model.layers.3.block_sparse_moe.experts.197.w1", "model.layers.3.block_sparse_moe.experts.198.w1", "model.layers.3.block_sparse_moe.experts.199.w1", "model.layers.3.block_sparse_moe.experts.200.w1", "model.layers.3.block_sparse_moe.experts.201.w1", "model.layers.3.block_sparse_moe.experts.202.w1", "model.layers.3.block_sparse_moe.experts.203.w1", "model.layers.3.block_sparse_moe.experts.204.w1", "model.layers.3.block_sparse_moe.experts.205.w1", "model.layers.3.block_sparse_moe.experts.206.w1", "model.layers.3.block_sparse_moe.experts.207.w1", "model.layers.3.block_sparse_moe.experts.208.w1", "model.layers.3.block_sparse_moe.experts.209.w1", "model.layers.3.block_sparse_moe.experts.210.w1", "model.layers.3.block_sparse_moe.experts.211.w1", "model.layers.3.block_sparse_moe.experts.212.w1", "model.layers.3.block_sparse_moe.experts.213.w1", "model.layers.3.block_sparse_moe.experts.214.w1", "model.layers.3.block_sparse_moe.experts.215.w1", "model.layers.3.block_sparse_moe.experts.216.w1", "model.layers.3.block_sparse_moe.experts.217.w1", "model.layers.3.block_sparse_moe.experts.218.w1", "model.layers.3.block_sparse_moe.experts.219.w1", "model.layers.3.block_sparse_moe.experts.220.w1", "model.layers.3.block_sparse_moe.experts.221.w1", "model.layers.3.block_sparse_moe.experts.222.w1", "model.layers.3.block_sparse_moe.experts.223.w1", "model.layers.3.block_sparse_moe.experts.224.w1", "model.layers.3.block_sparse_moe.experts.225.w1", "model.layers.3.block_sparse_moe.experts.226.w1", "model.layers.3.block_sparse_moe.experts.227.w1", "model.layers.3.block_sparse_moe.experts.228.w1", "model.layers.3.block_sparse_moe.experts.229.w1", "model.layers.3.block_sparse_moe.experts.230.w1", "model.layers.3.block_sparse_moe.experts.231.w1", "model.layers.3.block_sparse_moe.experts.232.w1", "model.layers.3.block_sparse_moe.experts.233.w1", "model.layers.3.block_sparse_moe.experts.234.w1", "model.layers.3.block_sparse_moe.experts.235.w1", "model.layers.3.block_sparse_moe.experts.236.w1", "model.layers.3.block_sparse_moe.experts.237.w1", "model.layers.3.block_sparse_moe.experts.238.w1", "model.layers.3.block_sparse_moe.experts.239.w1", "model.layers.3.block_sparse_moe.experts.240.w1", "model.layers.3.block_sparse_moe.experts.241.w1", "model.layers.3.block_sparse_moe.experts.242.w1", "model.layers.3.block_sparse_moe.experts.243.w1", "model.layers.3.block_sparse_moe.experts.244.w1", "model.layers.3.block_sparse_moe.experts.245.w1", "model.layers.3.block_sparse_moe.experts.246.w1", "model.layers.3.block_sparse_moe.experts.247.w1", "model.layers.3.block_sparse_moe.experts.248.w1", "model.layers.3.block_sparse_moe.experts.249.w1", "model.layers.3.block_sparse_moe.experts.250.w1", "model.layers.3.block_sparse_moe.experts.251.w1", "model.layers.3.block_sparse_moe.experts.252.w1", "model.layers.3.block_sparse_moe.experts.253.w1", "model.layers.3.block_sparse_moe.experts.254.w1", "model.layers.3.block_sparse_moe.experts.255.w1", "model.layers.3.block_sparse_moe.experts.0.w3", "model.layers.3.block_sparse_moe.experts.1.w3", "model.layers.3.block_sparse_moe.experts.2.w3", "model.layers.3.block_sparse_moe.experts.3.w3", "model.layers.3.block_sparse_moe.experts.4.w3", "model.layers.3.block_sparse_moe.experts.5.w3", "model.layers.3.block_sparse_moe.experts.6.w3", "model.layers.3.block_sparse_moe.experts.7.w3", "model.layers.3.block_sparse_moe.experts.8.w3", "model.layers.3.block_sparse_moe.experts.9.w3", "model.layers.3.block_sparse_moe.experts.10.w3", "model.layers.3.block_sparse_moe.experts.11.w3", "model.layers.3.block_sparse_moe.experts.12.w3", "model.layers.3.block_sparse_moe.experts.13.w3", "model.layers.3.block_sparse_moe.experts.14.w3", "model.layers.3.block_sparse_moe.experts.15.w3", "model.layers.3.block_sparse_moe.experts.16.w3", "model.layers.3.block_sparse_moe.experts.17.w3", "model.layers.3.block_sparse_moe.experts.18.w3", "model.layers.3.block_sparse_moe.experts.19.w3", "model.layers.3.block_sparse_moe.experts.20.w3", "model.layers.3.block_sparse_moe.experts.21.w3", "model.layers.3.block_sparse_moe.experts.22.w3", "model.layers.3.block_sparse_moe.experts.23.w3", "model.layers.3.block_sparse_moe.experts.24.w3", "model.layers.3.block_sparse_moe.experts.25.w3", "model.layers.3.block_sparse_moe.experts.26.w3", "model.layers.3.block_sparse_moe.experts.27.w3", "model.layers.3.block_sparse_moe.experts.28.w3", "model.layers.3.block_sparse_moe.experts.29.w3", "model.layers.3.block_sparse_moe.experts.30.w3", "model.layers.3.block_sparse_moe.experts.31.w3", "model.layers.3.block_sparse_moe.experts.32.w3", "model.layers.3.block_sparse_moe.experts.33.w3", "model.layers.3.block_sparse_moe.experts.34.w3", "model.layers.3.block_sparse_moe.experts.35.w3", "model.layers.3.block_sparse_moe.experts.36.w3", "model.layers.3.block_sparse_moe.experts.37.w3", "model.layers.3.block_sparse_moe.experts.38.w3", "model.layers.3.block_sparse_moe.experts.39.w3", "model.layers.3.block_sparse_moe.experts.40.w3", "model.layers.3.block_sparse_moe.experts.41.w3", "model.layers.3.block_sparse_moe.experts.42.w3", "model.layers.3.block_sparse_moe.experts.43.w3", "model.layers.3.block_sparse_moe.experts.44.w3", "model.layers.3.block_sparse_moe.experts.45.w3", "model.layers.3.block_sparse_moe.experts.46.w3", "model.layers.3.block_sparse_moe.experts.47.w3", "model.layers.3.block_sparse_moe.experts.48.w3", "model.layers.3.block_sparse_moe.experts.49.w3", "model.layers.3.block_sparse_moe.experts.50.w3", "model.layers.3.block_sparse_moe.experts.51.w3", "model.layers.3.block_sparse_moe.experts.52.w3", "model.layers.3.block_sparse_moe.experts.53.w3", "model.layers.3.block_sparse_moe.experts.54.w3", "model.layers.3.block_sparse_moe.experts.55.w3", "model.layers.3.block_sparse_moe.experts.56.w3", "model.layers.3.block_sparse_moe.experts.57.w3", "model.layers.3.block_sparse_moe.experts.58.w3", "model.layers.3.block_sparse_moe.experts.59.w3", "model.layers.3.block_sparse_moe.experts.60.w3", "model.layers.3.block_sparse_moe.experts.61.w3", "model.layers.3.block_sparse_moe.experts.62.w3", "model.layers.3.block_sparse_moe.experts.63.w3", "model.layers.3.block_sparse_moe.experts.64.w3", "model.layers.3.block_sparse_moe.experts.65.w3", "model.layers.3.block_sparse_moe.experts.66.w3", "model.layers.3.block_sparse_moe.experts.67.w3", "model.layers.3.block_sparse_moe.experts.68.w3", "model.layers.3.block_sparse_moe.experts.69.w3", "model.layers.3.block_sparse_moe.experts.70.w3", "model.layers.3.block_sparse_moe.experts.71.w3", "model.layers.3.block_sparse_moe.experts.72.w3", "model.layers.3.block_sparse_moe.experts.73.w3", "model.layers.3.block_sparse_moe.experts.74.w3", "model.layers.3.block_sparse_moe.experts.75.w3", "model.layers.3.block_sparse_moe.experts.76.w3", "model.layers.3.block_sparse_moe.experts.77.w3", "model.layers.3.block_sparse_moe.experts.78.w3", "model.layers.3.block_sparse_moe.experts.79.w3", "model.layers.3.block_sparse_moe.experts.80.w3", "model.layers.3.block_sparse_moe.experts.81.w3", "model.layers.3.block_sparse_moe.experts.82.w3", "model.layers.3.block_sparse_moe.experts.83.w3", "model.layers.3.block_sparse_moe.experts.84.w3", "model.layers.3.block_sparse_moe.experts.85.w3", "model.layers.3.block_sparse_moe.experts.86.w3", "model.layers.3.block_sparse_moe.experts.87.w3", "model.layers.3.block_sparse_moe.experts.88.w3", "model.layers.3.block_sparse_moe.experts.89.w3", "model.layers.3.block_sparse_moe.experts.90.w3", "model.layers.3.block_sparse_moe.experts.91.w3", "model.layers.3.block_sparse_moe.experts.92.w3", "model.layers.3.block_sparse_moe.experts.93.w3", "model.layers.3.block_sparse_moe.experts.94.w3", "model.layers.3.block_sparse_moe.experts.95.w3", "model.layers.3.block_sparse_moe.experts.96.w3", "model.layers.3.block_sparse_moe.experts.97.w3", "model.layers.3.block_sparse_moe.experts.98.w3", "model.layers.3.block_sparse_moe.experts.99.w3", "model.layers.3.block_sparse_moe.experts.100.w3", "model.layers.3.block_sparse_moe.experts.101.w3", "model.layers.3.block_sparse_moe.experts.102.w3", "model.layers.3.block_sparse_moe.experts.103.w3", "model.layers.3.block_sparse_moe.experts.104.w3", "model.layers.3.block_sparse_moe.experts.105.w3", "model.layers.3.block_sparse_moe.experts.106.w3", "model.layers.3.block_sparse_moe.experts.107.w3", "model.layers.3.block_sparse_moe.experts.108.w3", "model.layers.3.block_sparse_moe.experts.109.w3", "model.layers.3.block_sparse_moe.experts.110.w3", "model.layers.3.block_sparse_moe.experts.111.w3", "model.layers.3.block_sparse_moe.experts.112.w3", "model.layers.3.block_sparse_moe.experts.113.w3", "model.layers.3.block_sparse_moe.experts.114.w3", "model.layers.3.block_sparse_moe.experts.115.w3", "model.layers.3.block_sparse_moe.experts.116.w3", "model.layers.3.block_sparse_moe.experts.117.w3", "model.layers.3.block_sparse_moe.experts.118.w3", "model.layers.3.block_sparse_moe.experts.119.w3", "model.layers.3.block_sparse_moe.experts.120.w3", "model.layers.3.block_sparse_moe.experts.121.w3", "model.layers.3.block_sparse_moe.experts.122.w3", "model.layers.3.block_sparse_moe.experts.123.w3", "model.layers.3.block_sparse_moe.experts.124.w3", "model.layers.3.block_sparse_moe.experts.125.w3", "model.layers.3.block_sparse_moe.experts.126.w3", "model.layers.3.block_sparse_moe.experts.127.w3", "model.layers.3.block_sparse_moe.experts.128.w3", "model.layers.3.block_sparse_moe.experts.129.w3", "model.layers.3.block_sparse_moe.experts.130.w3", "model.layers.3.block_sparse_moe.experts.131.w3", "model.layers.3.block_sparse_moe.experts.132.w3", "model.layers.3.block_sparse_moe.experts.133.w3", "model.layers.3.block_sparse_moe.experts.134.w3", "model.layers.3.block_sparse_moe.experts.135.w3", "model.layers.3.block_sparse_moe.experts.136.w3", "model.layers.3.block_sparse_moe.experts.137.w3", "model.layers.3.block_sparse_moe.experts.138.w3", "model.layers.3.block_sparse_moe.experts.139.w3", "model.layers.3.block_sparse_moe.experts.140.w3", "model.layers.3.block_sparse_moe.experts.141.w3", "model.layers.3.block_sparse_moe.experts.142.w3", "model.layers.3.block_sparse_moe.experts.143.w3", "model.layers.3.block_sparse_moe.experts.144.w3", "model.layers.3.block_sparse_moe.experts.145.w3", "model.layers.3.block_sparse_moe.experts.146.w3", "model.layers.3.block_sparse_moe.experts.147.w3", "model.layers.3.block_sparse_moe.experts.148.w3", "model.layers.3.block_sparse_moe.experts.149.w3", "model.layers.3.block_sparse_moe.experts.150.w3", "model.layers.3.block_sparse_moe.experts.151.w3", "model.layers.3.block_sparse_moe.experts.152.w3", "model.layers.3.block_sparse_moe.experts.153.w3", "model.layers.3.block_sparse_moe.experts.154.w3", "model.layers.3.block_sparse_moe.experts.155.w3", "model.layers.3.block_sparse_moe.experts.156.w3", "model.layers.3.block_sparse_moe.experts.157.w3", "model.layers.3.block_sparse_moe.experts.158.w3", "model.layers.3.block_sparse_moe.experts.159.w3", "model.layers.3.block_sparse_moe.experts.160.w3", "model.layers.3.block_sparse_moe.experts.161.w3", "model.layers.3.block_sparse_moe.experts.162.w3", "model.layers.3.block_sparse_moe.experts.163.w3", "model.layers.3.block_sparse_moe.experts.164.w3", "model.layers.3.block_sparse_moe.experts.165.w3", "model.layers.3.block_sparse_moe.experts.166.w3", "model.layers.3.block_sparse_moe.experts.167.w3", "model.layers.3.block_sparse_moe.experts.168.w3", "model.layers.3.block_sparse_moe.experts.169.w3", "model.layers.3.block_sparse_moe.experts.170.w3", "model.layers.3.block_sparse_moe.experts.171.w3", "model.layers.3.block_sparse_moe.experts.172.w3", "model.layers.3.block_sparse_moe.experts.173.w3", "model.layers.3.block_sparse_moe.experts.174.w3", "model.layers.3.block_sparse_moe.experts.175.w3", "model.layers.3.block_sparse_moe.experts.176.w3", "model.layers.3.block_sparse_moe.experts.177.w3", "model.layers.3.block_sparse_moe.experts.178.w3", "model.layers.3.block_sparse_moe.experts.179.w3", "model.layers.3.block_sparse_moe.experts.180.w3", "model.layers.3.block_sparse_moe.experts.181.w3", "model.layers.3.block_sparse_moe.experts.182.w3", "model.layers.3.block_sparse_moe.experts.183.w3", "model.layers.3.block_sparse_moe.experts.184.w3", "model.layers.3.block_sparse_moe.experts.185.w3", "model.layers.3.block_sparse_moe.experts.186.w3", "model.layers.3.block_sparse_moe.experts.187.w3", "model.layers.3.block_sparse_moe.experts.188.w3", "model.layers.3.block_sparse_moe.experts.189.w3", "model.layers.3.block_sparse_moe.experts.190.w3", "model.layers.3.block_sparse_moe.experts.191.w3", "model.layers.3.block_sparse_moe.experts.192.w3", "model.layers.3.block_sparse_moe.experts.193.w3", "model.layers.3.block_sparse_moe.experts.194.w3", "model.layers.3.block_sparse_moe.experts.195.w3", "model.layers.3.block_sparse_moe.experts.196.w3", "model.layers.3.block_sparse_moe.experts.197.w3", "model.layers.3.block_sparse_moe.experts.198.w3", "model.layers.3.block_sparse_moe.experts.199.w3", "model.layers.3.block_sparse_moe.experts.200.w3", "model.layers.3.block_sparse_moe.experts.201.w3", "model.layers.3.block_sparse_moe.experts.202.w3", "model.layers.3.block_sparse_moe.experts.203.w3", "model.layers.3.block_sparse_moe.experts.204.w3", "model.layers.3.block_sparse_moe.experts.205.w3", "model.layers.3.block_sparse_moe.experts.206.w3", "model.layers.3.block_sparse_moe.experts.207.w3", "model.layers.3.block_sparse_moe.experts.208.w3", "model.layers.3.block_sparse_moe.experts.209.w3", "model.layers.3.block_sparse_moe.experts.210.w3", "model.layers.3.block_sparse_moe.experts.211.w3", "model.layers.3.block_sparse_moe.experts.212.w3", "model.layers.3.block_sparse_moe.experts.213.w3", "model.layers.3.block_sparse_moe.experts.214.w3", "model.layers.3.block_sparse_moe.experts.215.w3", "model.layers.3.block_sparse_moe.experts.216.w3", "model.layers.3.block_sparse_moe.experts.217.w3", "model.layers.3.block_sparse_moe.experts.218.w3", "model.layers.3.block_sparse_moe.experts.219.w3", "model.layers.3.block_sparse_moe.experts.220.w3", "model.layers.3.block_sparse_moe.experts.221.w3", "model.layers.3.block_sparse_moe.experts.222.w3", "model.layers.3.block_sparse_moe.experts.223.w3", "model.layers.3.block_sparse_moe.experts.224.w3", "model.layers.3.block_sparse_moe.experts.225.w3", "model.layers.3.block_sparse_moe.experts.226.w3", "model.layers.3.block_sparse_moe.experts.227.w3", "model.layers.3.block_sparse_moe.experts.228.w3", "model.layers.3.block_sparse_moe.experts.229.w3", "model.layers.3.block_sparse_moe.experts.230.w3", "model.layers.3.block_sparse_moe.experts.231.w3", "model.layers.3.block_sparse_moe.experts.232.w3", "model.layers.3.block_sparse_moe.experts.233.w3", "model.layers.3.block_sparse_moe.experts.234.w3", "model.layers.3.block_sparse_moe.experts.235.w3", "model.layers.3.block_sparse_moe.experts.236.w3", "model.layers.3.block_sparse_moe.experts.237.w3", "model.layers.3.block_sparse_moe.experts.238.w3", "model.layers.3.block_sparse_moe.experts.239.w3", "model.layers.3.block_sparse_moe.experts.240.w3", "model.layers.3.block_sparse_moe.experts.241.w3", "model.layers.3.block_sparse_moe.experts.242.w3", "model.layers.3.block_sparse_moe.experts.243.w3", "model.layers.3.block_sparse_moe.experts.244.w3", "model.layers.3.block_sparse_moe.experts.245.w3", "model.layers.3.block_sparse_moe.experts.246.w3", "model.layers.3.block_sparse_moe.experts.247.w3", "model.layers.3.block_sparse_moe.experts.248.w3", "model.layers.3.block_sparse_moe.experts.249.w3", "model.layers.3.block_sparse_moe.experts.250.w3", "model.layers.3.block_sparse_moe.experts.251.w3", "model.layers.3.block_sparse_moe.experts.252.w3", "model.layers.3.block_sparse_moe.experts.253.w3", "model.layers.3.block_sparse_moe.experts.254.w3", "model.layers.3.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.00123582445085052, "dbits": 2415919104 } ] }, { "idx": 19, "layers": [ "model.layers.3.block_sparse_moe.experts.0.w2", "model.layers.3.block_sparse_moe.experts.1.w2", "model.layers.3.block_sparse_moe.experts.2.w2", "model.layers.3.block_sparse_moe.experts.3.w2", "model.layers.3.block_sparse_moe.experts.4.w2", "model.layers.3.block_sparse_moe.experts.5.w2", "model.layers.3.block_sparse_moe.experts.6.w2", "model.layers.3.block_sparse_moe.experts.7.w2", "model.layers.3.block_sparse_moe.experts.8.w2", "model.layers.3.block_sparse_moe.experts.9.w2", "model.layers.3.block_sparse_moe.experts.10.w2", "model.layers.3.block_sparse_moe.experts.11.w2", "model.layers.3.block_sparse_moe.experts.12.w2", "model.layers.3.block_sparse_moe.experts.13.w2", "model.layers.3.block_sparse_moe.experts.14.w2", "model.layers.3.block_sparse_moe.experts.15.w2", "model.layers.3.block_sparse_moe.experts.16.w2", "model.layers.3.block_sparse_moe.experts.17.w2", "model.layers.3.block_sparse_moe.experts.18.w2", "model.layers.3.block_sparse_moe.experts.19.w2", "model.layers.3.block_sparse_moe.experts.20.w2", "model.layers.3.block_sparse_moe.experts.21.w2", "model.layers.3.block_sparse_moe.experts.22.w2", "model.layers.3.block_sparse_moe.experts.23.w2", "model.layers.3.block_sparse_moe.experts.24.w2", "model.layers.3.block_sparse_moe.experts.25.w2", "model.layers.3.block_sparse_moe.experts.26.w2", "model.layers.3.block_sparse_moe.experts.27.w2", "model.layers.3.block_sparse_moe.experts.28.w2", "model.layers.3.block_sparse_moe.experts.29.w2", "model.layers.3.block_sparse_moe.experts.30.w2", "model.layers.3.block_sparse_moe.experts.31.w2", "model.layers.3.block_sparse_moe.experts.32.w2", "model.layers.3.block_sparse_moe.experts.33.w2", "model.layers.3.block_sparse_moe.experts.34.w2", "model.layers.3.block_sparse_moe.experts.35.w2", "model.layers.3.block_sparse_moe.experts.36.w2", "model.layers.3.block_sparse_moe.experts.37.w2", "model.layers.3.block_sparse_moe.experts.38.w2", "model.layers.3.block_sparse_moe.experts.39.w2", "model.layers.3.block_sparse_moe.experts.40.w2", "model.layers.3.block_sparse_moe.experts.41.w2", "model.layers.3.block_sparse_moe.experts.42.w2", "model.layers.3.block_sparse_moe.experts.43.w2", "model.layers.3.block_sparse_moe.experts.44.w2", "model.layers.3.block_sparse_moe.experts.45.w2", "model.layers.3.block_sparse_moe.experts.46.w2", "model.layers.3.block_sparse_moe.experts.47.w2", "model.layers.3.block_sparse_moe.experts.48.w2", "model.layers.3.block_sparse_moe.experts.49.w2", "model.layers.3.block_sparse_moe.experts.50.w2", "model.layers.3.block_sparse_moe.experts.51.w2", "model.layers.3.block_sparse_moe.experts.52.w2", "model.layers.3.block_sparse_moe.experts.53.w2", "model.layers.3.block_sparse_moe.experts.54.w2", "model.layers.3.block_sparse_moe.experts.55.w2", "model.layers.3.block_sparse_moe.experts.56.w2", "model.layers.3.block_sparse_moe.experts.57.w2", "model.layers.3.block_sparse_moe.experts.58.w2", "model.layers.3.block_sparse_moe.experts.59.w2", "model.layers.3.block_sparse_moe.experts.60.w2", "model.layers.3.block_sparse_moe.experts.61.w2", "model.layers.3.block_sparse_moe.experts.62.w2", "model.layers.3.block_sparse_moe.experts.63.w2", "model.layers.3.block_sparse_moe.experts.64.w2", "model.layers.3.block_sparse_moe.experts.65.w2", "model.layers.3.block_sparse_moe.experts.66.w2", "model.layers.3.block_sparse_moe.experts.67.w2", "model.layers.3.block_sparse_moe.experts.68.w2", "model.layers.3.block_sparse_moe.experts.69.w2", "model.layers.3.block_sparse_moe.experts.70.w2", "model.layers.3.block_sparse_moe.experts.71.w2", "model.layers.3.block_sparse_moe.experts.72.w2", "model.layers.3.block_sparse_moe.experts.73.w2", "model.layers.3.block_sparse_moe.experts.74.w2", "model.layers.3.block_sparse_moe.experts.75.w2", "model.layers.3.block_sparse_moe.experts.76.w2", "model.layers.3.block_sparse_moe.experts.77.w2", "model.layers.3.block_sparse_moe.experts.78.w2", "model.layers.3.block_sparse_moe.experts.79.w2", "model.layers.3.block_sparse_moe.experts.80.w2", "model.layers.3.block_sparse_moe.experts.81.w2", "model.layers.3.block_sparse_moe.experts.82.w2", "model.layers.3.block_sparse_moe.experts.83.w2", "model.layers.3.block_sparse_moe.experts.84.w2", "model.layers.3.block_sparse_moe.experts.85.w2", "model.layers.3.block_sparse_moe.experts.86.w2", "model.layers.3.block_sparse_moe.experts.87.w2", "model.layers.3.block_sparse_moe.experts.88.w2", "model.layers.3.block_sparse_moe.experts.89.w2", "model.layers.3.block_sparse_moe.experts.90.w2", "model.layers.3.block_sparse_moe.experts.91.w2", "model.layers.3.block_sparse_moe.experts.92.w2", "model.layers.3.block_sparse_moe.experts.93.w2", "model.layers.3.block_sparse_moe.experts.94.w2", "model.layers.3.block_sparse_moe.experts.95.w2", "model.layers.3.block_sparse_moe.experts.96.w2", "model.layers.3.block_sparse_moe.experts.97.w2", "model.layers.3.block_sparse_moe.experts.98.w2", "model.layers.3.block_sparse_moe.experts.99.w2", "model.layers.3.block_sparse_moe.experts.100.w2", "model.layers.3.block_sparse_moe.experts.101.w2", "model.layers.3.block_sparse_moe.experts.102.w2", "model.layers.3.block_sparse_moe.experts.103.w2", "model.layers.3.block_sparse_moe.experts.104.w2", "model.layers.3.block_sparse_moe.experts.105.w2", "model.layers.3.block_sparse_moe.experts.106.w2", "model.layers.3.block_sparse_moe.experts.107.w2", "model.layers.3.block_sparse_moe.experts.108.w2", "model.layers.3.block_sparse_moe.experts.109.w2", "model.layers.3.block_sparse_moe.experts.110.w2", "model.layers.3.block_sparse_moe.experts.111.w2", "model.layers.3.block_sparse_moe.experts.112.w2", "model.layers.3.block_sparse_moe.experts.113.w2", "model.layers.3.block_sparse_moe.experts.114.w2", "model.layers.3.block_sparse_moe.experts.115.w2", "model.layers.3.block_sparse_moe.experts.116.w2", "model.layers.3.block_sparse_moe.experts.117.w2", "model.layers.3.block_sparse_moe.experts.118.w2", "model.layers.3.block_sparse_moe.experts.119.w2", "model.layers.3.block_sparse_moe.experts.120.w2", "model.layers.3.block_sparse_moe.experts.121.w2", "model.layers.3.block_sparse_moe.experts.122.w2", "model.layers.3.block_sparse_moe.experts.123.w2", "model.layers.3.block_sparse_moe.experts.124.w2", "model.layers.3.block_sparse_moe.experts.125.w2", "model.layers.3.block_sparse_moe.experts.126.w2", "model.layers.3.block_sparse_moe.experts.127.w2", "model.layers.3.block_sparse_moe.experts.128.w2", "model.layers.3.block_sparse_moe.experts.129.w2", "model.layers.3.block_sparse_moe.experts.130.w2", "model.layers.3.block_sparse_moe.experts.131.w2", "model.layers.3.block_sparse_moe.experts.132.w2", "model.layers.3.block_sparse_moe.experts.133.w2", "model.layers.3.block_sparse_moe.experts.134.w2", "model.layers.3.block_sparse_moe.experts.135.w2", "model.layers.3.block_sparse_moe.experts.136.w2", "model.layers.3.block_sparse_moe.experts.137.w2", "model.layers.3.block_sparse_moe.experts.138.w2", "model.layers.3.block_sparse_moe.experts.139.w2", "model.layers.3.block_sparse_moe.experts.140.w2", "model.layers.3.block_sparse_moe.experts.141.w2", "model.layers.3.block_sparse_moe.experts.142.w2", "model.layers.3.block_sparse_moe.experts.143.w2", "model.layers.3.block_sparse_moe.experts.144.w2", "model.layers.3.block_sparse_moe.experts.145.w2", "model.layers.3.block_sparse_moe.experts.146.w2", "model.layers.3.block_sparse_moe.experts.147.w2", "model.layers.3.block_sparse_moe.experts.148.w2", "model.layers.3.block_sparse_moe.experts.149.w2", "model.layers.3.block_sparse_moe.experts.150.w2", "model.layers.3.block_sparse_moe.experts.151.w2", "model.layers.3.block_sparse_moe.experts.152.w2", "model.layers.3.block_sparse_moe.experts.153.w2", "model.layers.3.block_sparse_moe.experts.154.w2", "model.layers.3.block_sparse_moe.experts.155.w2", "model.layers.3.block_sparse_moe.experts.156.w2", "model.layers.3.block_sparse_moe.experts.157.w2", "model.layers.3.block_sparse_moe.experts.158.w2", "model.layers.3.block_sparse_moe.experts.159.w2", "model.layers.3.block_sparse_moe.experts.160.w2", "model.layers.3.block_sparse_moe.experts.161.w2", "model.layers.3.block_sparse_moe.experts.162.w2", "model.layers.3.block_sparse_moe.experts.163.w2", "model.layers.3.block_sparse_moe.experts.164.w2", "model.layers.3.block_sparse_moe.experts.165.w2", "model.layers.3.block_sparse_moe.experts.166.w2", "model.layers.3.block_sparse_moe.experts.167.w2", "model.layers.3.block_sparse_moe.experts.168.w2", "model.layers.3.block_sparse_moe.experts.169.w2", "model.layers.3.block_sparse_moe.experts.170.w2", "model.layers.3.block_sparse_moe.experts.171.w2", "model.layers.3.block_sparse_moe.experts.172.w2", "model.layers.3.block_sparse_moe.experts.173.w2", "model.layers.3.block_sparse_moe.experts.174.w2", "model.layers.3.block_sparse_moe.experts.175.w2", "model.layers.3.block_sparse_moe.experts.176.w2", "model.layers.3.block_sparse_moe.experts.177.w2", "model.layers.3.block_sparse_moe.experts.178.w2", "model.layers.3.block_sparse_moe.experts.179.w2", "model.layers.3.block_sparse_moe.experts.180.w2", "model.layers.3.block_sparse_moe.experts.181.w2", "model.layers.3.block_sparse_moe.experts.182.w2", "model.layers.3.block_sparse_moe.experts.183.w2", "model.layers.3.block_sparse_moe.experts.184.w2", "model.layers.3.block_sparse_moe.experts.185.w2", "model.layers.3.block_sparse_moe.experts.186.w2", "model.layers.3.block_sparse_moe.experts.187.w2", "model.layers.3.block_sparse_moe.experts.188.w2", "model.layers.3.block_sparse_moe.experts.189.w2", "model.layers.3.block_sparse_moe.experts.190.w2", "model.layers.3.block_sparse_moe.experts.191.w2", "model.layers.3.block_sparse_moe.experts.192.w2", "model.layers.3.block_sparse_moe.experts.193.w2", "model.layers.3.block_sparse_moe.experts.194.w2", "model.layers.3.block_sparse_moe.experts.195.w2", "model.layers.3.block_sparse_moe.experts.196.w2", "model.layers.3.block_sparse_moe.experts.197.w2", "model.layers.3.block_sparse_moe.experts.198.w2", "model.layers.3.block_sparse_moe.experts.199.w2", "model.layers.3.block_sparse_moe.experts.200.w2", "model.layers.3.block_sparse_moe.experts.201.w2", "model.layers.3.block_sparse_moe.experts.202.w2", "model.layers.3.block_sparse_moe.experts.203.w2", "model.layers.3.block_sparse_moe.experts.204.w2", "model.layers.3.block_sparse_moe.experts.205.w2", "model.layers.3.block_sparse_moe.experts.206.w2", "model.layers.3.block_sparse_moe.experts.207.w2", "model.layers.3.block_sparse_moe.experts.208.w2", "model.layers.3.block_sparse_moe.experts.209.w2", "model.layers.3.block_sparse_moe.experts.210.w2", "model.layers.3.block_sparse_moe.experts.211.w2", "model.layers.3.block_sparse_moe.experts.212.w2", "model.layers.3.block_sparse_moe.experts.213.w2", "model.layers.3.block_sparse_moe.experts.214.w2", "model.layers.3.block_sparse_moe.experts.215.w2", "model.layers.3.block_sparse_moe.experts.216.w2", "model.layers.3.block_sparse_moe.experts.217.w2", "model.layers.3.block_sparse_moe.experts.218.w2", "model.layers.3.block_sparse_moe.experts.219.w2", "model.layers.3.block_sparse_moe.experts.220.w2", "model.layers.3.block_sparse_moe.experts.221.w2", "model.layers.3.block_sparse_moe.experts.222.w2", "model.layers.3.block_sparse_moe.experts.223.w2", "model.layers.3.block_sparse_moe.experts.224.w2", "model.layers.3.block_sparse_moe.experts.225.w2", "model.layers.3.block_sparse_moe.experts.226.w2", "model.layers.3.block_sparse_moe.experts.227.w2", "model.layers.3.block_sparse_moe.experts.228.w2", "model.layers.3.block_sparse_moe.experts.229.w2", "model.layers.3.block_sparse_moe.experts.230.w2", "model.layers.3.block_sparse_moe.experts.231.w2", "model.layers.3.block_sparse_moe.experts.232.w2", "model.layers.3.block_sparse_moe.experts.233.w2", "model.layers.3.block_sparse_moe.experts.234.w2", "model.layers.3.block_sparse_moe.experts.235.w2", "model.layers.3.block_sparse_moe.experts.236.w2", "model.layers.3.block_sparse_moe.experts.237.w2", "model.layers.3.block_sparse_moe.experts.238.w2", "model.layers.3.block_sparse_moe.experts.239.w2", "model.layers.3.block_sparse_moe.experts.240.w2", "model.layers.3.block_sparse_moe.experts.241.w2", "model.layers.3.block_sparse_moe.experts.242.w2", "model.layers.3.block_sparse_moe.experts.243.w2", "model.layers.3.block_sparse_moe.experts.244.w2", "model.layers.3.block_sparse_moe.experts.245.w2", "model.layers.3.block_sparse_moe.experts.246.w2", "model.layers.3.block_sparse_moe.experts.247.w2", "model.layers.3.block_sparse_moe.experts.248.w2", "model.layers.3.block_sparse_moe.experts.249.w2", "model.layers.3.block_sparse_moe.experts.250.w2", "model.layers.3.block_sparse_moe.experts.251.w2", "model.layers.3.block_sparse_moe.experts.252.w2", "model.layers.3.block_sparse_moe.experts.253.w2", "model.layers.3.block_sparse_moe.experts.254.w2", "model.layers.3.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.0003466866910457611, "dbits": 1207959552 } ] }, { "idx": 20, "layers": [ "model.layers.4.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0008498895913362503, "dbits": 18874368 } ] }, { "idx": 21, "layers": [ "model.layers.4.self_attn.k_proj", "model.layers.4.self_attn.v_proj" ], "candidates": [ { "dkld": -0.008371177315711964, "dbits": 6291456 } ] }, { "idx": 22, "layers": [ "model.layers.4.self_attn.o_proj" ], "candidates": [ { "dkld": -0.008039718121290229, "dbits": 18874368 } ] }, { "idx": 23, "layers": [ "model.layers.4.block_sparse_moe.experts.0.w1", "model.layers.4.block_sparse_moe.experts.1.w1", "model.layers.4.block_sparse_moe.experts.2.w1", "model.layers.4.block_sparse_moe.experts.3.w1", "model.layers.4.block_sparse_moe.experts.4.w1", "model.layers.4.block_sparse_moe.experts.5.w1", "model.layers.4.block_sparse_moe.experts.6.w1", "model.layers.4.block_sparse_moe.experts.7.w1", "model.layers.4.block_sparse_moe.experts.8.w1", "model.layers.4.block_sparse_moe.experts.9.w1", "model.layers.4.block_sparse_moe.experts.10.w1", "model.layers.4.block_sparse_moe.experts.11.w1", "model.layers.4.block_sparse_moe.experts.12.w1", "model.layers.4.block_sparse_moe.experts.13.w1", "model.layers.4.block_sparse_moe.experts.14.w1", "model.layers.4.block_sparse_moe.experts.15.w1", "model.layers.4.block_sparse_moe.experts.16.w1", "model.layers.4.block_sparse_moe.experts.17.w1", "model.layers.4.block_sparse_moe.experts.18.w1", "model.layers.4.block_sparse_moe.experts.19.w1", "model.layers.4.block_sparse_moe.experts.20.w1", "model.layers.4.block_sparse_moe.experts.21.w1", "model.layers.4.block_sparse_moe.experts.22.w1", "model.layers.4.block_sparse_moe.experts.23.w1", "model.layers.4.block_sparse_moe.experts.24.w1", "model.layers.4.block_sparse_moe.experts.25.w1", "model.layers.4.block_sparse_moe.experts.26.w1", "model.layers.4.block_sparse_moe.experts.27.w1", "model.layers.4.block_sparse_moe.experts.28.w1", "model.layers.4.block_sparse_moe.experts.29.w1", "model.layers.4.block_sparse_moe.experts.30.w1", "model.layers.4.block_sparse_moe.experts.31.w1", "model.layers.4.block_sparse_moe.experts.32.w1", "model.layers.4.block_sparse_moe.experts.33.w1", "model.layers.4.block_sparse_moe.experts.34.w1", "model.layers.4.block_sparse_moe.experts.35.w1", "model.layers.4.block_sparse_moe.experts.36.w1", "model.layers.4.block_sparse_moe.experts.37.w1", "model.layers.4.block_sparse_moe.experts.38.w1", "model.layers.4.block_sparse_moe.experts.39.w1", "model.layers.4.block_sparse_moe.experts.40.w1", "model.layers.4.block_sparse_moe.experts.41.w1", "model.layers.4.block_sparse_moe.experts.42.w1", "model.layers.4.block_sparse_moe.experts.43.w1", "model.layers.4.block_sparse_moe.experts.44.w1", "model.layers.4.block_sparse_moe.experts.45.w1", "model.layers.4.block_sparse_moe.experts.46.w1", "model.layers.4.block_sparse_moe.experts.47.w1", "model.layers.4.block_sparse_moe.experts.48.w1", "model.layers.4.block_sparse_moe.experts.49.w1", "model.layers.4.block_sparse_moe.experts.50.w1", "model.layers.4.block_sparse_moe.experts.51.w1", "model.layers.4.block_sparse_moe.experts.52.w1", "model.layers.4.block_sparse_moe.experts.53.w1", "model.layers.4.block_sparse_moe.experts.54.w1", "model.layers.4.block_sparse_moe.experts.55.w1", "model.layers.4.block_sparse_moe.experts.56.w1", "model.layers.4.block_sparse_moe.experts.57.w1", "model.layers.4.block_sparse_moe.experts.58.w1", "model.layers.4.block_sparse_moe.experts.59.w1", "model.layers.4.block_sparse_moe.experts.60.w1", "model.layers.4.block_sparse_moe.experts.61.w1", "model.layers.4.block_sparse_moe.experts.62.w1", "model.layers.4.block_sparse_moe.experts.63.w1", "model.layers.4.block_sparse_moe.experts.64.w1", "model.layers.4.block_sparse_moe.experts.65.w1", "model.layers.4.block_sparse_moe.experts.66.w1", "model.layers.4.block_sparse_moe.experts.67.w1", "model.layers.4.block_sparse_moe.experts.68.w1", "model.layers.4.block_sparse_moe.experts.69.w1", "model.layers.4.block_sparse_moe.experts.70.w1", "model.layers.4.block_sparse_moe.experts.71.w1", "model.layers.4.block_sparse_moe.experts.72.w1", "model.layers.4.block_sparse_moe.experts.73.w1", "model.layers.4.block_sparse_moe.experts.74.w1", "model.layers.4.block_sparse_moe.experts.75.w1", "model.layers.4.block_sparse_moe.experts.76.w1", "model.layers.4.block_sparse_moe.experts.77.w1", "model.layers.4.block_sparse_moe.experts.78.w1", "model.layers.4.block_sparse_moe.experts.79.w1", "model.layers.4.block_sparse_moe.experts.80.w1", "model.layers.4.block_sparse_moe.experts.81.w1", "model.layers.4.block_sparse_moe.experts.82.w1", "model.layers.4.block_sparse_moe.experts.83.w1", "model.layers.4.block_sparse_moe.experts.84.w1", "model.layers.4.block_sparse_moe.experts.85.w1", "model.layers.4.block_sparse_moe.experts.86.w1", "model.layers.4.block_sparse_moe.experts.87.w1", "model.layers.4.block_sparse_moe.experts.88.w1", "model.layers.4.block_sparse_moe.experts.89.w1", "model.layers.4.block_sparse_moe.experts.90.w1", "model.layers.4.block_sparse_moe.experts.91.w1", "model.layers.4.block_sparse_moe.experts.92.w1", "model.layers.4.block_sparse_moe.experts.93.w1", "model.layers.4.block_sparse_moe.experts.94.w1", "model.layers.4.block_sparse_moe.experts.95.w1", "model.layers.4.block_sparse_moe.experts.96.w1", "model.layers.4.block_sparse_moe.experts.97.w1", "model.layers.4.block_sparse_moe.experts.98.w1", "model.layers.4.block_sparse_moe.experts.99.w1", "model.layers.4.block_sparse_moe.experts.100.w1", "model.layers.4.block_sparse_moe.experts.101.w1", "model.layers.4.block_sparse_moe.experts.102.w1", "model.layers.4.block_sparse_moe.experts.103.w1", "model.layers.4.block_sparse_moe.experts.104.w1", "model.layers.4.block_sparse_moe.experts.105.w1", "model.layers.4.block_sparse_moe.experts.106.w1", "model.layers.4.block_sparse_moe.experts.107.w1", "model.layers.4.block_sparse_moe.experts.108.w1", "model.layers.4.block_sparse_moe.experts.109.w1", "model.layers.4.block_sparse_moe.experts.110.w1", "model.layers.4.block_sparse_moe.experts.111.w1", "model.layers.4.block_sparse_moe.experts.112.w1", "model.layers.4.block_sparse_moe.experts.113.w1", "model.layers.4.block_sparse_moe.experts.114.w1", "model.layers.4.block_sparse_moe.experts.115.w1", "model.layers.4.block_sparse_moe.experts.116.w1", "model.layers.4.block_sparse_moe.experts.117.w1", "model.layers.4.block_sparse_moe.experts.118.w1", "model.layers.4.block_sparse_moe.experts.119.w1", "model.layers.4.block_sparse_moe.experts.120.w1", "model.layers.4.block_sparse_moe.experts.121.w1", "model.layers.4.block_sparse_moe.experts.122.w1", "model.layers.4.block_sparse_moe.experts.123.w1", "model.layers.4.block_sparse_moe.experts.124.w1", "model.layers.4.block_sparse_moe.experts.125.w1", "model.layers.4.block_sparse_moe.experts.126.w1", "model.layers.4.block_sparse_moe.experts.127.w1", "model.layers.4.block_sparse_moe.experts.128.w1", "model.layers.4.block_sparse_moe.experts.129.w1", "model.layers.4.block_sparse_moe.experts.130.w1", "model.layers.4.block_sparse_moe.experts.131.w1", "model.layers.4.block_sparse_moe.experts.132.w1", "model.layers.4.block_sparse_moe.experts.133.w1", "model.layers.4.block_sparse_moe.experts.134.w1", "model.layers.4.block_sparse_moe.experts.135.w1", "model.layers.4.block_sparse_moe.experts.136.w1", "model.layers.4.block_sparse_moe.experts.137.w1", "model.layers.4.block_sparse_moe.experts.138.w1", "model.layers.4.block_sparse_moe.experts.139.w1", "model.layers.4.block_sparse_moe.experts.140.w1", "model.layers.4.block_sparse_moe.experts.141.w1", "model.layers.4.block_sparse_moe.experts.142.w1", "model.layers.4.block_sparse_moe.experts.143.w1", "model.layers.4.block_sparse_moe.experts.144.w1", "model.layers.4.block_sparse_moe.experts.145.w1", "model.layers.4.block_sparse_moe.experts.146.w1", "model.layers.4.block_sparse_moe.experts.147.w1", "model.layers.4.block_sparse_moe.experts.148.w1", "model.layers.4.block_sparse_moe.experts.149.w1", "model.layers.4.block_sparse_moe.experts.150.w1", "model.layers.4.block_sparse_moe.experts.151.w1", "model.layers.4.block_sparse_moe.experts.152.w1", "model.layers.4.block_sparse_moe.experts.153.w1", "model.layers.4.block_sparse_moe.experts.154.w1", "model.layers.4.block_sparse_moe.experts.155.w1", "model.layers.4.block_sparse_moe.experts.156.w1", "model.layers.4.block_sparse_moe.experts.157.w1", "model.layers.4.block_sparse_moe.experts.158.w1", "model.layers.4.block_sparse_moe.experts.159.w1", "model.layers.4.block_sparse_moe.experts.160.w1", "model.layers.4.block_sparse_moe.experts.161.w1", "model.layers.4.block_sparse_moe.experts.162.w1", "model.layers.4.block_sparse_moe.experts.163.w1", "model.layers.4.block_sparse_moe.experts.164.w1", "model.layers.4.block_sparse_moe.experts.165.w1", "model.layers.4.block_sparse_moe.experts.166.w1", "model.layers.4.block_sparse_moe.experts.167.w1", "model.layers.4.block_sparse_moe.experts.168.w1", "model.layers.4.block_sparse_moe.experts.169.w1", "model.layers.4.block_sparse_moe.experts.170.w1", "model.layers.4.block_sparse_moe.experts.171.w1", "model.layers.4.block_sparse_moe.experts.172.w1", "model.layers.4.block_sparse_moe.experts.173.w1", "model.layers.4.block_sparse_moe.experts.174.w1", "model.layers.4.block_sparse_moe.experts.175.w1", "model.layers.4.block_sparse_moe.experts.176.w1", "model.layers.4.block_sparse_moe.experts.177.w1", "model.layers.4.block_sparse_moe.experts.178.w1", "model.layers.4.block_sparse_moe.experts.179.w1", "model.layers.4.block_sparse_moe.experts.180.w1", "model.layers.4.block_sparse_moe.experts.181.w1", "model.layers.4.block_sparse_moe.experts.182.w1", "model.layers.4.block_sparse_moe.experts.183.w1", "model.layers.4.block_sparse_moe.experts.184.w1", "model.layers.4.block_sparse_moe.experts.185.w1", "model.layers.4.block_sparse_moe.experts.186.w1", "model.layers.4.block_sparse_moe.experts.187.w1", "model.layers.4.block_sparse_moe.experts.188.w1", "model.layers.4.block_sparse_moe.experts.189.w1", "model.layers.4.block_sparse_moe.experts.190.w1", "model.layers.4.block_sparse_moe.experts.191.w1", "model.layers.4.block_sparse_moe.experts.192.w1", "model.layers.4.block_sparse_moe.experts.193.w1", "model.layers.4.block_sparse_moe.experts.194.w1", "model.layers.4.block_sparse_moe.experts.195.w1", "model.layers.4.block_sparse_moe.experts.196.w1", "model.layers.4.block_sparse_moe.experts.197.w1", "model.layers.4.block_sparse_moe.experts.198.w1", "model.layers.4.block_sparse_moe.experts.199.w1", "model.layers.4.block_sparse_moe.experts.200.w1", "model.layers.4.block_sparse_moe.experts.201.w1", "model.layers.4.block_sparse_moe.experts.202.w1", "model.layers.4.block_sparse_moe.experts.203.w1", "model.layers.4.block_sparse_moe.experts.204.w1", "model.layers.4.block_sparse_moe.experts.205.w1", "model.layers.4.block_sparse_moe.experts.206.w1", "model.layers.4.block_sparse_moe.experts.207.w1", "model.layers.4.block_sparse_moe.experts.208.w1", "model.layers.4.block_sparse_moe.experts.209.w1", "model.layers.4.block_sparse_moe.experts.210.w1", "model.layers.4.block_sparse_moe.experts.211.w1", "model.layers.4.block_sparse_moe.experts.212.w1", "model.layers.4.block_sparse_moe.experts.213.w1", "model.layers.4.block_sparse_moe.experts.214.w1", "model.layers.4.block_sparse_moe.experts.215.w1", "model.layers.4.block_sparse_moe.experts.216.w1", "model.layers.4.block_sparse_moe.experts.217.w1", "model.layers.4.block_sparse_moe.experts.218.w1", "model.layers.4.block_sparse_moe.experts.219.w1", "model.layers.4.block_sparse_moe.experts.220.w1", "model.layers.4.block_sparse_moe.experts.221.w1", "model.layers.4.block_sparse_moe.experts.222.w1", "model.layers.4.block_sparse_moe.experts.223.w1", "model.layers.4.block_sparse_moe.experts.224.w1", "model.layers.4.block_sparse_moe.experts.225.w1", "model.layers.4.block_sparse_moe.experts.226.w1", "model.layers.4.block_sparse_moe.experts.227.w1", "model.layers.4.block_sparse_moe.experts.228.w1", "model.layers.4.block_sparse_moe.experts.229.w1", "model.layers.4.block_sparse_moe.experts.230.w1", "model.layers.4.block_sparse_moe.experts.231.w1", "model.layers.4.block_sparse_moe.experts.232.w1", "model.layers.4.block_sparse_moe.experts.233.w1", "model.layers.4.block_sparse_moe.experts.234.w1", "model.layers.4.block_sparse_moe.experts.235.w1", "model.layers.4.block_sparse_moe.experts.236.w1", "model.layers.4.block_sparse_moe.experts.237.w1", "model.layers.4.block_sparse_moe.experts.238.w1", "model.layers.4.block_sparse_moe.experts.239.w1", "model.layers.4.block_sparse_moe.experts.240.w1", "model.layers.4.block_sparse_moe.experts.241.w1", "model.layers.4.block_sparse_moe.experts.242.w1", "model.layers.4.block_sparse_moe.experts.243.w1", "model.layers.4.block_sparse_moe.experts.244.w1", "model.layers.4.block_sparse_moe.experts.245.w1", "model.layers.4.block_sparse_moe.experts.246.w1", "model.layers.4.block_sparse_moe.experts.247.w1", "model.layers.4.block_sparse_moe.experts.248.w1", "model.layers.4.block_sparse_moe.experts.249.w1", "model.layers.4.block_sparse_moe.experts.250.w1", "model.layers.4.block_sparse_moe.experts.251.w1", "model.layers.4.block_sparse_moe.experts.252.w1", "model.layers.4.block_sparse_moe.experts.253.w1", "model.layers.4.block_sparse_moe.experts.254.w1", "model.layers.4.block_sparse_moe.experts.255.w1", "model.layers.4.block_sparse_moe.experts.0.w3", "model.layers.4.block_sparse_moe.experts.1.w3", "model.layers.4.block_sparse_moe.experts.2.w3", "model.layers.4.block_sparse_moe.experts.3.w3", "model.layers.4.block_sparse_moe.experts.4.w3", "model.layers.4.block_sparse_moe.experts.5.w3", "model.layers.4.block_sparse_moe.experts.6.w3", "model.layers.4.block_sparse_moe.experts.7.w3", "model.layers.4.block_sparse_moe.experts.8.w3", "model.layers.4.block_sparse_moe.experts.9.w3", "model.layers.4.block_sparse_moe.experts.10.w3", "model.layers.4.block_sparse_moe.experts.11.w3", "model.layers.4.block_sparse_moe.experts.12.w3", "model.layers.4.block_sparse_moe.experts.13.w3", "model.layers.4.block_sparse_moe.experts.14.w3", "model.layers.4.block_sparse_moe.experts.15.w3", "model.layers.4.block_sparse_moe.experts.16.w3", "model.layers.4.block_sparse_moe.experts.17.w3", "model.layers.4.block_sparse_moe.experts.18.w3", "model.layers.4.block_sparse_moe.experts.19.w3", "model.layers.4.block_sparse_moe.experts.20.w3", "model.layers.4.block_sparse_moe.experts.21.w3", "model.layers.4.block_sparse_moe.experts.22.w3", "model.layers.4.block_sparse_moe.experts.23.w3", "model.layers.4.block_sparse_moe.experts.24.w3", "model.layers.4.block_sparse_moe.experts.25.w3", "model.layers.4.block_sparse_moe.experts.26.w3", "model.layers.4.block_sparse_moe.experts.27.w3", "model.layers.4.block_sparse_moe.experts.28.w3", "model.layers.4.block_sparse_moe.experts.29.w3", "model.layers.4.block_sparse_moe.experts.30.w3", "model.layers.4.block_sparse_moe.experts.31.w3", "model.layers.4.block_sparse_moe.experts.32.w3", "model.layers.4.block_sparse_moe.experts.33.w3", "model.layers.4.block_sparse_moe.experts.34.w3", "model.layers.4.block_sparse_moe.experts.35.w3", "model.layers.4.block_sparse_moe.experts.36.w3", "model.layers.4.block_sparse_moe.experts.37.w3", "model.layers.4.block_sparse_moe.experts.38.w3", "model.layers.4.block_sparse_moe.experts.39.w3", "model.layers.4.block_sparse_moe.experts.40.w3", "model.layers.4.block_sparse_moe.experts.41.w3", "model.layers.4.block_sparse_moe.experts.42.w3", "model.layers.4.block_sparse_moe.experts.43.w3", "model.layers.4.block_sparse_moe.experts.44.w3", "model.layers.4.block_sparse_moe.experts.45.w3", "model.layers.4.block_sparse_moe.experts.46.w3", "model.layers.4.block_sparse_moe.experts.47.w3", "model.layers.4.block_sparse_moe.experts.48.w3", "model.layers.4.block_sparse_moe.experts.49.w3", "model.layers.4.block_sparse_moe.experts.50.w3", "model.layers.4.block_sparse_moe.experts.51.w3", "model.layers.4.block_sparse_moe.experts.52.w3", "model.layers.4.block_sparse_moe.experts.53.w3", "model.layers.4.block_sparse_moe.experts.54.w3", "model.layers.4.block_sparse_moe.experts.55.w3", "model.layers.4.block_sparse_moe.experts.56.w3", "model.layers.4.block_sparse_moe.experts.57.w3", "model.layers.4.block_sparse_moe.experts.58.w3", "model.layers.4.block_sparse_moe.experts.59.w3", "model.layers.4.block_sparse_moe.experts.60.w3", "model.layers.4.block_sparse_moe.experts.61.w3", "model.layers.4.block_sparse_moe.experts.62.w3", "model.layers.4.block_sparse_moe.experts.63.w3", "model.layers.4.block_sparse_moe.experts.64.w3", "model.layers.4.block_sparse_moe.experts.65.w3", "model.layers.4.block_sparse_moe.experts.66.w3", "model.layers.4.block_sparse_moe.experts.67.w3", "model.layers.4.block_sparse_moe.experts.68.w3", "model.layers.4.block_sparse_moe.experts.69.w3", "model.layers.4.block_sparse_moe.experts.70.w3", "model.layers.4.block_sparse_moe.experts.71.w3", "model.layers.4.block_sparse_moe.experts.72.w3", "model.layers.4.block_sparse_moe.experts.73.w3", "model.layers.4.block_sparse_moe.experts.74.w3", "model.layers.4.block_sparse_moe.experts.75.w3", "model.layers.4.block_sparse_moe.experts.76.w3", "model.layers.4.block_sparse_moe.experts.77.w3", "model.layers.4.block_sparse_moe.experts.78.w3", "model.layers.4.block_sparse_moe.experts.79.w3", "model.layers.4.block_sparse_moe.experts.80.w3", "model.layers.4.block_sparse_moe.experts.81.w3", "model.layers.4.block_sparse_moe.experts.82.w3", "model.layers.4.block_sparse_moe.experts.83.w3", "model.layers.4.block_sparse_moe.experts.84.w3", "model.layers.4.block_sparse_moe.experts.85.w3", "model.layers.4.block_sparse_moe.experts.86.w3", "model.layers.4.block_sparse_moe.experts.87.w3", "model.layers.4.block_sparse_moe.experts.88.w3", "model.layers.4.block_sparse_moe.experts.89.w3", "model.layers.4.block_sparse_moe.experts.90.w3", "model.layers.4.block_sparse_moe.experts.91.w3", "model.layers.4.block_sparse_moe.experts.92.w3", "model.layers.4.block_sparse_moe.experts.93.w3", "model.layers.4.block_sparse_moe.experts.94.w3", "model.layers.4.block_sparse_moe.experts.95.w3", "model.layers.4.block_sparse_moe.experts.96.w3", "model.layers.4.block_sparse_moe.experts.97.w3", "model.layers.4.block_sparse_moe.experts.98.w3", "model.layers.4.block_sparse_moe.experts.99.w3", "model.layers.4.block_sparse_moe.experts.100.w3", "model.layers.4.block_sparse_moe.experts.101.w3", "model.layers.4.block_sparse_moe.experts.102.w3", "model.layers.4.block_sparse_moe.experts.103.w3", "model.layers.4.block_sparse_moe.experts.104.w3", "model.layers.4.block_sparse_moe.experts.105.w3", "model.layers.4.block_sparse_moe.experts.106.w3", "model.layers.4.block_sparse_moe.experts.107.w3", "model.layers.4.block_sparse_moe.experts.108.w3", "model.layers.4.block_sparse_moe.experts.109.w3", "model.layers.4.block_sparse_moe.experts.110.w3", "model.layers.4.block_sparse_moe.experts.111.w3", "model.layers.4.block_sparse_moe.experts.112.w3", "model.layers.4.block_sparse_moe.experts.113.w3", "model.layers.4.block_sparse_moe.experts.114.w3", "model.layers.4.block_sparse_moe.experts.115.w3", "model.layers.4.block_sparse_moe.experts.116.w3", "model.layers.4.block_sparse_moe.experts.117.w3", "model.layers.4.block_sparse_moe.experts.118.w3", "model.layers.4.block_sparse_moe.experts.119.w3", "model.layers.4.block_sparse_moe.experts.120.w3", "model.layers.4.block_sparse_moe.experts.121.w3", "model.layers.4.block_sparse_moe.experts.122.w3", "model.layers.4.block_sparse_moe.experts.123.w3", "model.layers.4.block_sparse_moe.experts.124.w3", "model.layers.4.block_sparse_moe.experts.125.w3", "model.layers.4.block_sparse_moe.experts.126.w3", "model.layers.4.block_sparse_moe.experts.127.w3", "model.layers.4.block_sparse_moe.experts.128.w3", "model.layers.4.block_sparse_moe.experts.129.w3", "model.layers.4.block_sparse_moe.experts.130.w3", "model.layers.4.block_sparse_moe.experts.131.w3", "model.layers.4.block_sparse_moe.experts.132.w3", "model.layers.4.block_sparse_moe.experts.133.w3", "model.layers.4.block_sparse_moe.experts.134.w3", "model.layers.4.block_sparse_moe.experts.135.w3", "model.layers.4.block_sparse_moe.experts.136.w3", "model.layers.4.block_sparse_moe.experts.137.w3", "model.layers.4.block_sparse_moe.experts.138.w3", "model.layers.4.block_sparse_moe.experts.139.w3", "model.layers.4.block_sparse_moe.experts.140.w3", "model.layers.4.block_sparse_moe.experts.141.w3", "model.layers.4.block_sparse_moe.experts.142.w3", "model.layers.4.block_sparse_moe.experts.143.w3", "model.layers.4.block_sparse_moe.experts.144.w3", "model.layers.4.block_sparse_moe.experts.145.w3", "model.layers.4.block_sparse_moe.experts.146.w3", "model.layers.4.block_sparse_moe.experts.147.w3", "model.layers.4.block_sparse_moe.experts.148.w3", "model.layers.4.block_sparse_moe.experts.149.w3", "model.layers.4.block_sparse_moe.experts.150.w3", "model.layers.4.block_sparse_moe.experts.151.w3", "model.layers.4.block_sparse_moe.experts.152.w3", "model.layers.4.block_sparse_moe.experts.153.w3", "model.layers.4.block_sparse_moe.experts.154.w3", "model.layers.4.block_sparse_moe.experts.155.w3", "model.layers.4.block_sparse_moe.experts.156.w3", "model.layers.4.block_sparse_moe.experts.157.w3", "model.layers.4.block_sparse_moe.experts.158.w3", "model.layers.4.block_sparse_moe.experts.159.w3", "model.layers.4.block_sparse_moe.experts.160.w3", "model.layers.4.block_sparse_moe.experts.161.w3", "model.layers.4.block_sparse_moe.experts.162.w3", "model.layers.4.block_sparse_moe.experts.163.w3", "model.layers.4.block_sparse_moe.experts.164.w3", "model.layers.4.block_sparse_moe.experts.165.w3", "model.layers.4.block_sparse_moe.experts.166.w3", "model.layers.4.block_sparse_moe.experts.167.w3", "model.layers.4.block_sparse_moe.experts.168.w3", "model.layers.4.block_sparse_moe.experts.169.w3", "model.layers.4.block_sparse_moe.experts.170.w3", "model.layers.4.block_sparse_moe.experts.171.w3", "model.layers.4.block_sparse_moe.experts.172.w3", "model.layers.4.block_sparse_moe.experts.173.w3", "model.layers.4.block_sparse_moe.experts.174.w3", "model.layers.4.block_sparse_moe.experts.175.w3", "model.layers.4.block_sparse_moe.experts.176.w3", "model.layers.4.block_sparse_moe.experts.177.w3", "model.layers.4.block_sparse_moe.experts.178.w3", "model.layers.4.block_sparse_moe.experts.179.w3", "model.layers.4.block_sparse_moe.experts.180.w3", "model.layers.4.block_sparse_moe.experts.181.w3", "model.layers.4.block_sparse_moe.experts.182.w3", "model.layers.4.block_sparse_moe.experts.183.w3", "model.layers.4.block_sparse_moe.experts.184.w3", "model.layers.4.block_sparse_moe.experts.185.w3", "model.layers.4.block_sparse_moe.experts.186.w3", "model.layers.4.block_sparse_moe.experts.187.w3", "model.layers.4.block_sparse_moe.experts.188.w3", "model.layers.4.block_sparse_moe.experts.189.w3", "model.layers.4.block_sparse_moe.experts.190.w3", "model.layers.4.block_sparse_moe.experts.191.w3", "model.layers.4.block_sparse_moe.experts.192.w3", "model.layers.4.block_sparse_moe.experts.193.w3", "model.layers.4.block_sparse_moe.experts.194.w3", "model.layers.4.block_sparse_moe.experts.195.w3", "model.layers.4.block_sparse_moe.experts.196.w3", "model.layers.4.block_sparse_moe.experts.197.w3", "model.layers.4.block_sparse_moe.experts.198.w3", "model.layers.4.block_sparse_moe.experts.199.w3", "model.layers.4.block_sparse_moe.experts.200.w3", "model.layers.4.block_sparse_moe.experts.201.w3", "model.layers.4.block_sparse_moe.experts.202.w3", "model.layers.4.block_sparse_moe.experts.203.w3", "model.layers.4.block_sparse_moe.experts.204.w3", "model.layers.4.block_sparse_moe.experts.205.w3", "model.layers.4.block_sparse_moe.experts.206.w3", "model.layers.4.block_sparse_moe.experts.207.w3", "model.layers.4.block_sparse_moe.experts.208.w3", "model.layers.4.block_sparse_moe.experts.209.w3", "model.layers.4.block_sparse_moe.experts.210.w3", "model.layers.4.block_sparse_moe.experts.211.w3", "model.layers.4.block_sparse_moe.experts.212.w3", "model.layers.4.block_sparse_moe.experts.213.w3", "model.layers.4.block_sparse_moe.experts.214.w3", "model.layers.4.block_sparse_moe.experts.215.w3", "model.layers.4.block_sparse_moe.experts.216.w3", "model.layers.4.block_sparse_moe.experts.217.w3", "model.layers.4.block_sparse_moe.experts.218.w3", "model.layers.4.block_sparse_moe.experts.219.w3", "model.layers.4.block_sparse_moe.experts.220.w3", "model.layers.4.block_sparse_moe.experts.221.w3", "model.layers.4.block_sparse_moe.experts.222.w3", "model.layers.4.block_sparse_moe.experts.223.w3", "model.layers.4.block_sparse_moe.experts.224.w3", "model.layers.4.block_sparse_moe.experts.225.w3", "model.layers.4.block_sparse_moe.experts.226.w3", "model.layers.4.block_sparse_moe.experts.227.w3", "model.layers.4.block_sparse_moe.experts.228.w3", "model.layers.4.block_sparse_moe.experts.229.w3", "model.layers.4.block_sparse_moe.experts.230.w3", "model.layers.4.block_sparse_moe.experts.231.w3", "model.layers.4.block_sparse_moe.experts.232.w3", "model.layers.4.block_sparse_moe.experts.233.w3", "model.layers.4.block_sparse_moe.experts.234.w3", "model.layers.4.block_sparse_moe.experts.235.w3", "model.layers.4.block_sparse_moe.experts.236.w3", "model.layers.4.block_sparse_moe.experts.237.w3", "model.layers.4.block_sparse_moe.experts.238.w3", "model.layers.4.block_sparse_moe.experts.239.w3", "model.layers.4.block_sparse_moe.experts.240.w3", "model.layers.4.block_sparse_moe.experts.241.w3", "model.layers.4.block_sparse_moe.experts.242.w3", "model.layers.4.block_sparse_moe.experts.243.w3", "model.layers.4.block_sparse_moe.experts.244.w3", "model.layers.4.block_sparse_moe.experts.245.w3", "model.layers.4.block_sparse_moe.experts.246.w3", "model.layers.4.block_sparse_moe.experts.247.w3", "model.layers.4.block_sparse_moe.experts.248.w3", "model.layers.4.block_sparse_moe.experts.249.w3", "model.layers.4.block_sparse_moe.experts.250.w3", "model.layers.4.block_sparse_moe.experts.251.w3", "model.layers.4.block_sparse_moe.experts.252.w3", "model.layers.4.block_sparse_moe.experts.253.w3", "model.layers.4.block_sparse_moe.experts.254.w3", "model.layers.4.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.010060405358672142, "dbits": 2415919104 } ] }, { "idx": 24, "layers": [ "model.layers.4.block_sparse_moe.experts.0.w2", "model.layers.4.block_sparse_moe.experts.1.w2", "model.layers.4.block_sparse_moe.experts.2.w2", "model.layers.4.block_sparse_moe.experts.3.w2", "model.layers.4.block_sparse_moe.experts.4.w2", "model.layers.4.block_sparse_moe.experts.5.w2", "model.layers.4.block_sparse_moe.experts.6.w2", "model.layers.4.block_sparse_moe.experts.7.w2", "model.layers.4.block_sparse_moe.experts.8.w2", "model.layers.4.block_sparse_moe.experts.9.w2", "model.layers.4.block_sparse_moe.experts.10.w2", "model.layers.4.block_sparse_moe.experts.11.w2", "model.layers.4.block_sparse_moe.experts.12.w2", "model.layers.4.block_sparse_moe.experts.13.w2", "model.layers.4.block_sparse_moe.experts.14.w2", "model.layers.4.block_sparse_moe.experts.15.w2", "model.layers.4.block_sparse_moe.experts.16.w2", "model.layers.4.block_sparse_moe.experts.17.w2", "model.layers.4.block_sparse_moe.experts.18.w2", "model.layers.4.block_sparse_moe.experts.19.w2", "model.layers.4.block_sparse_moe.experts.20.w2", "model.layers.4.block_sparse_moe.experts.21.w2", "model.layers.4.block_sparse_moe.experts.22.w2", "model.layers.4.block_sparse_moe.experts.23.w2", "model.layers.4.block_sparse_moe.experts.24.w2", "model.layers.4.block_sparse_moe.experts.25.w2", "model.layers.4.block_sparse_moe.experts.26.w2", "model.layers.4.block_sparse_moe.experts.27.w2", "model.layers.4.block_sparse_moe.experts.28.w2", "model.layers.4.block_sparse_moe.experts.29.w2", "model.layers.4.block_sparse_moe.experts.30.w2", "model.layers.4.block_sparse_moe.experts.31.w2", "model.layers.4.block_sparse_moe.experts.32.w2", "model.layers.4.block_sparse_moe.experts.33.w2", "model.layers.4.block_sparse_moe.experts.34.w2", "model.layers.4.block_sparse_moe.experts.35.w2", "model.layers.4.block_sparse_moe.experts.36.w2", "model.layers.4.block_sparse_moe.experts.37.w2", "model.layers.4.block_sparse_moe.experts.38.w2", "model.layers.4.block_sparse_moe.experts.39.w2", "model.layers.4.block_sparse_moe.experts.40.w2", "model.layers.4.block_sparse_moe.experts.41.w2", "model.layers.4.block_sparse_moe.experts.42.w2", "model.layers.4.block_sparse_moe.experts.43.w2", "model.layers.4.block_sparse_moe.experts.44.w2", "model.layers.4.block_sparse_moe.experts.45.w2", "model.layers.4.block_sparse_moe.experts.46.w2", "model.layers.4.block_sparse_moe.experts.47.w2", "model.layers.4.block_sparse_moe.experts.48.w2", "model.layers.4.block_sparse_moe.experts.49.w2", "model.layers.4.block_sparse_moe.experts.50.w2", "model.layers.4.block_sparse_moe.experts.51.w2", "model.layers.4.block_sparse_moe.experts.52.w2", "model.layers.4.block_sparse_moe.experts.53.w2", "model.layers.4.block_sparse_moe.experts.54.w2", "model.layers.4.block_sparse_moe.experts.55.w2", "model.layers.4.block_sparse_moe.experts.56.w2", "model.layers.4.block_sparse_moe.experts.57.w2", "model.layers.4.block_sparse_moe.experts.58.w2", "model.layers.4.block_sparse_moe.experts.59.w2", "model.layers.4.block_sparse_moe.experts.60.w2", "model.layers.4.block_sparse_moe.experts.61.w2", "model.layers.4.block_sparse_moe.experts.62.w2", "model.layers.4.block_sparse_moe.experts.63.w2", "model.layers.4.block_sparse_moe.experts.64.w2", "model.layers.4.block_sparse_moe.experts.65.w2", "model.layers.4.block_sparse_moe.experts.66.w2", "model.layers.4.block_sparse_moe.experts.67.w2", "model.layers.4.block_sparse_moe.experts.68.w2", "model.layers.4.block_sparse_moe.experts.69.w2", "model.layers.4.block_sparse_moe.experts.70.w2", "model.layers.4.block_sparse_moe.experts.71.w2", "model.layers.4.block_sparse_moe.experts.72.w2", "model.layers.4.block_sparse_moe.experts.73.w2", "model.layers.4.block_sparse_moe.experts.74.w2", "model.layers.4.block_sparse_moe.experts.75.w2", "model.layers.4.block_sparse_moe.experts.76.w2", "model.layers.4.block_sparse_moe.experts.77.w2", "model.layers.4.block_sparse_moe.experts.78.w2", "model.layers.4.block_sparse_moe.experts.79.w2", "model.layers.4.block_sparse_moe.experts.80.w2", "model.layers.4.block_sparse_moe.experts.81.w2", "model.layers.4.block_sparse_moe.experts.82.w2", "model.layers.4.block_sparse_moe.experts.83.w2", "model.layers.4.block_sparse_moe.experts.84.w2", "model.layers.4.block_sparse_moe.experts.85.w2", "model.layers.4.block_sparse_moe.experts.86.w2", "model.layers.4.block_sparse_moe.experts.87.w2", "model.layers.4.block_sparse_moe.experts.88.w2", "model.layers.4.block_sparse_moe.experts.89.w2", "model.layers.4.block_sparse_moe.experts.90.w2", "model.layers.4.block_sparse_moe.experts.91.w2", "model.layers.4.block_sparse_moe.experts.92.w2", "model.layers.4.block_sparse_moe.experts.93.w2", "model.layers.4.block_sparse_moe.experts.94.w2", "model.layers.4.block_sparse_moe.experts.95.w2", "model.layers.4.block_sparse_moe.experts.96.w2", "model.layers.4.block_sparse_moe.experts.97.w2", "model.layers.4.block_sparse_moe.experts.98.w2", "model.layers.4.block_sparse_moe.experts.99.w2", "model.layers.4.block_sparse_moe.experts.100.w2", "model.layers.4.block_sparse_moe.experts.101.w2", "model.layers.4.block_sparse_moe.experts.102.w2", "model.layers.4.block_sparse_moe.experts.103.w2", "model.layers.4.block_sparse_moe.experts.104.w2", "model.layers.4.block_sparse_moe.experts.105.w2", "model.layers.4.block_sparse_moe.experts.106.w2", "model.layers.4.block_sparse_moe.experts.107.w2", "model.layers.4.block_sparse_moe.experts.108.w2", "model.layers.4.block_sparse_moe.experts.109.w2", "model.layers.4.block_sparse_moe.experts.110.w2", "model.layers.4.block_sparse_moe.experts.111.w2", "model.layers.4.block_sparse_moe.experts.112.w2", "model.layers.4.block_sparse_moe.experts.113.w2", "model.layers.4.block_sparse_moe.experts.114.w2", "model.layers.4.block_sparse_moe.experts.115.w2", "model.layers.4.block_sparse_moe.experts.116.w2", "model.layers.4.block_sparse_moe.experts.117.w2", "model.layers.4.block_sparse_moe.experts.118.w2", "model.layers.4.block_sparse_moe.experts.119.w2", "model.layers.4.block_sparse_moe.experts.120.w2", "model.layers.4.block_sparse_moe.experts.121.w2", "model.layers.4.block_sparse_moe.experts.122.w2", "model.layers.4.block_sparse_moe.experts.123.w2", "model.layers.4.block_sparse_moe.experts.124.w2", "model.layers.4.block_sparse_moe.experts.125.w2", "model.layers.4.block_sparse_moe.experts.126.w2", "model.layers.4.block_sparse_moe.experts.127.w2", "model.layers.4.block_sparse_moe.experts.128.w2", "model.layers.4.block_sparse_moe.experts.129.w2", "model.layers.4.block_sparse_moe.experts.130.w2", "model.layers.4.block_sparse_moe.experts.131.w2", "model.layers.4.block_sparse_moe.experts.132.w2", "model.layers.4.block_sparse_moe.experts.133.w2", "model.layers.4.block_sparse_moe.experts.134.w2", "model.layers.4.block_sparse_moe.experts.135.w2", "model.layers.4.block_sparse_moe.experts.136.w2", "model.layers.4.block_sparse_moe.experts.137.w2", "model.layers.4.block_sparse_moe.experts.138.w2", "model.layers.4.block_sparse_moe.experts.139.w2", "model.layers.4.block_sparse_moe.experts.140.w2", "model.layers.4.block_sparse_moe.experts.141.w2", "model.layers.4.block_sparse_moe.experts.142.w2", "model.layers.4.block_sparse_moe.experts.143.w2", "model.layers.4.block_sparse_moe.experts.144.w2", "model.layers.4.block_sparse_moe.experts.145.w2", "model.layers.4.block_sparse_moe.experts.146.w2", "model.layers.4.block_sparse_moe.experts.147.w2", "model.layers.4.block_sparse_moe.experts.148.w2", "model.layers.4.block_sparse_moe.experts.149.w2", "model.layers.4.block_sparse_moe.experts.150.w2", "model.layers.4.block_sparse_moe.experts.151.w2", "model.layers.4.block_sparse_moe.experts.152.w2", "model.layers.4.block_sparse_moe.experts.153.w2", "model.layers.4.block_sparse_moe.experts.154.w2", "model.layers.4.block_sparse_moe.experts.155.w2", "model.layers.4.block_sparse_moe.experts.156.w2", "model.layers.4.block_sparse_moe.experts.157.w2", "model.layers.4.block_sparse_moe.experts.158.w2", "model.layers.4.block_sparse_moe.experts.159.w2", "model.layers.4.block_sparse_moe.experts.160.w2", "model.layers.4.block_sparse_moe.experts.161.w2", "model.layers.4.block_sparse_moe.experts.162.w2", "model.layers.4.block_sparse_moe.experts.163.w2", "model.layers.4.block_sparse_moe.experts.164.w2", "model.layers.4.block_sparse_moe.experts.165.w2", "model.layers.4.block_sparse_moe.experts.166.w2", "model.layers.4.block_sparse_moe.experts.167.w2", "model.layers.4.block_sparse_moe.experts.168.w2", "model.layers.4.block_sparse_moe.experts.169.w2", "model.layers.4.block_sparse_moe.experts.170.w2", "model.layers.4.block_sparse_moe.experts.171.w2", "model.layers.4.block_sparse_moe.experts.172.w2", "model.layers.4.block_sparse_moe.experts.173.w2", "model.layers.4.block_sparse_moe.experts.174.w2", "model.layers.4.block_sparse_moe.experts.175.w2", "model.layers.4.block_sparse_moe.experts.176.w2", "model.layers.4.block_sparse_moe.experts.177.w2", "model.layers.4.block_sparse_moe.experts.178.w2", "model.layers.4.block_sparse_moe.experts.179.w2", "model.layers.4.block_sparse_moe.experts.180.w2", "model.layers.4.block_sparse_moe.experts.181.w2", "model.layers.4.block_sparse_moe.experts.182.w2", "model.layers.4.block_sparse_moe.experts.183.w2", "model.layers.4.block_sparse_moe.experts.184.w2", "model.layers.4.block_sparse_moe.experts.185.w2", "model.layers.4.block_sparse_moe.experts.186.w2", "model.layers.4.block_sparse_moe.experts.187.w2", "model.layers.4.block_sparse_moe.experts.188.w2", "model.layers.4.block_sparse_moe.experts.189.w2", "model.layers.4.block_sparse_moe.experts.190.w2", "model.layers.4.block_sparse_moe.experts.191.w2", "model.layers.4.block_sparse_moe.experts.192.w2", "model.layers.4.block_sparse_moe.experts.193.w2", "model.layers.4.block_sparse_moe.experts.194.w2", "model.layers.4.block_sparse_moe.experts.195.w2", "model.layers.4.block_sparse_moe.experts.196.w2", "model.layers.4.block_sparse_moe.experts.197.w2", "model.layers.4.block_sparse_moe.experts.198.w2", "model.layers.4.block_sparse_moe.experts.199.w2", "model.layers.4.block_sparse_moe.experts.200.w2", "model.layers.4.block_sparse_moe.experts.201.w2", "model.layers.4.block_sparse_moe.experts.202.w2", "model.layers.4.block_sparse_moe.experts.203.w2", "model.layers.4.block_sparse_moe.experts.204.w2", "model.layers.4.block_sparse_moe.experts.205.w2", "model.layers.4.block_sparse_moe.experts.206.w2", "model.layers.4.block_sparse_moe.experts.207.w2", "model.layers.4.block_sparse_moe.experts.208.w2", "model.layers.4.block_sparse_moe.experts.209.w2", "model.layers.4.block_sparse_moe.experts.210.w2", "model.layers.4.block_sparse_moe.experts.211.w2", "model.layers.4.block_sparse_moe.experts.212.w2", "model.layers.4.block_sparse_moe.experts.213.w2", "model.layers.4.block_sparse_moe.experts.214.w2", "model.layers.4.block_sparse_moe.experts.215.w2", "model.layers.4.block_sparse_moe.experts.216.w2", "model.layers.4.block_sparse_moe.experts.217.w2", "model.layers.4.block_sparse_moe.experts.218.w2", "model.layers.4.block_sparse_moe.experts.219.w2", "model.layers.4.block_sparse_moe.experts.220.w2", "model.layers.4.block_sparse_moe.experts.221.w2", "model.layers.4.block_sparse_moe.experts.222.w2", "model.layers.4.block_sparse_moe.experts.223.w2", "model.layers.4.block_sparse_moe.experts.224.w2", "model.layers.4.block_sparse_moe.experts.225.w2", "model.layers.4.block_sparse_moe.experts.226.w2", "model.layers.4.block_sparse_moe.experts.227.w2", "model.layers.4.block_sparse_moe.experts.228.w2", "model.layers.4.block_sparse_moe.experts.229.w2", "model.layers.4.block_sparse_moe.experts.230.w2", "model.layers.4.block_sparse_moe.experts.231.w2", "model.layers.4.block_sparse_moe.experts.232.w2", "model.layers.4.block_sparse_moe.experts.233.w2", "model.layers.4.block_sparse_moe.experts.234.w2", "model.layers.4.block_sparse_moe.experts.235.w2", "model.layers.4.block_sparse_moe.experts.236.w2", "model.layers.4.block_sparse_moe.experts.237.w2", "model.layers.4.block_sparse_moe.experts.238.w2", "model.layers.4.block_sparse_moe.experts.239.w2", "model.layers.4.block_sparse_moe.experts.240.w2", "model.layers.4.block_sparse_moe.experts.241.w2", "model.layers.4.block_sparse_moe.experts.242.w2", "model.layers.4.block_sparse_moe.experts.243.w2", "model.layers.4.block_sparse_moe.experts.244.w2", "model.layers.4.block_sparse_moe.experts.245.w2", "model.layers.4.block_sparse_moe.experts.246.w2", "model.layers.4.block_sparse_moe.experts.247.w2", "model.layers.4.block_sparse_moe.experts.248.w2", "model.layers.4.block_sparse_moe.experts.249.w2", "model.layers.4.block_sparse_moe.experts.250.w2", "model.layers.4.block_sparse_moe.experts.251.w2", "model.layers.4.block_sparse_moe.experts.252.w2", "model.layers.4.block_sparse_moe.experts.253.w2", "model.layers.4.block_sparse_moe.experts.254.w2", "model.layers.4.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.000408484786748875, "dbits": 1207959552 } ] }, { "idx": 25, "layers": [ "model.layers.5.self_attn.q_proj" ], "candidates": [ { "dkld": -5.323886871339001e-05, "dbits": 18874368 } ] }, { "idx": 26, "layers": [ "model.layers.5.self_attn.k_proj", "model.layers.5.self_attn.v_proj" ], "candidates": [ { "dkld": 0.010064665600657441, "dbits": 6291456 } ] }, { "idx": 27, "layers": [ "model.layers.5.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0030010659247636795, "dbits": 18874368 } ] }, { "idx": 28, "layers": [ "model.layers.5.block_sparse_moe.experts.0.w1", "model.layers.5.block_sparse_moe.experts.1.w1", "model.layers.5.block_sparse_moe.experts.2.w1", "model.layers.5.block_sparse_moe.experts.3.w1", "model.layers.5.block_sparse_moe.experts.4.w1", "model.layers.5.block_sparse_moe.experts.5.w1", "model.layers.5.block_sparse_moe.experts.6.w1", "model.layers.5.block_sparse_moe.experts.7.w1", "model.layers.5.block_sparse_moe.experts.8.w1", "model.layers.5.block_sparse_moe.experts.9.w1", "model.layers.5.block_sparse_moe.experts.10.w1", "model.layers.5.block_sparse_moe.experts.11.w1", "model.layers.5.block_sparse_moe.experts.12.w1", "model.layers.5.block_sparse_moe.experts.13.w1", "model.layers.5.block_sparse_moe.experts.14.w1", "model.layers.5.block_sparse_moe.experts.15.w1", "model.layers.5.block_sparse_moe.experts.16.w1", "model.layers.5.block_sparse_moe.experts.17.w1", "model.layers.5.block_sparse_moe.experts.18.w1", "model.layers.5.block_sparse_moe.experts.19.w1", "model.layers.5.block_sparse_moe.experts.20.w1", "model.layers.5.block_sparse_moe.experts.21.w1", "model.layers.5.block_sparse_moe.experts.22.w1", "model.layers.5.block_sparse_moe.experts.23.w1", "model.layers.5.block_sparse_moe.experts.24.w1", "model.layers.5.block_sparse_moe.experts.25.w1", "model.layers.5.block_sparse_moe.experts.26.w1", "model.layers.5.block_sparse_moe.experts.27.w1", "model.layers.5.block_sparse_moe.experts.28.w1", "model.layers.5.block_sparse_moe.experts.29.w1", "model.layers.5.block_sparse_moe.experts.30.w1", "model.layers.5.block_sparse_moe.experts.31.w1", "model.layers.5.block_sparse_moe.experts.32.w1", "model.layers.5.block_sparse_moe.experts.33.w1", "model.layers.5.block_sparse_moe.experts.34.w1", "model.layers.5.block_sparse_moe.experts.35.w1", "model.layers.5.block_sparse_moe.experts.36.w1", "model.layers.5.block_sparse_moe.experts.37.w1", "model.layers.5.block_sparse_moe.experts.38.w1", "model.layers.5.block_sparse_moe.experts.39.w1", "model.layers.5.block_sparse_moe.experts.40.w1", "model.layers.5.block_sparse_moe.experts.41.w1", "model.layers.5.block_sparse_moe.experts.42.w1", "model.layers.5.block_sparse_moe.experts.43.w1", "model.layers.5.block_sparse_moe.experts.44.w1", "model.layers.5.block_sparse_moe.experts.45.w1", "model.layers.5.block_sparse_moe.experts.46.w1", "model.layers.5.block_sparse_moe.experts.47.w1", "model.layers.5.block_sparse_moe.experts.48.w1", "model.layers.5.block_sparse_moe.experts.49.w1", "model.layers.5.block_sparse_moe.experts.50.w1", "model.layers.5.block_sparse_moe.experts.51.w1", "model.layers.5.block_sparse_moe.experts.52.w1", "model.layers.5.block_sparse_moe.experts.53.w1", "model.layers.5.block_sparse_moe.experts.54.w1", "model.layers.5.block_sparse_moe.experts.55.w1", "model.layers.5.block_sparse_moe.experts.56.w1", "model.layers.5.block_sparse_moe.experts.57.w1", "model.layers.5.block_sparse_moe.experts.58.w1", "model.layers.5.block_sparse_moe.experts.59.w1", "model.layers.5.block_sparse_moe.experts.60.w1", "model.layers.5.block_sparse_moe.experts.61.w1", "model.layers.5.block_sparse_moe.experts.62.w1", "model.layers.5.block_sparse_moe.experts.63.w1", "model.layers.5.block_sparse_moe.experts.64.w1", "model.layers.5.block_sparse_moe.experts.65.w1", "model.layers.5.block_sparse_moe.experts.66.w1", "model.layers.5.block_sparse_moe.experts.67.w1", "model.layers.5.block_sparse_moe.experts.68.w1", "model.layers.5.block_sparse_moe.experts.69.w1", "model.layers.5.block_sparse_moe.experts.70.w1", "model.layers.5.block_sparse_moe.experts.71.w1", "model.layers.5.block_sparse_moe.experts.72.w1", "model.layers.5.block_sparse_moe.experts.73.w1", "model.layers.5.block_sparse_moe.experts.74.w1", "model.layers.5.block_sparse_moe.experts.75.w1", "model.layers.5.block_sparse_moe.experts.76.w1", "model.layers.5.block_sparse_moe.experts.77.w1", "model.layers.5.block_sparse_moe.experts.78.w1", "model.layers.5.block_sparse_moe.experts.79.w1", "model.layers.5.block_sparse_moe.experts.80.w1", "model.layers.5.block_sparse_moe.experts.81.w1", "model.layers.5.block_sparse_moe.experts.82.w1", "model.layers.5.block_sparse_moe.experts.83.w1", "model.layers.5.block_sparse_moe.experts.84.w1", "model.layers.5.block_sparse_moe.experts.85.w1", "model.layers.5.block_sparse_moe.experts.86.w1", "model.layers.5.block_sparse_moe.experts.87.w1", "model.layers.5.block_sparse_moe.experts.88.w1", "model.layers.5.block_sparse_moe.experts.89.w1", "model.layers.5.block_sparse_moe.experts.90.w1", "model.layers.5.block_sparse_moe.experts.91.w1", "model.layers.5.block_sparse_moe.experts.92.w1", "model.layers.5.block_sparse_moe.experts.93.w1", "model.layers.5.block_sparse_moe.experts.94.w1", "model.layers.5.block_sparse_moe.experts.95.w1", "model.layers.5.block_sparse_moe.experts.96.w1", "model.layers.5.block_sparse_moe.experts.97.w1", "model.layers.5.block_sparse_moe.experts.98.w1", "model.layers.5.block_sparse_moe.experts.99.w1", "model.layers.5.block_sparse_moe.experts.100.w1", "model.layers.5.block_sparse_moe.experts.101.w1", "model.layers.5.block_sparse_moe.experts.102.w1", "model.layers.5.block_sparse_moe.experts.103.w1", "model.layers.5.block_sparse_moe.experts.104.w1", "model.layers.5.block_sparse_moe.experts.105.w1", "model.layers.5.block_sparse_moe.experts.106.w1", "model.layers.5.block_sparse_moe.experts.107.w1", "model.layers.5.block_sparse_moe.experts.108.w1", "model.layers.5.block_sparse_moe.experts.109.w1", "model.layers.5.block_sparse_moe.experts.110.w1", "model.layers.5.block_sparse_moe.experts.111.w1", "model.layers.5.block_sparse_moe.experts.112.w1", "model.layers.5.block_sparse_moe.experts.113.w1", "model.layers.5.block_sparse_moe.experts.114.w1", "model.layers.5.block_sparse_moe.experts.115.w1", "model.layers.5.block_sparse_moe.experts.116.w1", "model.layers.5.block_sparse_moe.experts.117.w1", "model.layers.5.block_sparse_moe.experts.118.w1", "model.layers.5.block_sparse_moe.experts.119.w1", "model.layers.5.block_sparse_moe.experts.120.w1", "model.layers.5.block_sparse_moe.experts.121.w1", "model.layers.5.block_sparse_moe.experts.122.w1", "model.layers.5.block_sparse_moe.experts.123.w1", "model.layers.5.block_sparse_moe.experts.124.w1", "model.layers.5.block_sparse_moe.experts.125.w1", "model.layers.5.block_sparse_moe.experts.126.w1", "model.layers.5.block_sparse_moe.experts.127.w1", "model.layers.5.block_sparse_moe.experts.128.w1", "model.layers.5.block_sparse_moe.experts.129.w1", "model.layers.5.block_sparse_moe.experts.130.w1", "model.layers.5.block_sparse_moe.experts.131.w1", "model.layers.5.block_sparse_moe.experts.132.w1", "model.layers.5.block_sparse_moe.experts.133.w1", "model.layers.5.block_sparse_moe.experts.134.w1", "model.layers.5.block_sparse_moe.experts.135.w1", "model.layers.5.block_sparse_moe.experts.136.w1", "model.layers.5.block_sparse_moe.experts.137.w1", "model.layers.5.block_sparse_moe.experts.138.w1", "model.layers.5.block_sparse_moe.experts.139.w1", "model.layers.5.block_sparse_moe.experts.140.w1", "model.layers.5.block_sparse_moe.experts.141.w1", "model.layers.5.block_sparse_moe.experts.142.w1", "model.layers.5.block_sparse_moe.experts.143.w1", "model.layers.5.block_sparse_moe.experts.144.w1", "model.layers.5.block_sparse_moe.experts.145.w1", "model.layers.5.block_sparse_moe.experts.146.w1", "model.layers.5.block_sparse_moe.experts.147.w1", "model.layers.5.block_sparse_moe.experts.148.w1", "model.layers.5.block_sparse_moe.experts.149.w1", "model.layers.5.block_sparse_moe.experts.150.w1", "model.layers.5.block_sparse_moe.experts.151.w1", "model.layers.5.block_sparse_moe.experts.152.w1", "model.layers.5.block_sparse_moe.experts.153.w1", "model.layers.5.block_sparse_moe.experts.154.w1", "model.layers.5.block_sparse_moe.experts.155.w1", "model.layers.5.block_sparse_moe.experts.156.w1", "model.layers.5.block_sparse_moe.experts.157.w1", "model.layers.5.block_sparse_moe.experts.158.w1", "model.layers.5.block_sparse_moe.experts.159.w1", "model.layers.5.block_sparse_moe.experts.160.w1", "model.layers.5.block_sparse_moe.experts.161.w1", "model.layers.5.block_sparse_moe.experts.162.w1", "model.layers.5.block_sparse_moe.experts.163.w1", "model.layers.5.block_sparse_moe.experts.164.w1", "model.layers.5.block_sparse_moe.experts.165.w1", "model.layers.5.block_sparse_moe.experts.166.w1", "model.layers.5.block_sparse_moe.experts.167.w1", "model.layers.5.block_sparse_moe.experts.168.w1", "model.layers.5.block_sparse_moe.experts.169.w1", "model.layers.5.block_sparse_moe.experts.170.w1", "model.layers.5.block_sparse_moe.experts.171.w1", "model.layers.5.block_sparse_moe.experts.172.w1", "model.layers.5.block_sparse_moe.experts.173.w1", "model.layers.5.block_sparse_moe.experts.174.w1", "model.layers.5.block_sparse_moe.experts.175.w1", "model.layers.5.block_sparse_moe.experts.176.w1", "model.layers.5.block_sparse_moe.experts.177.w1", "model.layers.5.block_sparse_moe.experts.178.w1", "model.layers.5.block_sparse_moe.experts.179.w1", "model.layers.5.block_sparse_moe.experts.180.w1", "model.layers.5.block_sparse_moe.experts.181.w1", "model.layers.5.block_sparse_moe.experts.182.w1", "model.layers.5.block_sparse_moe.experts.183.w1", "model.layers.5.block_sparse_moe.experts.184.w1", "model.layers.5.block_sparse_moe.experts.185.w1", "model.layers.5.block_sparse_moe.experts.186.w1", "model.layers.5.block_sparse_moe.experts.187.w1", "model.layers.5.block_sparse_moe.experts.188.w1", "model.layers.5.block_sparse_moe.experts.189.w1", "model.layers.5.block_sparse_moe.experts.190.w1", "model.layers.5.block_sparse_moe.experts.191.w1", "model.layers.5.block_sparse_moe.experts.192.w1", "model.layers.5.block_sparse_moe.experts.193.w1", "model.layers.5.block_sparse_moe.experts.194.w1", "model.layers.5.block_sparse_moe.experts.195.w1", "model.layers.5.block_sparse_moe.experts.196.w1", "model.layers.5.block_sparse_moe.experts.197.w1", "model.layers.5.block_sparse_moe.experts.198.w1", "model.layers.5.block_sparse_moe.experts.199.w1", "model.layers.5.block_sparse_moe.experts.200.w1", "model.layers.5.block_sparse_moe.experts.201.w1", "model.layers.5.block_sparse_moe.experts.202.w1", "model.layers.5.block_sparse_moe.experts.203.w1", "model.layers.5.block_sparse_moe.experts.204.w1", "model.layers.5.block_sparse_moe.experts.205.w1", "model.layers.5.block_sparse_moe.experts.206.w1", "model.layers.5.block_sparse_moe.experts.207.w1", "model.layers.5.block_sparse_moe.experts.208.w1", "model.layers.5.block_sparse_moe.experts.209.w1", "model.layers.5.block_sparse_moe.experts.210.w1", "model.layers.5.block_sparse_moe.experts.211.w1", "model.layers.5.block_sparse_moe.experts.212.w1", "model.layers.5.block_sparse_moe.experts.213.w1", "model.layers.5.block_sparse_moe.experts.214.w1", "model.layers.5.block_sparse_moe.experts.215.w1", "model.layers.5.block_sparse_moe.experts.216.w1", "model.layers.5.block_sparse_moe.experts.217.w1", "model.layers.5.block_sparse_moe.experts.218.w1", "model.layers.5.block_sparse_moe.experts.219.w1", "model.layers.5.block_sparse_moe.experts.220.w1", "model.layers.5.block_sparse_moe.experts.221.w1", "model.layers.5.block_sparse_moe.experts.222.w1", "model.layers.5.block_sparse_moe.experts.223.w1", "model.layers.5.block_sparse_moe.experts.224.w1", "model.layers.5.block_sparse_moe.experts.225.w1", "model.layers.5.block_sparse_moe.experts.226.w1", "model.layers.5.block_sparse_moe.experts.227.w1", "model.layers.5.block_sparse_moe.experts.228.w1", "model.layers.5.block_sparse_moe.experts.229.w1", "model.layers.5.block_sparse_moe.experts.230.w1", "model.layers.5.block_sparse_moe.experts.231.w1", "model.layers.5.block_sparse_moe.experts.232.w1", "model.layers.5.block_sparse_moe.experts.233.w1", "model.layers.5.block_sparse_moe.experts.234.w1", "model.layers.5.block_sparse_moe.experts.235.w1", "model.layers.5.block_sparse_moe.experts.236.w1", "model.layers.5.block_sparse_moe.experts.237.w1", "model.layers.5.block_sparse_moe.experts.238.w1", "model.layers.5.block_sparse_moe.experts.239.w1", "model.layers.5.block_sparse_moe.experts.240.w1", "model.layers.5.block_sparse_moe.experts.241.w1", "model.layers.5.block_sparse_moe.experts.242.w1", "model.layers.5.block_sparse_moe.experts.243.w1", "model.layers.5.block_sparse_moe.experts.244.w1", "model.layers.5.block_sparse_moe.experts.245.w1", "model.layers.5.block_sparse_moe.experts.246.w1", "model.layers.5.block_sparse_moe.experts.247.w1", "model.layers.5.block_sparse_moe.experts.248.w1", "model.layers.5.block_sparse_moe.experts.249.w1", "model.layers.5.block_sparse_moe.experts.250.w1", "model.layers.5.block_sparse_moe.experts.251.w1", "model.layers.5.block_sparse_moe.experts.252.w1", "model.layers.5.block_sparse_moe.experts.253.w1", "model.layers.5.block_sparse_moe.experts.254.w1", "model.layers.5.block_sparse_moe.experts.255.w1", "model.layers.5.block_sparse_moe.experts.0.w3", "model.layers.5.block_sparse_moe.experts.1.w3", "model.layers.5.block_sparse_moe.experts.2.w3", "model.layers.5.block_sparse_moe.experts.3.w3", "model.layers.5.block_sparse_moe.experts.4.w3", "model.layers.5.block_sparse_moe.experts.5.w3", "model.layers.5.block_sparse_moe.experts.6.w3", "model.layers.5.block_sparse_moe.experts.7.w3", "model.layers.5.block_sparse_moe.experts.8.w3", "model.layers.5.block_sparse_moe.experts.9.w3", "model.layers.5.block_sparse_moe.experts.10.w3", "model.layers.5.block_sparse_moe.experts.11.w3", "model.layers.5.block_sparse_moe.experts.12.w3", "model.layers.5.block_sparse_moe.experts.13.w3", "model.layers.5.block_sparse_moe.experts.14.w3", "model.layers.5.block_sparse_moe.experts.15.w3", "model.layers.5.block_sparse_moe.experts.16.w3", "model.layers.5.block_sparse_moe.experts.17.w3", "model.layers.5.block_sparse_moe.experts.18.w3", "model.layers.5.block_sparse_moe.experts.19.w3", "model.layers.5.block_sparse_moe.experts.20.w3", "model.layers.5.block_sparse_moe.experts.21.w3", "model.layers.5.block_sparse_moe.experts.22.w3", "model.layers.5.block_sparse_moe.experts.23.w3", "model.layers.5.block_sparse_moe.experts.24.w3", "model.layers.5.block_sparse_moe.experts.25.w3", "model.layers.5.block_sparse_moe.experts.26.w3", "model.layers.5.block_sparse_moe.experts.27.w3", "model.layers.5.block_sparse_moe.experts.28.w3", "model.layers.5.block_sparse_moe.experts.29.w3", "model.layers.5.block_sparse_moe.experts.30.w3", "model.layers.5.block_sparse_moe.experts.31.w3", "model.layers.5.block_sparse_moe.experts.32.w3", "model.layers.5.block_sparse_moe.experts.33.w3", "model.layers.5.block_sparse_moe.experts.34.w3", "model.layers.5.block_sparse_moe.experts.35.w3", "model.layers.5.block_sparse_moe.experts.36.w3", "model.layers.5.block_sparse_moe.experts.37.w3", "model.layers.5.block_sparse_moe.experts.38.w3", "model.layers.5.block_sparse_moe.experts.39.w3", "model.layers.5.block_sparse_moe.experts.40.w3", "model.layers.5.block_sparse_moe.experts.41.w3", "model.layers.5.block_sparse_moe.experts.42.w3", "model.layers.5.block_sparse_moe.experts.43.w3", "model.layers.5.block_sparse_moe.experts.44.w3", "model.layers.5.block_sparse_moe.experts.45.w3", "model.layers.5.block_sparse_moe.experts.46.w3", "model.layers.5.block_sparse_moe.experts.47.w3", "model.layers.5.block_sparse_moe.experts.48.w3", "model.layers.5.block_sparse_moe.experts.49.w3", "model.layers.5.block_sparse_moe.experts.50.w3", "model.layers.5.block_sparse_moe.experts.51.w3", "model.layers.5.block_sparse_moe.experts.52.w3", "model.layers.5.block_sparse_moe.experts.53.w3", "model.layers.5.block_sparse_moe.experts.54.w3", "model.layers.5.block_sparse_moe.experts.55.w3", "model.layers.5.block_sparse_moe.experts.56.w3", "model.layers.5.block_sparse_moe.experts.57.w3", "model.layers.5.block_sparse_moe.experts.58.w3", "model.layers.5.block_sparse_moe.experts.59.w3", "model.layers.5.block_sparse_moe.experts.60.w3", "model.layers.5.block_sparse_moe.experts.61.w3", "model.layers.5.block_sparse_moe.experts.62.w3", "model.layers.5.block_sparse_moe.experts.63.w3", "model.layers.5.block_sparse_moe.experts.64.w3", "model.layers.5.block_sparse_moe.experts.65.w3", "model.layers.5.block_sparse_moe.experts.66.w3", "model.layers.5.block_sparse_moe.experts.67.w3", "model.layers.5.block_sparse_moe.experts.68.w3", "model.layers.5.block_sparse_moe.experts.69.w3", "model.layers.5.block_sparse_moe.experts.70.w3", "model.layers.5.block_sparse_moe.experts.71.w3", "model.layers.5.block_sparse_moe.experts.72.w3", "model.layers.5.block_sparse_moe.experts.73.w3", "model.layers.5.block_sparse_moe.experts.74.w3", "model.layers.5.block_sparse_moe.experts.75.w3", "model.layers.5.block_sparse_moe.experts.76.w3", "model.layers.5.block_sparse_moe.experts.77.w3", "model.layers.5.block_sparse_moe.experts.78.w3", "model.layers.5.block_sparse_moe.experts.79.w3", "model.layers.5.block_sparse_moe.experts.80.w3", "model.layers.5.block_sparse_moe.experts.81.w3", "model.layers.5.block_sparse_moe.experts.82.w3", "model.layers.5.block_sparse_moe.experts.83.w3", "model.layers.5.block_sparse_moe.experts.84.w3", "model.layers.5.block_sparse_moe.experts.85.w3", "model.layers.5.block_sparse_moe.experts.86.w3", "model.layers.5.block_sparse_moe.experts.87.w3", "model.layers.5.block_sparse_moe.experts.88.w3", "model.layers.5.block_sparse_moe.experts.89.w3", "model.layers.5.block_sparse_moe.experts.90.w3", "model.layers.5.block_sparse_moe.experts.91.w3", "model.layers.5.block_sparse_moe.experts.92.w3", "model.layers.5.block_sparse_moe.experts.93.w3", "model.layers.5.block_sparse_moe.experts.94.w3", "model.layers.5.block_sparse_moe.experts.95.w3", "model.layers.5.block_sparse_moe.experts.96.w3", "model.layers.5.block_sparse_moe.experts.97.w3", "model.layers.5.block_sparse_moe.experts.98.w3", "model.layers.5.block_sparse_moe.experts.99.w3", "model.layers.5.block_sparse_moe.experts.100.w3", "model.layers.5.block_sparse_moe.experts.101.w3", "model.layers.5.block_sparse_moe.experts.102.w3", "model.layers.5.block_sparse_moe.experts.103.w3", "model.layers.5.block_sparse_moe.experts.104.w3", "model.layers.5.block_sparse_moe.experts.105.w3", "model.layers.5.block_sparse_moe.experts.106.w3", "model.layers.5.block_sparse_moe.experts.107.w3", "model.layers.5.block_sparse_moe.experts.108.w3", "model.layers.5.block_sparse_moe.experts.109.w3", "model.layers.5.block_sparse_moe.experts.110.w3", "model.layers.5.block_sparse_moe.experts.111.w3", "model.layers.5.block_sparse_moe.experts.112.w3", "model.layers.5.block_sparse_moe.experts.113.w3", "model.layers.5.block_sparse_moe.experts.114.w3", "model.layers.5.block_sparse_moe.experts.115.w3", "model.layers.5.block_sparse_moe.experts.116.w3", "model.layers.5.block_sparse_moe.experts.117.w3", "model.layers.5.block_sparse_moe.experts.118.w3", "model.layers.5.block_sparse_moe.experts.119.w3", "model.layers.5.block_sparse_moe.experts.120.w3", "model.layers.5.block_sparse_moe.experts.121.w3", "model.layers.5.block_sparse_moe.experts.122.w3", "model.layers.5.block_sparse_moe.experts.123.w3", "model.layers.5.block_sparse_moe.experts.124.w3", "model.layers.5.block_sparse_moe.experts.125.w3", "model.layers.5.block_sparse_moe.experts.126.w3", "model.layers.5.block_sparse_moe.experts.127.w3", "model.layers.5.block_sparse_moe.experts.128.w3", "model.layers.5.block_sparse_moe.experts.129.w3", "model.layers.5.block_sparse_moe.experts.130.w3", "model.layers.5.block_sparse_moe.experts.131.w3", "model.layers.5.block_sparse_moe.experts.132.w3", "model.layers.5.block_sparse_moe.experts.133.w3", "model.layers.5.block_sparse_moe.experts.134.w3", "model.layers.5.block_sparse_moe.experts.135.w3", "model.layers.5.block_sparse_moe.experts.136.w3", "model.layers.5.block_sparse_moe.experts.137.w3", "model.layers.5.block_sparse_moe.experts.138.w3", "model.layers.5.block_sparse_moe.experts.139.w3", "model.layers.5.block_sparse_moe.experts.140.w3", "model.layers.5.block_sparse_moe.experts.141.w3", "model.layers.5.block_sparse_moe.experts.142.w3", "model.layers.5.block_sparse_moe.experts.143.w3", "model.layers.5.block_sparse_moe.experts.144.w3", "model.layers.5.block_sparse_moe.experts.145.w3", "model.layers.5.block_sparse_moe.experts.146.w3", "model.layers.5.block_sparse_moe.experts.147.w3", "model.layers.5.block_sparse_moe.experts.148.w3", "model.layers.5.block_sparse_moe.experts.149.w3", "model.layers.5.block_sparse_moe.experts.150.w3", "model.layers.5.block_sparse_moe.experts.151.w3", "model.layers.5.block_sparse_moe.experts.152.w3", "model.layers.5.block_sparse_moe.experts.153.w3", "model.layers.5.block_sparse_moe.experts.154.w3", "model.layers.5.block_sparse_moe.experts.155.w3", "model.layers.5.block_sparse_moe.experts.156.w3", "model.layers.5.block_sparse_moe.experts.157.w3", "model.layers.5.block_sparse_moe.experts.158.w3", "model.layers.5.block_sparse_moe.experts.159.w3", "model.layers.5.block_sparse_moe.experts.160.w3", "model.layers.5.block_sparse_moe.experts.161.w3", "model.layers.5.block_sparse_moe.experts.162.w3", "model.layers.5.block_sparse_moe.experts.163.w3", "model.layers.5.block_sparse_moe.experts.164.w3", "model.layers.5.block_sparse_moe.experts.165.w3", "model.layers.5.block_sparse_moe.experts.166.w3", "model.layers.5.block_sparse_moe.experts.167.w3", "model.layers.5.block_sparse_moe.experts.168.w3", "model.layers.5.block_sparse_moe.experts.169.w3", "model.layers.5.block_sparse_moe.experts.170.w3", "model.layers.5.block_sparse_moe.experts.171.w3", "model.layers.5.block_sparse_moe.experts.172.w3", "model.layers.5.block_sparse_moe.experts.173.w3", "model.layers.5.block_sparse_moe.experts.174.w3", "model.layers.5.block_sparse_moe.experts.175.w3", "model.layers.5.block_sparse_moe.experts.176.w3", "model.layers.5.block_sparse_moe.experts.177.w3", "model.layers.5.block_sparse_moe.experts.178.w3", "model.layers.5.block_sparse_moe.experts.179.w3", "model.layers.5.block_sparse_moe.experts.180.w3", "model.layers.5.block_sparse_moe.experts.181.w3", "model.layers.5.block_sparse_moe.experts.182.w3", "model.layers.5.block_sparse_moe.experts.183.w3", "model.layers.5.block_sparse_moe.experts.184.w3", "model.layers.5.block_sparse_moe.experts.185.w3", "model.layers.5.block_sparse_moe.experts.186.w3", "model.layers.5.block_sparse_moe.experts.187.w3", "model.layers.5.block_sparse_moe.experts.188.w3", "model.layers.5.block_sparse_moe.experts.189.w3", "model.layers.5.block_sparse_moe.experts.190.w3", "model.layers.5.block_sparse_moe.experts.191.w3", "model.layers.5.block_sparse_moe.experts.192.w3", "model.layers.5.block_sparse_moe.experts.193.w3", "model.layers.5.block_sparse_moe.experts.194.w3", "model.layers.5.block_sparse_moe.experts.195.w3", "model.layers.5.block_sparse_moe.experts.196.w3", "model.layers.5.block_sparse_moe.experts.197.w3", "model.layers.5.block_sparse_moe.experts.198.w3", "model.layers.5.block_sparse_moe.experts.199.w3", "model.layers.5.block_sparse_moe.experts.200.w3", "model.layers.5.block_sparse_moe.experts.201.w3", "model.layers.5.block_sparse_moe.experts.202.w3", "model.layers.5.block_sparse_moe.experts.203.w3", "model.layers.5.block_sparse_moe.experts.204.w3", "model.layers.5.block_sparse_moe.experts.205.w3", "model.layers.5.block_sparse_moe.experts.206.w3", "model.layers.5.block_sparse_moe.experts.207.w3", "model.layers.5.block_sparse_moe.experts.208.w3", "model.layers.5.block_sparse_moe.experts.209.w3", "model.layers.5.block_sparse_moe.experts.210.w3", "model.layers.5.block_sparse_moe.experts.211.w3", "model.layers.5.block_sparse_moe.experts.212.w3", "model.layers.5.block_sparse_moe.experts.213.w3", "model.layers.5.block_sparse_moe.experts.214.w3", "model.layers.5.block_sparse_moe.experts.215.w3", "model.layers.5.block_sparse_moe.experts.216.w3", "model.layers.5.block_sparse_moe.experts.217.w3", "model.layers.5.block_sparse_moe.experts.218.w3", "model.layers.5.block_sparse_moe.experts.219.w3", "model.layers.5.block_sparse_moe.experts.220.w3", "model.layers.5.block_sparse_moe.experts.221.w3", "model.layers.5.block_sparse_moe.experts.222.w3", "model.layers.5.block_sparse_moe.experts.223.w3", "model.layers.5.block_sparse_moe.experts.224.w3", "model.layers.5.block_sparse_moe.experts.225.w3", "model.layers.5.block_sparse_moe.experts.226.w3", "model.layers.5.block_sparse_moe.experts.227.w3", "model.layers.5.block_sparse_moe.experts.228.w3", "model.layers.5.block_sparse_moe.experts.229.w3", "model.layers.5.block_sparse_moe.experts.230.w3", "model.layers.5.block_sparse_moe.experts.231.w3", "model.layers.5.block_sparse_moe.experts.232.w3", "model.layers.5.block_sparse_moe.experts.233.w3", "model.layers.5.block_sparse_moe.experts.234.w3", "model.layers.5.block_sparse_moe.experts.235.w3", "model.layers.5.block_sparse_moe.experts.236.w3", "model.layers.5.block_sparse_moe.experts.237.w3", "model.layers.5.block_sparse_moe.experts.238.w3", "model.layers.5.block_sparse_moe.experts.239.w3", "model.layers.5.block_sparse_moe.experts.240.w3", "model.layers.5.block_sparse_moe.experts.241.w3", "model.layers.5.block_sparse_moe.experts.242.w3", "model.layers.5.block_sparse_moe.experts.243.w3", "model.layers.5.block_sparse_moe.experts.244.w3", "model.layers.5.block_sparse_moe.experts.245.w3", "model.layers.5.block_sparse_moe.experts.246.w3", "model.layers.5.block_sparse_moe.experts.247.w3", "model.layers.5.block_sparse_moe.experts.248.w3", "model.layers.5.block_sparse_moe.experts.249.w3", "model.layers.5.block_sparse_moe.experts.250.w3", "model.layers.5.block_sparse_moe.experts.251.w3", "model.layers.5.block_sparse_moe.experts.252.w3", "model.layers.5.block_sparse_moe.experts.253.w3", "model.layers.5.block_sparse_moe.experts.254.w3", "model.layers.5.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.0029614776372909324, "dbits": 2415919104 } ] }, { "idx": 29, "layers": [ "model.layers.5.block_sparse_moe.experts.0.w2", "model.layers.5.block_sparse_moe.experts.1.w2", "model.layers.5.block_sparse_moe.experts.2.w2", "model.layers.5.block_sparse_moe.experts.3.w2", "model.layers.5.block_sparse_moe.experts.4.w2", "model.layers.5.block_sparse_moe.experts.5.w2", "model.layers.5.block_sparse_moe.experts.6.w2", "model.layers.5.block_sparse_moe.experts.7.w2", "model.layers.5.block_sparse_moe.experts.8.w2", "model.layers.5.block_sparse_moe.experts.9.w2", "model.layers.5.block_sparse_moe.experts.10.w2", "model.layers.5.block_sparse_moe.experts.11.w2", "model.layers.5.block_sparse_moe.experts.12.w2", "model.layers.5.block_sparse_moe.experts.13.w2", "model.layers.5.block_sparse_moe.experts.14.w2", "model.layers.5.block_sparse_moe.experts.15.w2", "model.layers.5.block_sparse_moe.experts.16.w2", "model.layers.5.block_sparse_moe.experts.17.w2", "model.layers.5.block_sparse_moe.experts.18.w2", "model.layers.5.block_sparse_moe.experts.19.w2", "model.layers.5.block_sparse_moe.experts.20.w2", "model.layers.5.block_sparse_moe.experts.21.w2", "model.layers.5.block_sparse_moe.experts.22.w2", "model.layers.5.block_sparse_moe.experts.23.w2", "model.layers.5.block_sparse_moe.experts.24.w2", "model.layers.5.block_sparse_moe.experts.25.w2", "model.layers.5.block_sparse_moe.experts.26.w2", "model.layers.5.block_sparse_moe.experts.27.w2", "model.layers.5.block_sparse_moe.experts.28.w2", "model.layers.5.block_sparse_moe.experts.29.w2", "model.layers.5.block_sparse_moe.experts.30.w2", "model.layers.5.block_sparse_moe.experts.31.w2", "model.layers.5.block_sparse_moe.experts.32.w2", "model.layers.5.block_sparse_moe.experts.33.w2", "model.layers.5.block_sparse_moe.experts.34.w2", "model.layers.5.block_sparse_moe.experts.35.w2", "model.layers.5.block_sparse_moe.experts.36.w2", "model.layers.5.block_sparse_moe.experts.37.w2", "model.layers.5.block_sparse_moe.experts.38.w2", "model.layers.5.block_sparse_moe.experts.39.w2", "model.layers.5.block_sparse_moe.experts.40.w2", "model.layers.5.block_sparse_moe.experts.41.w2", "model.layers.5.block_sparse_moe.experts.42.w2", "model.layers.5.block_sparse_moe.experts.43.w2", "model.layers.5.block_sparse_moe.experts.44.w2", "model.layers.5.block_sparse_moe.experts.45.w2", "model.layers.5.block_sparse_moe.experts.46.w2", "model.layers.5.block_sparse_moe.experts.47.w2", "model.layers.5.block_sparse_moe.experts.48.w2", "model.layers.5.block_sparse_moe.experts.49.w2", "model.layers.5.block_sparse_moe.experts.50.w2", "model.layers.5.block_sparse_moe.experts.51.w2", "model.layers.5.block_sparse_moe.experts.52.w2", "model.layers.5.block_sparse_moe.experts.53.w2", "model.layers.5.block_sparse_moe.experts.54.w2", "model.layers.5.block_sparse_moe.experts.55.w2", "model.layers.5.block_sparse_moe.experts.56.w2", "model.layers.5.block_sparse_moe.experts.57.w2", "model.layers.5.block_sparse_moe.experts.58.w2", "model.layers.5.block_sparse_moe.experts.59.w2", "model.layers.5.block_sparse_moe.experts.60.w2", "model.layers.5.block_sparse_moe.experts.61.w2", "model.layers.5.block_sparse_moe.experts.62.w2", "model.layers.5.block_sparse_moe.experts.63.w2", "model.layers.5.block_sparse_moe.experts.64.w2", "model.layers.5.block_sparse_moe.experts.65.w2", "model.layers.5.block_sparse_moe.experts.66.w2", "model.layers.5.block_sparse_moe.experts.67.w2", "model.layers.5.block_sparse_moe.experts.68.w2", "model.layers.5.block_sparse_moe.experts.69.w2", "model.layers.5.block_sparse_moe.experts.70.w2", "model.layers.5.block_sparse_moe.experts.71.w2", "model.layers.5.block_sparse_moe.experts.72.w2", "model.layers.5.block_sparse_moe.experts.73.w2", "model.layers.5.block_sparse_moe.experts.74.w2", "model.layers.5.block_sparse_moe.experts.75.w2", "model.layers.5.block_sparse_moe.experts.76.w2", "model.layers.5.block_sparse_moe.experts.77.w2", "model.layers.5.block_sparse_moe.experts.78.w2", "model.layers.5.block_sparse_moe.experts.79.w2", "model.layers.5.block_sparse_moe.experts.80.w2", "model.layers.5.block_sparse_moe.experts.81.w2", "model.layers.5.block_sparse_moe.experts.82.w2", "model.layers.5.block_sparse_moe.experts.83.w2", "model.layers.5.block_sparse_moe.experts.84.w2", "model.layers.5.block_sparse_moe.experts.85.w2", "model.layers.5.block_sparse_moe.experts.86.w2", "model.layers.5.block_sparse_moe.experts.87.w2", "model.layers.5.block_sparse_moe.experts.88.w2", "model.layers.5.block_sparse_moe.experts.89.w2", "model.layers.5.block_sparse_moe.experts.90.w2", "model.layers.5.block_sparse_moe.experts.91.w2", "model.layers.5.block_sparse_moe.experts.92.w2", "model.layers.5.block_sparse_moe.experts.93.w2", "model.layers.5.block_sparse_moe.experts.94.w2", "model.layers.5.block_sparse_moe.experts.95.w2", "model.layers.5.block_sparse_moe.experts.96.w2", "model.layers.5.block_sparse_moe.experts.97.w2", "model.layers.5.block_sparse_moe.experts.98.w2", "model.layers.5.block_sparse_moe.experts.99.w2", "model.layers.5.block_sparse_moe.experts.100.w2", "model.layers.5.block_sparse_moe.experts.101.w2", "model.layers.5.block_sparse_moe.experts.102.w2", "model.layers.5.block_sparse_moe.experts.103.w2", "model.layers.5.block_sparse_moe.experts.104.w2", "model.layers.5.block_sparse_moe.experts.105.w2", "model.layers.5.block_sparse_moe.experts.106.w2", "model.layers.5.block_sparse_moe.experts.107.w2", "model.layers.5.block_sparse_moe.experts.108.w2", "model.layers.5.block_sparse_moe.experts.109.w2", "model.layers.5.block_sparse_moe.experts.110.w2", "model.layers.5.block_sparse_moe.experts.111.w2", "model.layers.5.block_sparse_moe.experts.112.w2", "model.layers.5.block_sparse_moe.experts.113.w2", "model.layers.5.block_sparse_moe.experts.114.w2", "model.layers.5.block_sparse_moe.experts.115.w2", "model.layers.5.block_sparse_moe.experts.116.w2", "model.layers.5.block_sparse_moe.experts.117.w2", "model.layers.5.block_sparse_moe.experts.118.w2", "model.layers.5.block_sparse_moe.experts.119.w2", "model.layers.5.block_sparse_moe.experts.120.w2", "model.layers.5.block_sparse_moe.experts.121.w2", "model.layers.5.block_sparse_moe.experts.122.w2", "model.layers.5.block_sparse_moe.experts.123.w2", "model.layers.5.block_sparse_moe.experts.124.w2", "model.layers.5.block_sparse_moe.experts.125.w2", "model.layers.5.block_sparse_moe.experts.126.w2", "model.layers.5.block_sparse_moe.experts.127.w2", "model.layers.5.block_sparse_moe.experts.128.w2", "model.layers.5.block_sparse_moe.experts.129.w2", "model.layers.5.block_sparse_moe.experts.130.w2", "model.layers.5.block_sparse_moe.experts.131.w2", "model.layers.5.block_sparse_moe.experts.132.w2", "model.layers.5.block_sparse_moe.experts.133.w2", "model.layers.5.block_sparse_moe.experts.134.w2", "model.layers.5.block_sparse_moe.experts.135.w2", "model.layers.5.block_sparse_moe.experts.136.w2", "model.layers.5.block_sparse_moe.experts.137.w2", "model.layers.5.block_sparse_moe.experts.138.w2", "model.layers.5.block_sparse_moe.experts.139.w2", "model.layers.5.block_sparse_moe.experts.140.w2", "model.layers.5.block_sparse_moe.experts.141.w2", "model.layers.5.block_sparse_moe.experts.142.w2", "model.layers.5.block_sparse_moe.experts.143.w2", "model.layers.5.block_sparse_moe.experts.144.w2", "model.layers.5.block_sparse_moe.experts.145.w2", "model.layers.5.block_sparse_moe.experts.146.w2", "model.layers.5.block_sparse_moe.experts.147.w2", "model.layers.5.block_sparse_moe.experts.148.w2", "model.layers.5.block_sparse_moe.experts.149.w2", "model.layers.5.block_sparse_moe.experts.150.w2", "model.layers.5.block_sparse_moe.experts.151.w2", "model.layers.5.block_sparse_moe.experts.152.w2", "model.layers.5.block_sparse_moe.experts.153.w2", "model.layers.5.block_sparse_moe.experts.154.w2", "model.layers.5.block_sparse_moe.experts.155.w2", "model.layers.5.block_sparse_moe.experts.156.w2", "model.layers.5.block_sparse_moe.experts.157.w2", "model.layers.5.block_sparse_moe.experts.158.w2", "model.layers.5.block_sparse_moe.experts.159.w2", "model.layers.5.block_sparse_moe.experts.160.w2", "model.layers.5.block_sparse_moe.experts.161.w2", "model.layers.5.block_sparse_moe.experts.162.w2", "model.layers.5.block_sparse_moe.experts.163.w2", "model.layers.5.block_sparse_moe.experts.164.w2", "model.layers.5.block_sparse_moe.experts.165.w2", "model.layers.5.block_sparse_moe.experts.166.w2", "model.layers.5.block_sparse_moe.experts.167.w2", "model.layers.5.block_sparse_moe.experts.168.w2", "model.layers.5.block_sparse_moe.experts.169.w2", "model.layers.5.block_sparse_moe.experts.170.w2", "model.layers.5.block_sparse_moe.experts.171.w2", "model.layers.5.block_sparse_moe.experts.172.w2", "model.layers.5.block_sparse_moe.experts.173.w2", "model.layers.5.block_sparse_moe.experts.174.w2", "model.layers.5.block_sparse_moe.experts.175.w2", "model.layers.5.block_sparse_moe.experts.176.w2", "model.layers.5.block_sparse_moe.experts.177.w2", "model.layers.5.block_sparse_moe.experts.178.w2", "model.layers.5.block_sparse_moe.experts.179.w2", "model.layers.5.block_sparse_moe.experts.180.w2", "model.layers.5.block_sparse_moe.experts.181.w2", "model.layers.5.block_sparse_moe.experts.182.w2", "model.layers.5.block_sparse_moe.experts.183.w2", "model.layers.5.block_sparse_moe.experts.184.w2", "model.layers.5.block_sparse_moe.experts.185.w2", "model.layers.5.block_sparse_moe.experts.186.w2", "model.layers.5.block_sparse_moe.experts.187.w2", "model.layers.5.block_sparse_moe.experts.188.w2", "model.layers.5.block_sparse_moe.experts.189.w2", "model.layers.5.block_sparse_moe.experts.190.w2", "model.layers.5.block_sparse_moe.experts.191.w2", "model.layers.5.block_sparse_moe.experts.192.w2", "model.layers.5.block_sparse_moe.experts.193.w2", "model.layers.5.block_sparse_moe.experts.194.w2", "model.layers.5.block_sparse_moe.experts.195.w2", "model.layers.5.block_sparse_moe.experts.196.w2", "model.layers.5.block_sparse_moe.experts.197.w2", "model.layers.5.block_sparse_moe.experts.198.w2", "model.layers.5.block_sparse_moe.experts.199.w2", "model.layers.5.block_sparse_moe.experts.200.w2", "model.layers.5.block_sparse_moe.experts.201.w2", "model.layers.5.block_sparse_moe.experts.202.w2", "model.layers.5.block_sparse_moe.experts.203.w2", "model.layers.5.block_sparse_moe.experts.204.w2", "model.layers.5.block_sparse_moe.experts.205.w2", "model.layers.5.block_sparse_moe.experts.206.w2", "model.layers.5.block_sparse_moe.experts.207.w2", "model.layers.5.block_sparse_moe.experts.208.w2", "model.layers.5.block_sparse_moe.experts.209.w2", "model.layers.5.block_sparse_moe.experts.210.w2", "model.layers.5.block_sparse_moe.experts.211.w2", "model.layers.5.block_sparse_moe.experts.212.w2", "model.layers.5.block_sparse_moe.experts.213.w2", "model.layers.5.block_sparse_moe.experts.214.w2", "model.layers.5.block_sparse_moe.experts.215.w2", "model.layers.5.block_sparse_moe.experts.216.w2", "model.layers.5.block_sparse_moe.experts.217.w2", "model.layers.5.block_sparse_moe.experts.218.w2", "model.layers.5.block_sparse_moe.experts.219.w2", "model.layers.5.block_sparse_moe.experts.220.w2", "model.layers.5.block_sparse_moe.experts.221.w2", "model.layers.5.block_sparse_moe.experts.222.w2", "model.layers.5.block_sparse_moe.experts.223.w2", "model.layers.5.block_sparse_moe.experts.224.w2", "model.layers.5.block_sparse_moe.experts.225.w2", "model.layers.5.block_sparse_moe.experts.226.w2", "model.layers.5.block_sparse_moe.experts.227.w2", "model.layers.5.block_sparse_moe.experts.228.w2", "model.layers.5.block_sparse_moe.experts.229.w2", "model.layers.5.block_sparse_moe.experts.230.w2", "model.layers.5.block_sparse_moe.experts.231.w2", "model.layers.5.block_sparse_moe.experts.232.w2", "model.layers.5.block_sparse_moe.experts.233.w2", "model.layers.5.block_sparse_moe.experts.234.w2", "model.layers.5.block_sparse_moe.experts.235.w2", "model.layers.5.block_sparse_moe.experts.236.w2", "model.layers.5.block_sparse_moe.experts.237.w2", "model.layers.5.block_sparse_moe.experts.238.w2", "model.layers.5.block_sparse_moe.experts.239.w2", "model.layers.5.block_sparse_moe.experts.240.w2", "model.layers.5.block_sparse_moe.experts.241.w2", "model.layers.5.block_sparse_moe.experts.242.w2", "model.layers.5.block_sparse_moe.experts.243.w2", "model.layers.5.block_sparse_moe.experts.244.w2", "model.layers.5.block_sparse_moe.experts.245.w2", "model.layers.5.block_sparse_moe.experts.246.w2", "model.layers.5.block_sparse_moe.experts.247.w2", "model.layers.5.block_sparse_moe.experts.248.w2", "model.layers.5.block_sparse_moe.experts.249.w2", "model.layers.5.block_sparse_moe.experts.250.w2", "model.layers.5.block_sparse_moe.experts.251.w2", "model.layers.5.block_sparse_moe.experts.252.w2", "model.layers.5.block_sparse_moe.experts.253.w2", "model.layers.5.block_sparse_moe.experts.254.w2", "model.layers.5.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.0004389233887195365, "dbits": 1207959552 } ] }, { "idx": 30, "layers": [ "model.layers.6.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0024184122681617515, "dbits": 18874368 } ] }, { "idx": 31, "layers": [ "model.layers.6.self_attn.k_proj", "model.layers.6.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0006090342998504861, "dbits": 6291456 } ] }, { "idx": 32, "layers": [ "model.layers.6.self_attn.o_proj" ], "candidates": [ { "dkld": 8.918717503547668e-05, "dbits": 18874368 } ] }, { "idx": 33, "layers": [ "model.layers.6.block_sparse_moe.experts.0.w1", "model.layers.6.block_sparse_moe.experts.1.w1", "model.layers.6.block_sparse_moe.experts.2.w1", "model.layers.6.block_sparse_moe.experts.3.w1", "model.layers.6.block_sparse_moe.experts.4.w1", "model.layers.6.block_sparse_moe.experts.5.w1", "model.layers.6.block_sparse_moe.experts.6.w1", "model.layers.6.block_sparse_moe.experts.7.w1", "model.layers.6.block_sparse_moe.experts.8.w1", "model.layers.6.block_sparse_moe.experts.9.w1", "model.layers.6.block_sparse_moe.experts.10.w1", "model.layers.6.block_sparse_moe.experts.11.w1", "model.layers.6.block_sparse_moe.experts.12.w1", "model.layers.6.block_sparse_moe.experts.13.w1", "model.layers.6.block_sparse_moe.experts.14.w1", "model.layers.6.block_sparse_moe.experts.15.w1", "model.layers.6.block_sparse_moe.experts.16.w1", "model.layers.6.block_sparse_moe.experts.17.w1", "model.layers.6.block_sparse_moe.experts.18.w1", "model.layers.6.block_sparse_moe.experts.19.w1", "model.layers.6.block_sparse_moe.experts.20.w1", "model.layers.6.block_sparse_moe.experts.21.w1", "model.layers.6.block_sparse_moe.experts.22.w1", "model.layers.6.block_sparse_moe.experts.23.w1", "model.layers.6.block_sparse_moe.experts.24.w1", "model.layers.6.block_sparse_moe.experts.25.w1", "model.layers.6.block_sparse_moe.experts.26.w1", "model.layers.6.block_sparse_moe.experts.27.w1", "model.layers.6.block_sparse_moe.experts.28.w1", "model.layers.6.block_sparse_moe.experts.29.w1", "model.layers.6.block_sparse_moe.experts.30.w1", "model.layers.6.block_sparse_moe.experts.31.w1", "model.layers.6.block_sparse_moe.experts.32.w1", "model.layers.6.block_sparse_moe.experts.33.w1", "model.layers.6.block_sparse_moe.experts.34.w1", "model.layers.6.block_sparse_moe.experts.35.w1", "model.layers.6.block_sparse_moe.experts.36.w1", "model.layers.6.block_sparse_moe.experts.37.w1", "model.layers.6.block_sparse_moe.experts.38.w1", "model.layers.6.block_sparse_moe.experts.39.w1", "model.layers.6.block_sparse_moe.experts.40.w1", "model.layers.6.block_sparse_moe.experts.41.w1", "model.layers.6.block_sparse_moe.experts.42.w1", "model.layers.6.block_sparse_moe.experts.43.w1", "model.layers.6.block_sparse_moe.experts.44.w1", "model.layers.6.block_sparse_moe.experts.45.w1", "model.layers.6.block_sparse_moe.experts.46.w1", "model.layers.6.block_sparse_moe.experts.47.w1", "model.layers.6.block_sparse_moe.experts.48.w1", "model.layers.6.block_sparse_moe.experts.49.w1", "model.layers.6.block_sparse_moe.experts.50.w1", "model.layers.6.block_sparse_moe.experts.51.w1", "model.layers.6.block_sparse_moe.experts.52.w1", "model.layers.6.block_sparse_moe.experts.53.w1", "model.layers.6.block_sparse_moe.experts.54.w1", "model.layers.6.block_sparse_moe.experts.55.w1", "model.layers.6.block_sparse_moe.experts.56.w1", "model.layers.6.block_sparse_moe.experts.57.w1", "model.layers.6.block_sparse_moe.experts.58.w1", "model.layers.6.block_sparse_moe.experts.59.w1", "model.layers.6.block_sparse_moe.experts.60.w1", "model.layers.6.block_sparse_moe.experts.61.w1", "model.layers.6.block_sparse_moe.experts.62.w1", "model.layers.6.block_sparse_moe.experts.63.w1", "model.layers.6.block_sparse_moe.experts.64.w1", "model.layers.6.block_sparse_moe.experts.65.w1", "model.layers.6.block_sparse_moe.experts.66.w1", "model.layers.6.block_sparse_moe.experts.67.w1", "model.layers.6.block_sparse_moe.experts.68.w1", "model.layers.6.block_sparse_moe.experts.69.w1", "model.layers.6.block_sparse_moe.experts.70.w1", "model.layers.6.block_sparse_moe.experts.71.w1", "model.layers.6.block_sparse_moe.experts.72.w1", "model.layers.6.block_sparse_moe.experts.73.w1", "model.layers.6.block_sparse_moe.experts.74.w1", "model.layers.6.block_sparse_moe.experts.75.w1", "model.layers.6.block_sparse_moe.experts.76.w1", "model.layers.6.block_sparse_moe.experts.77.w1", "model.layers.6.block_sparse_moe.experts.78.w1", "model.layers.6.block_sparse_moe.experts.79.w1", "model.layers.6.block_sparse_moe.experts.80.w1", "model.layers.6.block_sparse_moe.experts.81.w1", "model.layers.6.block_sparse_moe.experts.82.w1", "model.layers.6.block_sparse_moe.experts.83.w1", "model.layers.6.block_sparse_moe.experts.84.w1", "model.layers.6.block_sparse_moe.experts.85.w1", "model.layers.6.block_sparse_moe.experts.86.w1", "model.layers.6.block_sparse_moe.experts.87.w1", "model.layers.6.block_sparse_moe.experts.88.w1", "model.layers.6.block_sparse_moe.experts.89.w1", "model.layers.6.block_sparse_moe.experts.90.w1", "model.layers.6.block_sparse_moe.experts.91.w1", "model.layers.6.block_sparse_moe.experts.92.w1", "model.layers.6.block_sparse_moe.experts.93.w1", "model.layers.6.block_sparse_moe.experts.94.w1", "model.layers.6.block_sparse_moe.experts.95.w1", "model.layers.6.block_sparse_moe.experts.96.w1", "model.layers.6.block_sparse_moe.experts.97.w1", "model.layers.6.block_sparse_moe.experts.98.w1", "model.layers.6.block_sparse_moe.experts.99.w1", "model.layers.6.block_sparse_moe.experts.100.w1", "model.layers.6.block_sparse_moe.experts.101.w1", "model.layers.6.block_sparse_moe.experts.102.w1", "model.layers.6.block_sparse_moe.experts.103.w1", "model.layers.6.block_sparse_moe.experts.104.w1", "model.layers.6.block_sparse_moe.experts.105.w1", "model.layers.6.block_sparse_moe.experts.106.w1", "model.layers.6.block_sparse_moe.experts.107.w1", "model.layers.6.block_sparse_moe.experts.108.w1", "model.layers.6.block_sparse_moe.experts.109.w1", "model.layers.6.block_sparse_moe.experts.110.w1", "model.layers.6.block_sparse_moe.experts.111.w1", "model.layers.6.block_sparse_moe.experts.112.w1", "model.layers.6.block_sparse_moe.experts.113.w1", "model.layers.6.block_sparse_moe.experts.114.w1", "model.layers.6.block_sparse_moe.experts.115.w1", "model.layers.6.block_sparse_moe.experts.116.w1", "model.layers.6.block_sparse_moe.experts.117.w1", "model.layers.6.block_sparse_moe.experts.118.w1", "model.layers.6.block_sparse_moe.experts.119.w1", "model.layers.6.block_sparse_moe.experts.120.w1", "model.layers.6.block_sparse_moe.experts.121.w1", "model.layers.6.block_sparse_moe.experts.122.w1", "model.layers.6.block_sparse_moe.experts.123.w1", "model.layers.6.block_sparse_moe.experts.124.w1", "model.layers.6.block_sparse_moe.experts.125.w1", "model.layers.6.block_sparse_moe.experts.126.w1", "model.layers.6.block_sparse_moe.experts.127.w1", "model.layers.6.block_sparse_moe.experts.128.w1", "model.layers.6.block_sparse_moe.experts.129.w1", "model.layers.6.block_sparse_moe.experts.130.w1", "model.layers.6.block_sparse_moe.experts.131.w1", "model.layers.6.block_sparse_moe.experts.132.w1", "model.layers.6.block_sparse_moe.experts.133.w1", "model.layers.6.block_sparse_moe.experts.134.w1", "model.layers.6.block_sparse_moe.experts.135.w1", "model.layers.6.block_sparse_moe.experts.136.w1", "model.layers.6.block_sparse_moe.experts.137.w1", "model.layers.6.block_sparse_moe.experts.138.w1", "model.layers.6.block_sparse_moe.experts.139.w1", "model.layers.6.block_sparse_moe.experts.140.w1", "model.layers.6.block_sparse_moe.experts.141.w1", "model.layers.6.block_sparse_moe.experts.142.w1", "model.layers.6.block_sparse_moe.experts.143.w1", "model.layers.6.block_sparse_moe.experts.144.w1", "model.layers.6.block_sparse_moe.experts.145.w1", "model.layers.6.block_sparse_moe.experts.146.w1", "model.layers.6.block_sparse_moe.experts.147.w1", "model.layers.6.block_sparse_moe.experts.148.w1", "model.layers.6.block_sparse_moe.experts.149.w1", "model.layers.6.block_sparse_moe.experts.150.w1", "model.layers.6.block_sparse_moe.experts.151.w1", "model.layers.6.block_sparse_moe.experts.152.w1", "model.layers.6.block_sparse_moe.experts.153.w1", "model.layers.6.block_sparse_moe.experts.154.w1", "model.layers.6.block_sparse_moe.experts.155.w1", "model.layers.6.block_sparse_moe.experts.156.w1", "model.layers.6.block_sparse_moe.experts.157.w1", "model.layers.6.block_sparse_moe.experts.158.w1", "model.layers.6.block_sparse_moe.experts.159.w1", "model.layers.6.block_sparse_moe.experts.160.w1", "model.layers.6.block_sparse_moe.experts.161.w1", "model.layers.6.block_sparse_moe.experts.162.w1", "model.layers.6.block_sparse_moe.experts.163.w1", "model.layers.6.block_sparse_moe.experts.164.w1", "model.layers.6.block_sparse_moe.experts.165.w1", "model.layers.6.block_sparse_moe.experts.166.w1", "model.layers.6.block_sparse_moe.experts.167.w1", "model.layers.6.block_sparse_moe.experts.168.w1", "model.layers.6.block_sparse_moe.experts.169.w1", "model.layers.6.block_sparse_moe.experts.170.w1", "model.layers.6.block_sparse_moe.experts.171.w1", "model.layers.6.block_sparse_moe.experts.172.w1", "model.layers.6.block_sparse_moe.experts.173.w1", "model.layers.6.block_sparse_moe.experts.174.w1", "model.layers.6.block_sparse_moe.experts.175.w1", "model.layers.6.block_sparse_moe.experts.176.w1", "model.layers.6.block_sparse_moe.experts.177.w1", "model.layers.6.block_sparse_moe.experts.178.w1", "model.layers.6.block_sparse_moe.experts.179.w1", "model.layers.6.block_sparse_moe.experts.180.w1", "model.layers.6.block_sparse_moe.experts.181.w1", "model.layers.6.block_sparse_moe.experts.182.w1", "model.layers.6.block_sparse_moe.experts.183.w1", "model.layers.6.block_sparse_moe.experts.184.w1", "model.layers.6.block_sparse_moe.experts.185.w1", "model.layers.6.block_sparse_moe.experts.186.w1", "model.layers.6.block_sparse_moe.experts.187.w1", "model.layers.6.block_sparse_moe.experts.188.w1", "model.layers.6.block_sparse_moe.experts.189.w1", "model.layers.6.block_sparse_moe.experts.190.w1", "model.layers.6.block_sparse_moe.experts.191.w1", "model.layers.6.block_sparse_moe.experts.192.w1", "model.layers.6.block_sparse_moe.experts.193.w1", "model.layers.6.block_sparse_moe.experts.194.w1", "model.layers.6.block_sparse_moe.experts.195.w1", "model.layers.6.block_sparse_moe.experts.196.w1", "model.layers.6.block_sparse_moe.experts.197.w1", "model.layers.6.block_sparse_moe.experts.198.w1", "model.layers.6.block_sparse_moe.experts.199.w1", "model.layers.6.block_sparse_moe.experts.200.w1", "model.layers.6.block_sparse_moe.experts.201.w1", "model.layers.6.block_sparse_moe.experts.202.w1", "model.layers.6.block_sparse_moe.experts.203.w1", "model.layers.6.block_sparse_moe.experts.204.w1", "model.layers.6.block_sparse_moe.experts.205.w1", "model.layers.6.block_sparse_moe.experts.206.w1", "model.layers.6.block_sparse_moe.experts.207.w1", "model.layers.6.block_sparse_moe.experts.208.w1", "model.layers.6.block_sparse_moe.experts.209.w1", "model.layers.6.block_sparse_moe.experts.210.w1", "model.layers.6.block_sparse_moe.experts.211.w1", "model.layers.6.block_sparse_moe.experts.212.w1", "model.layers.6.block_sparse_moe.experts.213.w1", "model.layers.6.block_sparse_moe.experts.214.w1", "model.layers.6.block_sparse_moe.experts.215.w1", "model.layers.6.block_sparse_moe.experts.216.w1", "model.layers.6.block_sparse_moe.experts.217.w1", "model.layers.6.block_sparse_moe.experts.218.w1", "model.layers.6.block_sparse_moe.experts.219.w1", "model.layers.6.block_sparse_moe.experts.220.w1", "model.layers.6.block_sparse_moe.experts.221.w1", "model.layers.6.block_sparse_moe.experts.222.w1", "model.layers.6.block_sparse_moe.experts.223.w1", "model.layers.6.block_sparse_moe.experts.224.w1", "model.layers.6.block_sparse_moe.experts.225.w1", "model.layers.6.block_sparse_moe.experts.226.w1", "model.layers.6.block_sparse_moe.experts.227.w1", "model.layers.6.block_sparse_moe.experts.228.w1", "model.layers.6.block_sparse_moe.experts.229.w1", "model.layers.6.block_sparse_moe.experts.230.w1", "model.layers.6.block_sparse_moe.experts.231.w1", "model.layers.6.block_sparse_moe.experts.232.w1", "model.layers.6.block_sparse_moe.experts.233.w1", "model.layers.6.block_sparse_moe.experts.234.w1", "model.layers.6.block_sparse_moe.experts.235.w1", "model.layers.6.block_sparse_moe.experts.236.w1", "model.layers.6.block_sparse_moe.experts.237.w1", "model.layers.6.block_sparse_moe.experts.238.w1", "model.layers.6.block_sparse_moe.experts.239.w1", "model.layers.6.block_sparse_moe.experts.240.w1", "model.layers.6.block_sparse_moe.experts.241.w1", "model.layers.6.block_sparse_moe.experts.242.w1", "model.layers.6.block_sparse_moe.experts.243.w1", "model.layers.6.block_sparse_moe.experts.244.w1", "model.layers.6.block_sparse_moe.experts.245.w1", "model.layers.6.block_sparse_moe.experts.246.w1", "model.layers.6.block_sparse_moe.experts.247.w1", "model.layers.6.block_sparse_moe.experts.248.w1", "model.layers.6.block_sparse_moe.experts.249.w1", "model.layers.6.block_sparse_moe.experts.250.w1", "model.layers.6.block_sparse_moe.experts.251.w1", "model.layers.6.block_sparse_moe.experts.252.w1", "model.layers.6.block_sparse_moe.experts.253.w1", "model.layers.6.block_sparse_moe.experts.254.w1", "model.layers.6.block_sparse_moe.experts.255.w1", "model.layers.6.block_sparse_moe.experts.0.w3", "model.layers.6.block_sparse_moe.experts.1.w3", "model.layers.6.block_sparse_moe.experts.2.w3", "model.layers.6.block_sparse_moe.experts.3.w3", "model.layers.6.block_sparse_moe.experts.4.w3", "model.layers.6.block_sparse_moe.experts.5.w3", "model.layers.6.block_sparse_moe.experts.6.w3", "model.layers.6.block_sparse_moe.experts.7.w3", "model.layers.6.block_sparse_moe.experts.8.w3", "model.layers.6.block_sparse_moe.experts.9.w3", "model.layers.6.block_sparse_moe.experts.10.w3", "model.layers.6.block_sparse_moe.experts.11.w3", "model.layers.6.block_sparse_moe.experts.12.w3", "model.layers.6.block_sparse_moe.experts.13.w3", "model.layers.6.block_sparse_moe.experts.14.w3", "model.layers.6.block_sparse_moe.experts.15.w3", "model.layers.6.block_sparse_moe.experts.16.w3", "model.layers.6.block_sparse_moe.experts.17.w3", "model.layers.6.block_sparse_moe.experts.18.w3", "model.layers.6.block_sparse_moe.experts.19.w3", "model.layers.6.block_sparse_moe.experts.20.w3", "model.layers.6.block_sparse_moe.experts.21.w3", "model.layers.6.block_sparse_moe.experts.22.w3", "model.layers.6.block_sparse_moe.experts.23.w3", "model.layers.6.block_sparse_moe.experts.24.w3", "model.layers.6.block_sparse_moe.experts.25.w3", "model.layers.6.block_sparse_moe.experts.26.w3", "model.layers.6.block_sparse_moe.experts.27.w3", "model.layers.6.block_sparse_moe.experts.28.w3", "model.layers.6.block_sparse_moe.experts.29.w3", "model.layers.6.block_sparse_moe.experts.30.w3", "model.layers.6.block_sparse_moe.experts.31.w3", "model.layers.6.block_sparse_moe.experts.32.w3", "model.layers.6.block_sparse_moe.experts.33.w3", "model.layers.6.block_sparse_moe.experts.34.w3", "model.layers.6.block_sparse_moe.experts.35.w3", "model.layers.6.block_sparse_moe.experts.36.w3", "model.layers.6.block_sparse_moe.experts.37.w3", "model.layers.6.block_sparse_moe.experts.38.w3", "model.layers.6.block_sparse_moe.experts.39.w3", "model.layers.6.block_sparse_moe.experts.40.w3", "model.layers.6.block_sparse_moe.experts.41.w3", "model.layers.6.block_sparse_moe.experts.42.w3", "model.layers.6.block_sparse_moe.experts.43.w3", "model.layers.6.block_sparse_moe.experts.44.w3", "model.layers.6.block_sparse_moe.experts.45.w3", "model.layers.6.block_sparse_moe.experts.46.w3", "model.layers.6.block_sparse_moe.experts.47.w3", "model.layers.6.block_sparse_moe.experts.48.w3", "model.layers.6.block_sparse_moe.experts.49.w3", "model.layers.6.block_sparse_moe.experts.50.w3", "model.layers.6.block_sparse_moe.experts.51.w3", "model.layers.6.block_sparse_moe.experts.52.w3", "model.layers.6.block_sparse_moe.experts.53.w3", "model.layers.6.block_sparse_moe.experts.54.w3", "model.layers.6.block_sparse_moe.experts.55.w3", "model.layers.6.block_sparse_moe.experts.56.w3", "model.layers.6.block_sparse_moe.experts.57.w3", "model.layers.6.block_sparse_moe.experts.58.w3", "model.layers.6.block_sparse_moe.experts.59.w3", "model.layers.6.block_sparse_moe.experts.60.w3", "model.layers.6.block_sparse_moe.experts.61.w3", "model.layers.6.block_sparse_moe.experts.62.w3", "model.layers.6.block_sparse_moe.experts.63.w3", "model.layers.6.block_sparse_moe.experts.64.w3", "model.layers.6.block_sparse_moe.experts.65.w3", "model.layers.6.block_sparse_moe.experts.66.w3", "model.layers.6.block_sparse_moe.experts.67.w3", "model.layers.6.block_sparse_moe.experts.68.w3", "model.layers.6.block_sparse_moe.experts.69.w3", "model.layers.6.block_sparse_moe.experts.70.w3", "model.layers.6.block_sparse_moe.experts.71.w3", "model.layers.6.block_sparse_moe.experts.72.w3", "model.layers.6.block_sparse_moe.experts.73.w3", "model.layers.6.block_sparse_moe.experts.74.w3", "model.layers.6.block_sparse_moe.experts.75.w3", "model.layers.6.block_sparse_moe.experts.76.w3", "model.layers.6.block_sparse_moe.experts.77.w3", "model.layers.6.block_sparse_moe.experts.78.w3", "model.layers.6.block_sparse_moe.experts.79.w3", "model.layers.6.block_sparse_moe.experts.80.w3", "model.layers.6.block_sparse_moe.experts.81.w3", "model.layers.6.block_sparse_moe.experts.82.w3", "model.layers.6.block_sparse_moe.experts.83.w3", "model.layers.6.block_sparse_moe.experts.84.w3", "model.layers.6.block_sparse_moe.experts.85.w3", "model.layers.6.block_sparse_moe.experts.86.w3", "model.layers.6.block_sparse_moe.experts.87.w3", "model.layers.6.block_sparse_moe.experts.88.w3", "model.layers.6.block_sparse_moe.experts.89.w3", "model.layers.6.block_sparse_moe.experts.90.w3", "model.layers.6.block_sparse_moe.experts.91.w3", "model.layers.6.block_sparse_moe.experts.92.w3", "model.layers.6.block_sparse_moe.experts.93.w3", "model.layers.6.block_sparse_moe.experts.94.w3", "model.layers.6.block_sparse_moe.experts.95.w3", "model.layers.6.block_sparse_moe.experts.96.w3", "model.layers.6.block_sparse_moe.experts.97.w3", "model.layers.6.block_sparse_moe.experts.98.w3", "model.layers.6.block_sparse_moe.experts.99.w3", "model.layers.6.block_sparse_moe.experts.100.w3", "model.layers.6.block_sparse_moe.experts.101.w3", "model.layers.6.block_sparse_moe.experts.102.w3", "model.layers.6.block_sparse_moe.experts.103.w3", "model.layers.6.block_sparse_moe.experts.104.w3", "model.layers.6.block_sparse_moe.experts.105.w3", "model.layers.6.block_sparse_moe.experts.106.w3", "model.layers.6.block_sparse_moe.experts.107.w3", "model.layers.6.block_sparse_moe.experts.108.w3", "model.layers.6.block_sparse_moe.experts.109.w3", "model.layers.6.block_sparse_moe.experts.110.w3", "model.layers.6.block_sparse_moe.experts.111.w3", "model.layers.6.block_sparse_moe.experts.112.w3", "model.layers.6.block_sparse_moe.experts.113.w3", "model.layers.6.block_sparse_moe.experts.114.w3", "model.layers.6.block_sparse_moe.experts.115.w3", "model.layers.6.block_sparse_moe.experts.116.w3", "model.layers.6.block_sparse_moe.experts.117.w3", "model.layers.6.block_sparse_moe.experts.118.w3", "model.layers.6.block_sparse_moe.experts.119.w3", "model.layers.6.block_sparse_moe.experts.120.w3", "model.layers.6.block_sparse_moe.experts.121.w3", "model.layers.6.block_sparse_moe.experts.122.w3", "model.layers.6.block_sparse_moe.experts.123.w3", "model.layers.6.block_sparse_moe.experts.124.w3", "model.layers.6.block_sparse_moe.experts.125.w3", "model.layers.6.block_sparse_moe.experts.126.w3", "model.layers.6.block_sparse_moe.experts.127.w3", "model.layers.6.block_sparse_moe.experts.128.w3", "model.layers.6.block_sparse_moe.experts.129.w3", "model.layers.6.block_sparse_moe.experts.130.w3", "model.layers.6.block_sparse_moe.experts.131.w3", "model.layers.6.block_sparse_moe.experts.132.w3", "model.layers.6.block_sparse_moe.experts.133.w3", "model.layers.6.block_sparse_moe.experts.134.w3", "model.layers.6.block_sparse_moe.experts.135.w3", "model.layers.6.block_sparse_moe.experts.136.w3", "model.layers.6.block_sparse_moe.experts.137.w3", "model.layers.6.block_sparse_moe.experts.138.w3", "model.layers.6.block_sparse_moe.experts.139.w3", "model.layers.6.block_sparse_moe.experts.140.w3", "model.layers.6.block_sparse_moe.experts.141.w3", "model.layers.6.block_sparse_moe.experts.142.w3", "model.layers.6.block_sparse_moe.experts.143.w3", "model.layers.6.block_sparse_moe.experts.144.w3", "model.layers.6.block_sparse_moe.experts.145.w3", "model.layers.6.block_sparse_moe.experts.146.w3", "model.layers.6.block_sparse_moe.experts.147.w3", "model.layers.6.block_sparse_moe.experts.148.w3", "model.layers.6.block_sparse_moe.experts.149.w3", "model.layers.6.block_sparse_moe.experts.150.w3", "model.layers.6.block_sparse_moe.experts.151.w3", "model.layers.6.block_sparse_moe.experts.152.w3", "model.layers.6.block_sparse_moe.experts.153.w3", "model.layers.6.block_sparse_moe.experts.154.w3", "model.layers.6.block_sparse_moe.experts.155.w3", "model.layers.6.block_sparse_moe.experts.156.w3", "model.layers.6.block_sparse_moe.experts.157.w3", "model.layers.6.block_sparse_moe.experts.158.w3", "model.layers.6.block_sparse_moe.experts.159.w3", "model.layers.6.block_sparse_moe.experts.160.w3", "model.layers.6.block_sparse_moe.experts.161.w3", "model.layers.6.block_sparse_moe.experts.162.w3", "model.layers.6.block_sparse_moe.experts.163.w3", "model.layers.6.block_sparse_moe.experts.164.w3", "model.layers.6.block_sparse_moe.experts.165.w3", "model.layers.6.block_sparse_moe.experts.166.w3", "model.layers.6.block_sparse_moe.experts.167.w3", "model.layers.6.block_sparse_moe.experts.168.w3", "model.layers.6.block_sparse_moe.experts.169.w3", "model.layers.6.block_sparse_moe.experts.170.w3", "model.layers.6.block_sparse_moe.experts.171.w3", "model.layers.6.block_sparse_moe.experts.172.w3", "model.layers.6.block_sparse_moe.experts.173.w3", "model.layers.6.block_sparse_moe.experts.174.w3", "model.layers.6.block_sparse_moe.experts.175.w3", "model.layers.6.block_sparse_moe.experts.176.w3", "model.layers.6.block_sparse_moe.experts.177.w3", "model.layers.6.block_sparse_moe.experts.178.w3", "model.layers.6.block_sparse_moe.experts.179.w3", "model.layers.6.block_sparse_moe.experts.180.w3", "model.layers.6.block_sparse_moe.experts.181.w3", "model.layers.6.block_sparse_moe.experts.182.w3", "model.layers.6.block_sparse_moe.experts.183.w3", "model.layers.6.block_sparse_moe.experts.184.w3", "model.layers.6.block_sparse_moe.experts.185.w3", "model.layers.6.block_sparse_moe.experts.186.w3", "model.layers.6.block_sparse_moe.experts.187.w3", "model.layers.6.block_sparse_moe.experts.188.w3", "model.layers.6.block_sparse_moe.experts.189.w3", "model.layers.6.block_sparse_moe.experts.190.w3", "model.layers.6.block_sparse_moe.experts.191.w3", "model.layers.6.block_sparse_moe.experts.192.w3", "model.layers.6.block_sparse_moe.experts.193.w3", "model.layers.6.block_sparse_moe.experts.194.w3", "model.layers.6.block_sparse_moe.experts.195.w3", "model.layers.6.block_sparse_moe.experts.196.w3", "model.layers.6.block_sparse_moe.experts.197.w3", "model.layers.6.block_sparse_moe.experts.198.w3", "model.layers.6.block_sparse_moe.experts.199.w3", "model.layers.6.block_sparse_moe.experts.200.w3", "model.layers.6.block_sparse_moe.experts.201.w3", "model.layers.6.block_sparse_moe.experts.202.w3", "model.layers.6.block_sparse_moe.experts.203.w3", "model.layers.6.block_sparse_moe.experts.204.w3", "model.layers.6.block_sparse_moe.experts.205.w3", "model.layers.6.block_sparse_moe.experts.206.w3", "model.layers.6.block_sparse_moe.experts.207.w3", "model.layers.6.block_sparse_moe.experts.208.w3", "model.layers.6.block_sparse_moe.experts.209.w3", "model.layers.6.block_sparse_moe.experts.210.w3", "model.layers.6.block_sparse_moe.experts.211.w3", "model.layers.6.block_sparse_moe.experts.212.w3", "model.layers.6.block_sparse_moe.experts.213.w3", "model.layers.6.block_sparse_moe.experts.214.w3", "model.layers.6.block_sparse_moe.experts.215.w3", "model.layers.6.block_sparse_moe.experts.216.w3", "model.layers.6.block_sparse_moe.experts.217.w3", "model.layers.6.block_sparse_moe.experts.218.w3", "model.layers.6.block_sparse_moe.experts.219.w3", "model.layers.6.block_sparse_moe.experts.220.w3", "model.layers.6.block_sparse_moe.experts.221.w3", "model.layers.6.block_sparse_moe.experts.222.w3", "model.layers.6.block_sparse_moe.experts.223.w3", "model.layers.6.block_sparse_moe.experts.224.w3", "model.layers.6.block_sparse_moe.experts.225.w3", "model.layers.6.block_sparse_moe.experts.226.w3", "model.layers.6.block_sparse_moe.experts.227.w3", "model.layers.6.block_sparse_moe.experts.228.w3", "model.layers.6.block_sparse_moe.experts.229.w3", "model.layers.6.block_sparse_moe.experts.230.w3", "model.layers.6.block_sparse_moe.experts.231.w3", "model.layers.6.block_sparse_moe.experts.232.w3", "model.layers.6.block_sparse_moe.experts.233.w3", "model.layers.6.block_sparse_moe.experts.234.w3", "model.layers.6.block_sparse_moe.experts.235.w3", "model.layers.6.block_sparse_moe.experts.236.w3", "model.layers.6.block_sparse_moe.experts.237.w3", "model.layers.6.block_sparse_moe.experts.238.w3", "model.layers.6.block_sparse_moe.experts.239.w3", "model.layers.6.block_sparse_moe.experts.240.w3", "model.layers.6.block_sparse_moe.experts.241.w3", "model.layers.6.block_sparse_moe.experts.242.w3", "model.layers.6.block_sparse_moe.experts.243.w3", "model.layers.6.block_sparse_moe.experts.244.w3", "model.layers.6.block_sparse_moe.experts.245.w3", "model.layers.6.block_sparse_moe.experts.246.w3", "model.layers.6.block_sparse_moe.experts.247.w3", "model.layers.6.block_sparse_moe.experts.248.w3", "model.layers.6.block_sparse_moe.experts.249.w3", "model.layers.6.block_sparse_moe.experts.250.w3", "model.layers.6.block_sparse_moe.experts.251.w3", "model.layers.6.block_sparse_moe.experts.252.w3", "model.layers.6.block_sparse_moe.experts.253.w3", "model.layers.6.block_sparse_moe.experts.254.w3", "model.layers.6.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.001311850920319546, "dbits": 2415919104 } ] }, { "idx": 34, "layers": [ "model.layers.6.block_sparse_moe.experts.0.w2", "model.layers.6.block_sparse_moe.experts.1.w2", "model.layers.6.block_sparse_moe.experts.2.w2", "model.layers.6.block_sparse_moe.experts.3.w2", "model.layers.6.block_sparse_moe.experts.4.w2", "model.layers.6.block_sparse_moe.experts.5.w2", "model.layers.6.block_sparse_moe.experts.6.w2", "model.layers.6.block_sparse_moe.experts.7.w2", "model.layers.6.block_sparse_moe.experts.8.w2", "model.layers.6.block_sparse_moe.experts.9.w2", "model.layers.6.block_sparse_moe.experts.10.w2", "model.layers.6.block_sparse_moe.experts.11.w2", "model.layers.6.block_sparse_moe.experts.12.w2", "model.layers.6.block_sparse_moe.experts.13.w2", "model.layers.6.block_sparse_moe.experts.14.w2", "model.layers.6.block_sparse_moe.experts.15.w2", "model.layers.6.block_sparse_moe.experts.16.w2", "model.layers.6.block_sparse_moe.experts.17.w2", "model.layers.6.block_sparse_moe.experts.18.w2", "model.layers.6.block_sparse_moe.experts.19.w2", "model.layers.6.block_sparse_moe.experts.20.w2", "model.layers.6.block_sparse_moe.experts.21.w2", "model.layers.6.block_sparse_moe.experts.22.w2", "model.layers.6.block_sparse_moe.experts.23.w2", "model.layers.6.block_sparse_moe.experts.24.w2", "model.layers.6.block_sparse_moe.experts.25.w2", "model.layers.6.block_sparse_moe.experts.26.w2", "model.layers.6.block_sparse_moe.experts.27.w2", "model.layers.6.block_sparse_moe.experts.28.w2", "model.layers.6.block_sparse_moe.experts.29.w2", "model.layers.6.block_sparse_moe.experts.30.w2", "model.layers.6.block_sparse_moe.experts.31.w2", "model.layers.6.block_sparse_moe.experts.32.w2", "model.layers.6.block_sparse_moe.experts.33.w2", "model.layers.6.block_sparse_moe.experts.34.w2", "model.layers.6.block_sparse_moe.experts.35.w2", "model.layers.6.block_sparse_moe.experts.36.w2", "model.layers.6.block_sparse_moe.experts.37.w2", "model.layers.6.block_sparse_moe.experts.38.w2", "model.layers.6.block_sparse_moe.experts.39.w2", "model.layers.6.block_sparse_moe.experts.40.w2", "model.layers.6.block_sparse_moe.experts.41.w2", "model.layers.6.block_sparse_moe.experts.42.w2", "model.layers.6.block_sparse_moe.experts.43.w2", "model.layers.6.block_sparse_moe.experts.44.w2", "model.layers.6.block_sparse_moe.experts.45.w2", "model.layers.6.block_sparse_moe.experts.46.w2", "model.layers.6.block_sparse_moe.experts.47.w2", "model.layers.6.block_sparse_moe.experts.48.w2", "model.layers.6.block_sparse_moe.experts.49.w2", "model.layers.6.block_sparse_moe.experts.50.w2", "model.layers.6.block_sparse_moe.experts.51.w2", "model.layers.6.block_sparse_moe.experts.52.w2", "model.layers.6.block_sparse_moe.experts.53.w2", "model.layers.6.block_sparse_moe.experts.54.w2", "model.layers.6.block_sparse_moe.experts.55.w2", "model.layers.6.block_sparse_moe.experts.56.w2", "model.layers.6.block_sparse_moe.experts.57.w2", "model.layers.6.block_sparse_moe.experts.58.w2", "model.layers.6.block_sparse_moe.experts.59.w2", "model.layers.6.block_sparse_moe.experts.60.w2", "model.layers.6.block_sparse_moe.experts.61.w2", "model.layers.6.block_sparse_moe.experts.62.w2", "model.layers.6.block_sparse_moe.experts.63.w2", "model.layers.6.block_sparse_moe.experts.64.w2", "model.layers.6.block_sparse_moe.experts.65.w2", "model.layers.6.block_sparse_moe.experts.66.w2", "model.layers.6.block_sparse_moe.experts.67.w2", "model.layers.6.block_sparse_moe.experts.68.w2", "model.layers.6.block_sparse_moe.experts.69.w2", "model.layers.6.block_sparse_moe.experts.70.w2", "model.layers.6.block_sparse_moe.experts.71.w2", "model.layers.6.block_sparse_moe.experts.72.w2", "model.layers.6.block_sparse_moe.experts.73.w2", "model.layers.6.block_sparse_moe.experts.74.w2", "model.layers.6.block_sparse_moe.experts.75.w2", "model.layers.6.block_sparse_moe.experts.76.w2", "model.layers.6.block_sparse_moe.experts.77.w2", "model.layers.6.block_sparse_moe.experts.78.w2", "model.layers.6.block_sparse_moe.experts.79.w2", "model.layers.6.block_sparse_moe.experts.80.w2", "model.layers.6.block_sparse_moe.experts.81.w2", "model.layers.6.block_sparse_moe.experts.82.w2", "model.layers.6.block_sparse_moe.experts.83.w2", "model.layers.6.block_sparse_moe.experts.84.w2", "model.layers.6.block_sparse_moe.experts.85.w2", "model.layers.6.block_sparse_moe.experts.86.w2", "model.layers.6.block_sparse_moe.experts.87.w2", "model.layers.6.block_sparse_moe.experts.88.w2", "model.layers.6.block_sparse_moe.experts.89.w2", "model.layers.6.block_sparse_moe.experts.90.w2", "model.layers.6.block_sparse_moe.experts.91.w2", "model.layers.6.block_sparse_moe.experts.92.w2", "model.layers.6.block_sparse_moe.experts.93.w2", "model.layers.6.block_sparse_moe.experts.94.w2", "model.layers.6.block_sparse_moe.experts.95.w2", "model.layers.6.block_sparse_moe.experts.96.w2", "model.layers.6.block_sparse_moe.experts.97.w2", "model.layers.6.block_sparse_moe.experts.98.w2", "model.layers.6.block_sparse_moe.experts.99.w2", "model.layers.6.block_sparse_moe.experts.100.w2", "model.layers.6.block_sparse_moe.experts.101.w2", "model.layers.6.block_sparse_moe.experts.102.w2", "model.layers.6.block_sparse_moe.experts.103.w2", "model.layers.6.block_sparse_moe.experts.104.w2", "model.layers.6.block_sparse_moe.experts.105.w2", "model.layers.6.block_sparse_moe.experts.106.w2", "model.layers.6.block_sparse_moe.experts.107.w2", "model.layers.6.block_sparse_moe.experts.108.w2", "model.layers.6.block_sparse_moe.experts.109.w2", "model.layers.6.block_sparse_moe.experts.110.w2", "model.layers.6.block_sparse_moe.experts.111.w2", "model.layers.6.block_sparse_moe.experts.112.w2", "model.layers.6.block_sparse_moe.experts.113.w2", "model.layers.6.block_sparse_moe.experts.114.w2", "model.layers.6.block_sparse_moe.experts.115.w2", "model.layers.6.block_sparse_moe.experts.116.w2", "model.layers.6.block_sparse_moe.experts.117.w2", "model.layers.6.block_sparse_moe.experts.118.w2", "model.layers.6.block_sparse_moe.experts.119.w2", "model.layers.6.block_sparse_moe.experts.120.w2", "model.layers.6.block_sparse_moe.experts.121.w2", "model.layers.6.block_sparse_moe.experts.122.w2", "model.layers.6.block_sparse_moe.experts.123.w2", "model.layers.6.block_sparse_moe.experts.124.w2", "model.layers.6.block_sparse_moe.experts.125.w2", "model.layers.6.block_sparse_moe.experts.126.w2", "model.layers.6.block_sparse_moe.experts.127.w2", "model.layers.6.block_sparse_moe.experts.128.w2", "model.layers.6.block_sparse_moe.experts.129.w2", "model.layers.6.block_sparse_moe.experts.130.w2", "model.layers.6.block_sparse_moe.experts.131.w2", "model.layers.6.block_sparse_moe.experts.132.w2", "model.layers.6.block_sparse_moe.experts.133.w2", "model.layers.6.block_sparse_moe.experts.134.w2", "model.layers.6.block_sparse_moe.experts.135.w2", "model.layers.6.block_sparse_moe.experts.136.w2", "model.layers.6.block_sparse_moe.experts.137.w2", "model.layers.6.block_sparse_moe.experts.138.w2", "model.layers.6.block_sparse_moe.experts.139.w2", "model.layers.6.block_sparse_moe.experts.140.w2", "model.layers.6.block_sparse_moe.experts.141.w2", "model.layers.6.block_sparse_moe.experts.142.w2", "model.layers.6.block_sparse_moe.experts.143.w2", "model.layers.6.block_sparse_moe.experts.144.w2", "model.layers.6.block_sparse_moe.experts.145.w2", "model.layers.6.block_sparse_moe.experts.146.w2", "model.layers.6.block_sparse_moe.experts.147.w2", "model.layers.6.block_sparse_moe.experts.148.w2", "model.layers.6.block_sparse_moe.experts.149.w2", "model.layers.6.block_sparse_moe.experts.150.w2", "model.layers.6.block_sparse_moe.experts.151.w2", "model.layers.6.block_sparse_moe.experts.152.w2", "model.layers.6.block_sparse_moe.experts.153.w2", "model.layers.6.block_sparse_moe.experts.154.w2", "model.layers.6.block_sparse_moe.experts.155.w2", "model.layers.6.block_sparse_moe.experts.156.w2", "model.layers.6.block_sparse_moe.experts.157.w2", "model.layers.6.block_sparse_moe.experts.158.w2", "model.layers.6.block_sparse_moe.experts.159.w2", "model.layers.6.block_sparse_moe.experts.160.w2", "model.layers.6.block_sparse_moe.experts.161.w2", "model.layers.6.block_sparse_moe.experts.162.w2", "model.layers.6.block_sparse_moe.experts.163.w2", "model.layers.6.block_sparse_moe.experts.164.w2", "model.layers.6.block_sparse_moe.experts.165.w2", "model.layers.6.block_sparse_moe.experts.166.w2", "model.layers.6.block_sparse_moe.experts.167.w2", "model.layers.6.block_sparse_moe.experts.168.w2", "model.layers.6.block_sparse_moe.experts.169.w2", "model.layers.6.block_sparse_moe.experts.170.w2", "model.layers.6.block_sparse_moe.experts.171.w2", "model.layers.6.block_sparse_moe.experts.172.w2", "model.layers.6.block_sparse_moe.experts.173.w2", "model.layers.6.block_sparse_moe.experts.174.w2", "model.layers.6.block_sparse_moe.experts.175.w2", "model.layers.6.block_sparse_moe.experts.176.w2", "model.layers.6.block_sparse_moe.experts.177.w2", "model.layers.6.block_sparse_moe.experts.178.w2", "model.layers.6.block_sparse_moe.experts.179.w2", "model.layers.6.block_sparse_moe.experts.180.w2", "model.layers.6.block_sparse_moe.experts.181.w2", "model.layers.6.block_sparse_moe.experts.182.w2", "model.layers.6.block_sparse_moe.experts.183.w2", "model.layers.6.block_sparse_moe.experts.184.w2", "model.layers.6.block_sparse_moe.experts.185.w2", "model.layers.6.block_sparse_moe.experts.186.w2", "model.layers.6.block_sparse_moe.experts.187.w2", "model.layers.6.block_sparse_moe.experts.188.w2", "model.layers.6.block_sparse_moe.experts.189.w2", "model.layers.6.block_sparse_moe.experts.190.w2", "model.layers.6.block_sparse_moe.experts.191.w2", "model.layers.6.block_sparse_moe.experts.192.w2", "model.layers.6.block_sparse_moe.experts.193.w2", "model.layers.6.block_sparse_moe.experts.194.w2", "model.layers.6.block_sparse_moe.experts.195.w2", "model.layers.6.block_sparse_moe.experts.196.w2", "model.layers.6.block_sparse_moe.experts.197.w2", "model.layers.6.block_sparse_moe.experts.198.w2", "model.layers.6.block_sparse_moe.experts.199.w2", "model.layers.6.block_sparse_moe.experts.200.w2", "model.layers.6.block_sparse_moe.experts.201.w2", "model.layers.6.block_sparse_moe.experts.202.w2", "model.layers.6.block_sparse_moe.experts.203.w2", "model.layers.6.block_sparse_moe.experts.204.w2", "model.layers.6.block_sparse_moe.experts.205.w2", "model.layers.6.block_sparse_moe.experts.206.w2", "model.layers.6.block_sparse_moe.experts.207.w2", "model.layers.6.block_sparse_moe.experts.208.w2", "model.layers.6.block_sparse_moe.experts.209.w2", "model.layers.6.block_sparse_moe.experts.210.w2", "model.layers.6.block_sparse_moe.experts.211.w2", "model.layers.6.block_sparse_moe.experts.212.w2", "model.layers.6.block_sparse_moe.experts.213.w2", "model.layers.6.block_sparse_moe.experts.214.w2", "model.layers.6.block_sparse_moe.experts.215.w2", "model.layers.6.block_sparse_moe.experts.216.w2", "model.layers.6.block_sparse_moe.experts.217.w2", "model.layers.6.block_sparse_moe.experts.218.w2", "model.layers.6.block_sparse_moe.experts.219.w2", "model.layers.6.block_sparse_moe.experts.220.w2", "model.layers.6.block_sparse_moe.experts.221.w2", "model.layers.6.block_sparse_moe.experts.222.w2", "model.layers.6.block_sparse_moe.experts.223.w2", "model.layers.6.block_sparse_moe.experts.224.w2", "model.layers.6.block_sparse_moe.experts.225.w2", "model.layers.6.block_sparse_moe.experts.226.w2", "model.layers.6.block_sparse_moe.experts.227.w2", "model.layers.6.block_sparse_moe.experts.228.w2", "model.layers.6.block_sparse_moe.experts.229.w2", "model.layers.6.block_sparse_moe.experts.230.w2", "model.layers.6.block_sparse_moe.experts.231.w2", "model.layers.6.block_sparse_moe.experts.232.w2", "model.layers.6.block_sparse_moe.experts.233.w2", "model.layers.6.block_sparse_moe.experts.234.w2", "model.layers.6.block_sparse_moe.experts.235.w2", "model.layers.6.block_sparse_moe.experts.236.w2", "model.layers.6.block_sparse_moe.experts.237.w2", "model.layers.6.block_sparse_moe.experts.238.w2", "model.layers.6.block_sparse_moe.experts.239.w2", "model.layers.6.block_sparse_moe.experts.240.w2", "model.layers.6.block_sparse_moe.experts.241.w2", "model.layers.6.block_sparse_moe.experts.242.w2", "model.layers.6.block_sparse_moe.experts.243.w2", "model.layers.6.block_sparse_moe.experts.244.w2", "model.layers.6.block_sparse_moe.experts.245.w2", "model.layers.6.block_sparse_moe.experts.246.w2", "model.layers.6.block_sparse_moe.experts.247.w2", "model.layers.6.block_sparse_moe.experts.248.w2", "model.layers.6.block_sparse_moe.experts.249.w2", "model.layers.6.block_sparse_moe.experts.250.w2", "model.layers.6.block_sparse_moe.experts.251.w2", "model.layers.6.block_sparse_moe.experts.252.w2", "model.layers.6.block_sparse_moe.experts.253.w2", "model.layers.6.block_sparse_moe.experts.254.w2", "model.layers.6.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.002083335444331147, "dbits": 1207959552 } ] }, { "idx": 35, "layers": [ "model.layers.7.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0009147353470325692, "dbits": 18874368 } ] }, { "idx": 36, "layers": [ "model.layers.7.self_attn.k_proj", "model.layers.7.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0009187497198581696, "dbits": 6291456 } ] }, { "idx": 37, "layers": [ "model.layers.7.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0025253701955080254, "dbits": 18874368 } ] }, { "idx": 38, "layers": [ "model.layers.7.block_sparse_moe.experts.0.w1", "model.layers.7.block_sparse_moe.experts.1.w1", "model.layers.7.block_sparse_moe.experts.2.w1", "model.layers.7.block_sparse_moe.experts.3.w1", "model.layers.7.block_sparse_moe.experts.4.w1", "model.layers.7.block_sparse_moe.experts.5.w1", "model.layers.7.block_sparse_moe.experts.6.w1", "model.layers.7.block_sparse_moe.experts.7.w1", "model.layers.7.block_sparse_moe.experts.8.w1", "model.layers.7.block_sparse_moe.experts.9.w1", "model.layers.7.block_sparse_moe.experts.10.w1", "model.layers.7.block_sparse_moe.experts.11.w1", "model.layers.7.block_sparse_moe.experts.12.w1", "model.layers.7.block_sparse_moe.experts.13.w1", "model.layers.7.block_sparse_moe.experts.14.w1", "model.layers.7.block_sparse_moe.experts.15.w1", "model.layers.7.block_sparse_moe.experts.16.w1", "model.layers.7.block_sparse_moe.experts.17.w1", "model.layers.7.block_sparse_moe.experts.18.w1", "model.layers.7.block_sparse_moe.experts.19.w1", "model.layers.7.block_sparse_moe.experts.20.w1", "model.layers.7.block_sparse_moe.experts.21.w1", "model.layers.7.block_sparse_moe.experts.22.w1", "model.layers.7.block_sparse_moe.experts.23.w1", "model.layers.7.block_sparse_moe.experts.24.w1", "model.layers.7.block_sparse_moe.experts.25.w1", "model.layers.7.block_sparse_moe.experts.26.w1", "model.layers.7.block_sparse_moe.experts.27.w1", "model.layers.7.block_sparse_moe.experts.28.w1", "model.layers.7.block_sparse_moe.experts.29.w1", "model.layers.7.block_sparse_moe.experts.30.w1", "model.layers.7.block_sparse_moe.experts.31.w1", "model.layers.7.block_sparse_moe.experts.32.w1", "model.layers.7.block_sparse_moe.experts.33.w1", "model.layers.7.block_sparse_moe.experts.34.w1", "model.layers.7.block_sparse_moe.experts.35.w1", "model.layers.7.block_sparse_moe.experts.36.w1", "model.layers.7.block_sparse_moe.experts.37.w1", "model.layers.7.block_sparse_moe.experts.38.w1", "model.layers.7.block_sparse_moe.experts.39.w1", "model.layers.7.block_sparse_moe.experts.40.w1", "model.layers.7.block_sparse_moe.experts.41.w1", "model.layers.7.block_sparse_moe.experts.42.w1", "model.layers.7.block_sparse_moe.experts.43.w1", "model.layers.7.block_sparse_moe.experts.44.w1", "model.layers.7.block_sparse_moe.experts.45.w1", "model.layers.7.block_sparse_moe.experts.46.w1", "model.layers.7.block_sparse_moe.experts.47.w1", "model.layers.7.block_sparse_moe.experts.48.w1", "model.layers.7.block_sparse_moe.experts.49.w1", "model.layers.7.block_sparse_moe.experts.50.w1", "model.layers.7.block_sparse_moe.experts.51.w1", "model.layers.7.block_sparse_moe.experts.52.w1", "model.layers.7.block_sparse_moe.experts.53.w1", "model.layers.7.block_sparse_moe.experts.54.w1", "model.layers.7.block_sparse_moe.experts.55.w1", "model.layers.7.block_sparse_moe.experts.56.w1", "model.layers.7.block_sparse_moe.experts.57.w1", "model.layers.7.block_sparse_moe.experts.58.w1", "model.layers.7.block_sparse_moe.experts.59.w1", "model.layers.7.block_sparse_moe.experts.60.w1", "model.layers.7.block_sparse_moe.experts.61.w1", "model.layers.7.block_sparse_moe.experts.62.w1", "model.layers.7.block_sparse_moe.experts.63.w1", "model.layers.7.block_sparse_moe.experts.64.w1", "model.layers.7.block_sparse_moe.experts.65.w1", "model.layers.7.block_sparse_moe.experts.66.w1", "model.layers.7.block_sparse_moe.experts.67.w1", "model.layers.7.block_sparse_moe.experts.68.w1", "model.layers.7.block_sparse_moe.experts.69.w1", "model.layers.7.block_sparse_moe.experts.70.w1", "model.layers.7.block_sparse_moe.experts.71.w1", "model.layers.7.block_sparse_moe.experts.72.w1", "model.layers.7.block_sparse_moe.experts.73.w1", "model.layers.7.block_sparse_moe.experts.74.w1", "model.layers.7.block_sparse_moe.experts.75.w1", "model.layers.7.block_sparse_moe.experts.76.w1", "model.layers.7.block_sparse_moe.experts.77.w1", "model.layers.7.block_sparse_moe.experts.78.w1", "model.layers.7.block_sparse_moe.experts.79.w1", "model.layers.7.block_sparse_moe.experts.80.w1", "model.layers.7.block_sparse_moe.experts.81.w1", "model.layers.7.block_sparse_moe.experts.82.w1", "model.layers.7.block_sparse_moe.experts.83.w1", "model.layers.7.block_sparse_moe.experts.84.w1", "model.layers.7.block_sparse_moe.experts.85.w1", "model.layers.7.block_sparse_moe.experts.86.w1", "model.layers.7.block_sparse_moe.experts.87.w1", "model.layers.7.block_sparse_moe.experts.88.w1", "model.layers.7.block_sparse_moe.experts.89.w1", "model.layers.7.block_sparse_moe.experts.90.w1", "model.layers.7.block_sparse_moe.experts.91.w1", "model.layers.7.block_sparse_moe.experts.92.w1", "model.layers.7.block_sparse_moe.experts.93.w1", "model.layers.7.block_sparse_moe.experts.94.w1", "model.layers.7.block_sparse_moe.experts.95.w1", "model.layers.7.block_sparse_moe.experts.96.w1", "model.layers.7.block_sparse_moe.experts.97.w1", "model.layers.7.block_sparse_moe.experts.98.w1", "model.layers.7.block_sparse_moe.experts.99.w1", "model.layers.7.block_sparse_moe.experts.100.w1", "model.layers.7.block_sparse_moe.experts.101.w1", "model.layers.7.block_sparse_moe.experts.102.w1", "model.layers.7.block_sparse_moe.experts.103.w1", "model.layers.7.block_sparse_moe.experts.104.w1", "model.layers.7.block_sparse_moe.experts.105.w1", "model.layers.7.block_sparse_moe.experts.106.w1", "model.layers.7.block_sparse_moe.experts.107.w1", "model.layers.7.block_sparse_moe.experts.108.w1", "model.layers.7.block_sparse_moe.experts.109.w1", "model.layers.7.block_sparse_moe.experts.110.w1", "model.layers.7.block_sparse_moe.experts.111.w1", "model.layers.7.block_sparse_moe.experts.112.w1", "model.layers.7.block_sparse_moe.experts.113.w1", "model.layers.7.block_sparse_moe.experts.114.w1", "model.layers.7.block_sparse_moe.experts.115.w1", "model.layers.7.block_sparse_moe.experts.116.w1", "model.layers.7.block_sparse_moe.experts.117.w1", "model.layers.7.block_sparse_moe.experts.118.w1", "model.layers.7.block_sparse_moe.experts.119.w1", "model.layers.7.block_sparse_moe.experts.120.w1", "model.layers.7.block_sparse_moe.experts.121.w1", "model.layers.7.block_sparse_moe.experts.122.w1", "model.layers.7.block_sparse_moe.experts.123.w1", "model.layers.7.block_sparse_moe.experts.124.w1", "model.layers.7.block_sparse_moe.experts.125.w1", "model.layers.7.block_sparse_moe.experts.126.w1", "model.layers.7.block_sparse_moe.experts.127.w1", "model.layers.7.block_sparse_moe.experts.128.w1", "model.layers.7.block_sparse_moe.experts.129.w1", "model.layers.7.block_sparse_moe.experts.130.w1", "model.layers.7.block_sparse_moe.experts.131.w1", "model.layers.7.block_sparse_moe.experts.132.w1", "model.layers.7.block_sparse_moe.experts.133.w1", "model.layers.7.block_sparse_moe.experts.134.w1", "model.layers.7.block_sparse_moe.experts.135.w1", "model.layers.7.block_sparse_moe.experts.136.w1", "model.layers.7.block_sparse_moe.experts.137.w1", "model.layers.7.block_sparse_moe.experts.138.w1", "model.layers.7.block_sparse_moe.experts.139.w1", "model.layers.7.block_sparse_moe.experts.140.w1", "model.layers.7.block_sparse_moe.experts.141.w1", "model.layers.7.block_sparse_moe.experts.142.w1", "model.layers.7.block_sparse_moe.experts.143.w1", "model.layers.7.block_sparse_moe.experts.144.w1", "model.layers.7.block_sparse_moe.experts.145.w1", "model.layers.7.block_sparse_moe.experts.146.w1", "model.layers.7.block_sparse_moe.experts.147.w1", "model.layers.7.block_sparse_moe.experts.148.w1", "model.layers.7.block_sparse_moe.experts.149.w1", "model.layers.7.block_sparse_moe.experts.150.w1", "model.layers.7.block_sparse_moe.experts.151.w1", "model.layers.7.block_sparse_moe.experts.152.w1", "model.layers.7.block_sparse_moe.experts.153.w1", "model.layers.7.block_sparse_moe.experts.154.w1", "model.layers.7.block_sparse_moe.experts.155.w1", "model.layers.7.block_sparse_moe.experts.156.w1", "model.layers.7.block_sparse_moe.experts.157.w1", "model.layers.7.block_sparse_moe.experts.158.w1", "model.layers.7.block_sparse_moe.experts.159.w1", "model.layers.7.block_sparse_moe.experts.160.w1", "model.layers.7.block_sparse_moe.experts.161.w1", "model.layers.7.block_sparse_moe.experts.162.w1", "model.layers.7.block_sparse_moe.experts.163.w1", "model.layers.7.block_sparse_moe.experts.164.w1", "model.layers.7.block_sparse_moe.experts.165.w1", "model.layers.7.block_sparse_moe.experts.166.w1", "model.layers.7.block_sparse_moe.experts.167.w1", "model.layers.7.block_sparse_moe.experts.168.w1", "model.layers.7.block_sparse_moe.experts.169.w1", "model.layers.7.block_sparse_moe.experts.170.w1", "model.layers.7.block_sparse_moe.experts.171.w1", "model.layers.7.block_sparse_moe.experts.172.w1", "model.layers.7.block_sparse_moe.experts.173.w1", "model.layers.7.block_sparse_moe.experts.174.w1", "model.layers.7.block_sparse_moe.experts.175.w1", "model.layers.7.block_sparse_moe.experts.176.w1", "model.layers.7.block_sparse_moe.experts.177.w1", "model.layers.7.block_sparse_moe.experts.178.w1", "model.layers.7.block_sparse_moe.experts.179.w1", "model.layers.7.block_sparse_moe.experts.180.w1", "model.layers.7.block_sparse_moe.experts.181.w1", "model.layers.7.block_sparse_moe.experts.182.w1", "model.layers.7.block_sparse_moe.experts.183.w1", "model.layers.7.block_sparse_moe.experts.184.w1", "model.layers.7.block_sparse_moe.experts.185.w1", "model.layers.7.block_sparse_moe.experts.186.w1", "model.layers.7.block_sparse_moe.experts.187.w1", "model.layers.7.block_sparse_moe.experts.188.w1", "model.layers.7.block_sparse_moe.experts.189.w1", "model.layers.7.block_sparse_moe.experts.190.w1", "model.layers.7.block_sparse_moe.experts.191.w1", "model.layers.7.block_sparse_moe.experts.192.w1", "model.layers.7.block_sparse_moe.experts.193.w1", "model.layers.7.block_sparse_moe.experts.194.w1", "model.layers.7.block_sparse_moe.experts.195.w1", "model.layers.7.block_sparse_moe.experts.196.w1", "model.layers.7.block_sparse_moe.experts.197.w1", "model.layers.7.block_sparse_moe.experts.198.w1", "model.layers.7.block_sparse_moe.experts.199.w1", "model.layers.7.block_sparse_moe.experts.200.w1", "model.layers.7.block_sparse_moe.experts.201.w1", "model.layers.7.block_sparse_moe.experts.202.w1", "model.layers.7.block_sparse_moe.experts.203.w1", "model.layers.7.block_sparse_moe.experts.204.w1", "model.layers.7.block_sparse_moe.experts.205.w1", "model.layers.7.block_sparse_moe.experts.206.w1", "model.layers.7.block_sparse_moe.experts.207.w1", "model.layers.7.block_sparse_moe.experts.208.w1", "model.layers.7.block_sparse_moe.experts.209.w1", "model.layers.7.block_sparse_moe.experts.210.w1", "model.layers.7.block_sparse_moe.experts.211.w1", "model.layers.7.block_sparse_moe.experts.212.w1", "model.layers.7.block_sparse_moe.experts.213.w1", "model.layers.7.block_sparse_moe.experts.214.w1", "model.layers.7.block_sparse_moe.experts.215.w1", "model.layers.7.block_sparse_moe.experts.216.w1", "model.layers.7.block_sparse_moe.experts.217.w1", "model.layers.7.block_sparse_moe.experts.218.w1", "model.layers.7.block_sparse_moe.experts.219.w1", "model.layers.7.block_sparse_moe.experts.220.w1", "model.layers.7.block_sparse_moe.experts.221.w1", "model.layers.7.block_sparse_moe.experts.222.w1", "model.layers.7.block_sparse_moe.experts.223.w1", "model.layers.7.block_sparse_moe.experts.224.w1", "model.layers.7.block_sparse_moe.experts.225.w1", "model.layers.7.block_sparse_moe.experts.226.w1", "model.layers.7.block_sparse_moe.experts.227.w1", "model.layers.7.block_sparse_moe.experts.228.w1", "model.layers.7.block_sparse_moe.experts.229.w1", "model.layers.7.block_sparse_moe.experts.230.w1", "model.layers.7.block_sparse_moe.experts.231.w1", "model.layers.7.block_sparse_moe.experts.232.w1", "model.layers.7.block_sparse_moe.experts.233.w1", "model.layers.7.block_sparse_moe.experts.234.w1", "model.layers.7.block_sparse_moe.experts.235.w1", "model.layers.7.block_sparse_moe.experts.236.w1", "model.layers.7.block_sparse_moe.experts.237.w1", "model.layers.7.block_sparse_moe.experts.238.w1", "model.layers.7.block_sparse_moe.experts.239.w1", "model.layers.7.block_sparse_moe.experts.240.w1", "model.layers.7.block_sparse_moe.experts.241.w1", "model.layers.7.block_sparse_moe.experts.242.w1", "model.layers.7.block_sparse_moe.experts.243.w1", "model.layers.7.block_sparse_moe.experts.244.w1", "model.layers.7.block_sparse_moe.experts.245.w1", "model.layers.7.block_sparse_moe.experts.246.w1", "model.layers.7.block_sparse_moe.experts.247.w1", "model.layers.7.block_sparse_moe.experts.248.w1", "model.layers.7.block_sparse_moe.experts.249.w1", "model.layers.7.block_sparse_moe.experts.250.w1", "model.layers.7.block_sparse_moe.experts.251.w1", "model.layers.7.block_sparse_moe.experts.252.w1", "model.layers.7.block_sparse_moe.experts.253.w1", "model.layers.7.block_sparse_moe.experts.254.w1", "model.layers.7.block_sparse_moe.experts.255.w1", "model.layers.7.block_sparse_moe.experts.0.w3", "model.layers.7.block_sparse_moe.experts.1.w3", "model.layers.7.block_sparse_moe.experts.2.w3", "model.layers.7.block_sparse_moe.experts.3.w3", "model.layers.7.block_sparse_moe.experts.4.w3", "model.layers.7.block_sparse_moe.experts.5.w3", "model.layers.7.block_sparse_moe.experts.6.w3", "model.layers.7.block_sparse_moe.experts.7.w3", "model.layers.7.block_sparse_moe.experts.8.w3", "model.layers.7.block_sparse_moe.experts.9.w3", "model.layers.7.block_sparse_moe.experts.10.w3", "model.layers.7.block_sparse_moe.experts.11.w3", "model.layers.7.block_sparse_moe.experts.12.w3", "model.layers.7.block_sparse_moe.experts.13.w3", "model.layers.7.block_sparse_moe.experts.14.w3", "model.layers.7.block_sparse_moe.experts.15.w3", "model.layers.7.block_sparse_moe.experts.16.w3", "model.layers.7.block_sparse_moe.experts.17.w3", "model.layers.7.block_sparse_moe.experts.18.w3", "model.layers.7.block_sparse_moe.experts.19.w3", "model.layers.7.block_sparse_moe.experts.20.w3", "model.layers.7.block_sparse_moe.experts.21.w3", "model.layers.7.block_sparse_moe.experts.22.w3", "model.layers.7.block_sparse_moe.experts.23.w3", "model.layers.7.block_sparse_moe.experts.24.w3", "model.layers.7.block_sparse_moe.experts.25.w3", "model.layers.7.block_sparse_moe.experts.26.w3", "model.layers.7.block_sparse_moe.experts.27.w3", "model.layers.7.block_sparse_moe.experts.28.w3", "model.layers.7.block_sparse_moe.experts.29.w3", "model.layers.7.block_sparse_moe.experts.30.w3", "model.layers.7.block_sparse_moe.experts.31.w3", "model.layers.7.block_sparse_moe.experts.32.w3", "model.layers.7.block_sparse_moe.experts.33.w3", "model.layers.7.block_sparse_moe.experts.34.w3", "model.layers.7.block_sparse_moe.experts.35.w3", "model.layers.7.block_sparse_moe.experts.36.w3", "model.layers.7.block_sparse_moe.experts.37.w3", "model.layers.7.block_sparse_moe.experts.38.w3", "model.layers.7.block_sparse_moe.experts.39.w3", "model.layers.7.block_sparse_moe.experts.40.w3", "model.layers.7.block_sparse_moe.experts.41.w3", "model.layers.7.block_sparse_moe.experts.42.w3", "model.layers.7.block_sparse_moe.experts.43.w3", "model.layers.7.block_sparse_moe.experts.44.w3", "model.layers.7.block_sparse_moe.experts.45.w3", "model.layers.7.block_sparse_moe.experts.46.w3", "model.layers.7.block_sparse_moe.experts.47.w3", "model.layers.7.block_sparse_moe.experts.48.w3", "model.layers.7.block_sparse_moe.experts.49.w3", "model.layers.7.block_sparse_moe.experts.50.w3", "model.layers.7.block_sparse_moe.experts.51.w3", "model.layers.7.block_sparse_moe.experts.52.w3", "model.layers.7.block_sparse_moe.experts.53.w3", "model.layers.7.block_sparse_moe.experts.54.w3", "model.layers.7.block_sparse_moe.experts.55.w3", "model.layers.7.block_sparse_moe.experts.56.w3", "model.layers.7.block_sparse_moe.experts.57.w3", "model.layers.7.block_sparse_moe.experts.58.w3", "model.layers.7.block_sparse_moe.experts.59.w3", "model.layers.7.block_sparse_moe.experts.60.w3", "model.layers.7.block_sparse_moe.experts.61.w3", "model.layers.7.block_sparse_moe.experts.62.w3", "model.layers.7.block_sparse_moe.experts.63.w3", "model.layers.7.block_sparse_moe.experts.64.w3", "model.layers.7.block_sparse_moe.experts.65.w3", "model.layers.7.block_sparse_moe.experts.66.w3", "model.layers.7.block_sparse_moe.experts.67.w3", "model.layers.7.block_sparse_moe.experts.68.w3", "model.layers.7.block_sparse_moe.experts.69.w3", "model.layers.7.block_sparse_moe.experts.70.w3", "model.layers.7.block_sparse_moe.experts.71.w3", "model.layers.7.block_sparse_moe.experts.72.w3", "model.layers.7.block_sparse_moe.experts.73.w3", "model.layers.7.block_sparse_moe.experts.74.w3", "model.layers.7.block_sparse_moe.experts.75.w3", "model.layers.7.block_sparse_moe.experts.76.w3", "model.layers.7.block_sparse_moe.experts.77.w3", "model.layers.7.block_sparse_moe.experts.78.w3", "model.layers.7.block_sparse_moe.experts.79.w3", "model.layers.7.block_sparse_moe.experts.80.w3", "model.layers.7.block_sparse_moe.experts.81.w3", "model.layers.7.block_sparse_moe.experts.82.w3", "model.layers.7.block_sparse_moe.experts.83.w3", "model.layers.7.block_sparse_moe.experts.84.w3", "model.layers.7.block_sparse_moe.experts.85.w3", "model.layers.7.block_sparse_moe.experts.86.w3", "model.layers.7.block_sparse_moe.experts.87.w3", "model.layers.7.block_sparse_moe.experts.88.w3", "model.layers.7.block_sparse_moe.experts.89.w3", "model.layers.7.block_sparse_moe.experts.90.w3", "model.layers.7.block_sparse_moe.experts.91.w3", "model.layers.7.block_sparse_moe.experts.92.w3", "model.layers.7.block_sparse_moe.experts.93.w3", "model.layers.7.block_sparse_moe.experts.94.w3", "model.layers.7.block_sparse_moe.experts.95.w3", "model.layers.7.block_sparse_moe.experts.96.w3", "model.layers.7.block_sparse_moe.experts.97.w3", "model.layers.7.block_sparse_moe.experts.98.w3", "model.layers.7.block_sparse_moe.experts.99.w3", "model.layers.7.block_sparse_moe.experts.100.w3", "model.layers.7.block_sparse_moe.experts.101.w3", "model.layers.7.block_sparse_moe.experts.102.w3", "model.layers.7.block_sparse_moe.experts.103.w3", "model.layers.7.block_sparse_moe.experts.104.w3", "model.layers.7.block_sparse_moe.experts.105.w3", "model.layers.7.block_sparse_moe.experts.106.w3", "model.layers.7.block_sparse_moe.experts.107.w3", "model.layers.7.block_sparse_moe.experts.108.w3", "model.layers.7.block_sparse_moe.experts.109.w3", "model.layers.7.block_sparse_moe.experts.110.w3", "model.layers.7.block_sparse_moe.experts.111.w3", "model.layers.7.block_sparse_moe.experts.112.w3", "model.layers.7.block_sparse_moe.experts.113.w3", "model.layers.7.block_sparse_moe.experts.114.w3", "model.layers.7.block_sparse_moe.experts.115.w3", "model.layers.7.block_sparse_moe.experts.116.w3", "model.layers.7.block_sparse_moe.experts.117.w3", "model.layers.7.block_sparse_moe.experts.118.w3", "model.layers.7.block_sparse_moe.experts.119.w3", "model.layers.7.block_sparse_moe.experts.120.w3", "model.layers.7.block_sparse_moe.experts.121.w3", "model.layers.7.block_sparse_moe.experts.122.w3", "model.layers.7.block_sparse_moe.experts.123.w3", "model.layers.7.block_sparse_moe.experts.124.w3", "model.layers.7.block_sparse_moe.experts.125.w3", "model.layers.7.block_sparse_moe.experts.126.w3", "model.layers.7.block_sparse_moe.experts.127.w3", "model.layers.7.block_sparse_moe.experts.128.w3", "model.layers.7.block_sparse_moe.experts.129.w3", "model.layers.7.block_sparse_moe.experts.130.w3", "model.layers.7.block_sparse_moe.experts.131.w3", "model.layers.7.block_sparse_moe.experts.132.w3", "model.layers.7.block_sparse_moe.experts.133.w3", "model.layers.7.block_sparse_moe.experts.134.w3", "model.layers.7.block_sparse_moe.experts.135.w3", "model.layers.7.block_sparse_moe.experts.136.w3", "model.layers.7.block_sparse_moe.experts.137.w3", "model.layers.7.block_sparse_moe.experts.138.w3", "model.layers.7.block_sparse_moe.experts.139.w3", "model.layers.7.block_sparse_moe.experts.140.w3", "model.layers.7.block_sparse_moe.experts.141.w3", "model.layers.7.block_sparse_moe.experts.142.w3", "model.layers.7.block_sparse_moe.experts.143.w3", "model.layers.7.block_sparse_moe.experts.144.w3", "model.layers.7.block_sparse_moe.experts.145.w3", "model.layers.7.block_sparse_moe.experts.146.w3", "model.layers.7.block_sparse_moe.experts.147.w3", "model.layers.7.block_sparse_moe.experts.148.w3", "model.layers.7.block_sparse_moe.experts.149.w3", "model.layers.7.block_sparse_moe.experts.150.w3", "model.layers.7.block_sparse_moe.experts.151.w3", "model.layers.7.block_sparse_moe.experts.152.w3", "model.layers.7.block_sparse_moe.experts.153.w3", "model.layers.7.block_sparse_moe.experts.154.w3", "model.layers.7.block_sparse_moe.experts.155.w3", "model.layers.7.block_sparse_moe.experts.156.w3", "model.layers.7.block_sparse_moe.experts.157.w3", "model.layers.7.block_sparse_moe.experts.158.w3", "model.layers.7.block_sparse_moe.experts.159.w3", "model.layers.7.block_sparse_moe.experts.160.w3", "model.layers.7.block_sparse_moe.experts.161.w3", "model.layers.7.block_sparse_moe.experts.162.w3", "model.layers.7.block_sparse_moe.experts.163.w3", "model.layers.7.block_sparse_moe.experts.164.w3", "model.layers.7.block_sparse_moe.experts.165.w3", "model.layers.7.block_sparse_moe.experts.166.w3", "model.layers.7.block_sparse_moe.experts.167.w3", "model.layers.7.block_sparse_moe.experts.168.w3", "model.layers.7.block_sparse_moe.experts.169.w3", "model.layers.7.block_sparse_moe.experts.170.w3", "model.layers.7.block_sparse_moe.experts.171.w3", "model.layers.7.block_sparse_moe.experts.172.w3", "model.layers.7.block_sparse_moe.experts.173.w3", "model.layers.7.block_sparse_moe.experts.174.w3", "model.layers.7.block_sparse_moe.experts.175.w3", "model.layers.7.block_sparse_moe.experts.176.w3", "model.layers.7.block_sparse_moe.experts.177.w3", "model.layers.7.block_sparse_moe.experts.178.w3", "model.layers.7.block_sparse_moe.experts.179.w3", "model.layers.7.block_sparse_moe.experts.180.w3", "model.layers.7.block_sparse_moe.experts.181.w3", "model.layers.7.block_sparse_moe.experts.182.w3", "model.layers.7.block_sparse_moe.experts.183.w3", "model.layers.7.block_sparse_moe.experts.184.w3", "model.layers.7.block_sparse_moe.experts.185.w3", "model.layers.7.block_sparse_moe.experts.186.w3", "model.layers.7.block_sparse_moe.experts.187.w3", "model.layers.7.block_sparse_moe.experts.188.w3", "model.layers.7.block_sparse_moe.experts.189.w3", "model.layers.7.block_sparse_moe.experts.190.w3", "model.layers.7.block_sparse_moe.experts.191.w3", "model.layers.7.block_sparse_moe.experts.192.w3", "model.layers.7.block_sparse_moe.experts.193.w3", "model.layers.7.block_sparse_moe.experts.194.w3", "model.layers.7.block_sparse_moe.experts.195.w3", "model.layers.7.block_sparse_moe.experts.196.w3", "model.layers.7.block_sparse_moe.experts.197.w3", "model.layers.7.block_sparse_moe.experts.198.w3", "model.layers.7.block_sparse_moe.experts.199.w3", "model.layers.7.block_sparse_moe.experts.200.w3", "model.layers.7.block_sparse_moe.experts.201.w3", "model.layers.7.block_sparse_moe.experts.202.w3", "model.layers.7.block_sparse_moe.experts.203.w3", "model.layers.7.block_sparse_moe.experts.204.w3", "model.layers.7.block_sparse_moe.experts.205.w3", "model.layers.7.block_sparse_moe.experts.206.w3", "model.layers.7.block_sparse_moe.experts.207.w3", "model.layers.7.block_sparse_moe.experts.208.w3", "model.layers.7.block_sparse_moe.experts.209.w3", "model.layers.7.block_sparse_moe.experts.210.w3", "model.layers.7.block_sparse_moe.experts.211.w3", "model.layers.7.block_sparse_moe.experts.212.w3", "model.layers.7.block_sparse_moe.experts.213.w3", "model.layers.7.block_sparse_moe.experts.214.w3", "model.layers.7.block_sparse_moe.experts.215.w3", "model.layers.7.block_sparse_moe.experts.216.w3", "model.layers.7.block_sparse_moe.experts.217.w3", "model.layers.7.block_sparse_moe.experts.218.w3", "model.layers.7.block_sparse_moe.experts.219.w3", "model.layers.7.block_sparse_moe.experts.220.w3", "model.layers.7.block_sparse_moe.experts.221.w3", "model.layers.7.block_sparse_moe.experts.222.w3", "model.layers.7.block_sparse_moe.experts.223.w3", "model.layers.7.block_sparse_moe.experts.224.w3", "model.layers.7.block_sparse_moe.experts.225.w3", "model.layers.7.block_sparse_moe.experts.226.w3", "model.layers.7.block_sparse_moe.experts.227.w3", "model.layers.7.block_sparse_moe.experts.228.w3", "model.layers.7.block_sparse_moe.experts.229.w3", "model.layers.7.block_sparse_moe.experts.230.w3", "model.layers.7.block_sparse_moe.experts.231.w3", "model.layers.7.block_sparse_moe.experts.232.w3", "model.layers.7.block_sparse_moe.experts.233.w3", "model.layers.7.block_sparse_moe.experts.234.w3", "model.layers.7.block_sparse_moe.experts.235.w3", "model.layers.7.block_sparse_moe.experts.236.w3", "model.layers.7.block_sparse_moe.experts.237.w3", "model.layers.7.block_sparse_moe.experts.238.w3", "model.layers.7.block_sparse_moe.experts.239.w3", "model.layers.7.block_sparse_moe.experts.240.w3", "model.layers.7.block_sparse_moe.experts.241.w3", "model.layers.7.block_sparse_moe.experts.242.w3", "model.layers.7.block_sparse_moe.experts.243.w3", "model.layers.7.block_sparse_moe.experts.244.w3", "model.layers.7.block_sparse_moe.experts.245.w3", "model.layers.7.block_sparse_moe.experts.246.w3", "model.layers.7.block_sparse_moe.experts.247.w3", "model.layers.7.block_sparse_moe.experts.248.w3", "model.layers.7.block_sparse_moe.experts.249.w3", "model.layers.7.block_sparse_moe.experts.250.w3", "model.layers.7.block_sparse_moe.experts.251.w3", "model.layers.7.block_sparse_moe.experts.252.w3", "model.layers.7.block_sparse_moe.experts.253.w3", "model.layers.7.block_sparse_moe.experts.254.w3", "model.layers.7.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.010673793032765388, "dbits": 2415919104 } ] }, { "idx": 39, "layers": [ "model.layers.7.block_sparse_moe.experts.0.w2", "model.layers.7.block_sparse_moe.experts.1.w2", "model.layers.7.block_sparse_moe.experts.2.w2", "model.layers.7.block_sparse_moe.experts.3.w2", "model.layers.7.block_sparse_moe.experts.4.w2", "model.layers.7.block_sparse_moe.experts.5.w2", "model.layers.7.block_sparse_moe.experts.6.w2", "model.layers.7.block_sparse_moe.experts.7.w2", "model.layers.7.block_sparse_moe.experts.8.w2", "model.layers.7.block_sparse_moe.experts.9.w2", "model.layers.7.block_sparse_moe.experts.10.w2", "model.layers.7.block_sparse_moe.experts.11.w2", "model.layers.7.block_sparse_moe.experts.12.w2", "model.layers.7.block_sparse_moe.experts.13.w2", "model.layers.7.block_sparse_moe.experts.14.w2", "model.layers.7.block_sparse_moe.experts.15.w2", "model.layers.7.block_sparse_moe.experts.16.w2", "model.layers.7.block_sparse_moe.experts.17.w2", "model.layers.7.block_sparse_moe.experts.18.w2", "model.layers.7.block_sparse_moe.experts.19.w2", "model.layers.7.block_sparse_moe.experts.20.w2", "model.layers.7.block_sparse_moe.experts.21.w2", "model.layers.7.block_sparse_moe.experts.22.w2", "model.layers.7.block_sparse_moe.experts.23.w2", "model.layers.7.block_sparse_moe.experts.24.w2", "model.layers.7.block_sparse_moe.experts.25.w2", "model.layers.7.block_sparse_moe.experts.26.w2", "model.layers.7.block_sparse_moe.experts.27.w2", "model.layers.7.block_sparse_moe.experts.28.w2", "model.layers.7.block_sparse_moe.experts.29.w2", "model.layers.7.block_sparse_moe.experts.30.w2", "model.layers.7.block_sparse_moe.experts.31.w2", "model.layers.7.block_sparse_moe.experts.32.w2", "model.layers.7.block_sparse_moe.experts.33.w2", "model.layers.7.block_sparse_moe.experts.34.w2", "model.layers.7.block_sparse_moe.experts.35.w2", "model.layers.7.block_sparse_moe.experts.36.w2", "model.layers.7.block_sparse_moe.experts.37.w2", "model.layers.7.block_sparse_moe.experts.38.w2", "model.layers.7.block_sparse_moe.experts.39.w2", "model.layers.7.block_sparse_moe.experts.40.w2", "model.layers.7.block_sparse_moe.experts.41.w2", "model.layers.7.block_sparse_moe.experts.42.w2", "model.layers.7.block_sparse_moe.experts.43.w2", "model.layers.7.block_sparse_moe.experts.44.w2", "model.layers.7.block_sparse_moe.experts.45.w2", "model.layers.7.block_sparse_moe.experts.46.w2", "model.layers.7.block_sparse_moe.experts.47.w2", "model.layers.7.block_sparse_moe.experts.48.w2", "model.layers.7.block_sparse_moe.experts.49.w2", "model.layers.7.block_sparse_moe.experts.50.w2", "model.layers.7.block_sparse_moe.experts.51.w2", "model.layers.7.block_sparse_moe.experts.52.w2", "model.layers.7.block_sparse_moe.experts.53.w2", "model.layers.7.block_sparse_moe.experts.54.w2", "model.layers.7.block_sparse_moe.experts.55.w2", "model.layers.7.block_sparse_moe.experts.56.w2", "model.layers.7.block_sparse_moe.experts.57.w2", "model.layers.7.block_sparse_moe.experts.58.w2", "model.layers.7.block_sparse_moe.experts.59.w2", "model.layers.7.block_sparse_moe.experts.60.w2", "model.layers.7.block_sparse_moe.experts.61.w2", "model.layers.7.block_sparse_moe.experts.62.w2", "model.layers.7.block_sparse_moe.experts.63.w2", "model.layers.7.block_sparse_moe.experts.64.w2", "model.layers.7.block_sparse_moe.experts.65.w2", "model.layers.7.block_sparse_moe.experts.66.w2", "model.layers.7.block_sparse_moe.experts.67.w2", "model.layers.7.block_sparse_moe.experts.68.w2", "model.layers.7.block_sparse_moe.experts.69.w2", "model.layers.7.block_sparse_moe.experts.70.w2", "model.layers.7.block_sparse_moe.experts.71.w2", "model.layers.7.block_sparse_moe.experts.72.w2", "model.layers.7.block_sparse_moe.experts.73.w2", "model.layers.7.block_sparse_moe.experts.74.w2", "model.layers.7.block_sparse_moe.experts.75.w2", "model.layers.7.block_sparse_moe.experts.76.w2", "model.layers.7.block_sparse_moe.experts.77.w2", "model.layers.7.block_sparse_moe.experts.78.w2", "model.layers.7.block_sparse_moe.experts.79.w2", "model.layers.7.block_sparse_moe.experts.80.w2", "model.layers.7.block_sparse_moe.experts.81.w2", "model.layers.7.block_sparse_moe.experts.82.w2", "model.layers.7.block_sparse_moe.experts.83.w2", "model.layers.7.block_sparse_moe.experts.84.w2", "model.layers.7.block_sparse_moe.experts.85.w2", "model.layers.7.block_sparse_moe.experts.86.w2", "model.layers.7.block_sparse_moe.experts.87.w2", "model.layers.7.block_sparse_moe.experts.88.w2", "model.layers.7.block_sparse_moe.experts.89.w2", "model.layers.7.block_sparse_moe.experts.90.w2", "model.layers.7.block_sparse_moe.experts.91.w2", "model.layers.7.block_sparse_moe.experts.92.w2", "model.layers.7.block_sparse_moe.experts.93.w2", "model.layers.7.block_sparse_moe.experts.94.w2", "model.layers.7.block_sparse_moe.experts.95.w2", "model.layers.7.block_sparse_moe.experts.96.w2", "model.layers.7.block_sparse_moe.experts.97.w2", "model.layers.7.block_sparse_moe.experts.98.w2", "model.layers.7.block_sparse_moe.experts.99.w2", "model.layers.7.block_sparse_moe.experts.100.w2", "model.layers.7.block_sparse_moe.experts.101.w2", "model.layers.7.block_sparse_moe.experts.102.w2", "model.layers.7.block_sparse_moe.experts.103.w2", "model.layers.7.block_sparse_moe.experts.104.w2", "model.layers.7.block_sparse_moe.experts.105.w2", "model.layers.7.block_sparse_moe.experts.106.w2", "model.layers.7.block_sparse_moe.experts.107.w2", "model.layers.7.block_sparse_moe.experts.108.w2", "model.layers.7.block_sparse_moe.experts.109.w2", "model.layers.7.block_sparse_moe.experts.110.w2", "model.layers.7.block_sparse_moe.experts.111.w2", "model.layers.7.block_sparse_moe.experts.112.w2", "model.layers.7.block_sparse_moe.experts.113.w2", "model.layers.7.block_sparse_moe.experts.114.w2", "model.layers.7.block_sparse_moe.experts.115.w2", "model.layers.7.block_sparse_moe.experts.116.w2", "model.layers.7.block_sparse_moe.experts.117.w2", "model.layers.7.block_sparse_moe.experts.118.w2", "model.layers.7.block_sparse_moe.experts.119.w2", "model.layers.7.block_sparse_moe.experts.120.w2", "model.layers.7.block_sparse_moe.experts.121.w2", "model.layers.7.block_sparse_moe.experts.122.w2", "model.layers.7.block_sparse_moe.experts.123.w2", "model.layers.7.block_sparse_moe.experts.124.w2", "model.layers.7.block_sparse_moe.experts.125.w2", "model.layers.7.block_sparse_moe.experts.126.w2", "model.layers.7.block_sparse_moe.experts.127.w2", "model.layers.7.block_sparse_moe.experts.128.w2", "model.layers.7.block_sparse_moe.experts.129.w2", "model.layers.7.block_sparse_moe.experts.130.w2", "model.layers.7.block_sparse_moe.experts.131.w2", "model.layers.7.block_sparse_moe.experts.132.w2", "model.layers.7.block_sparse_moe.experts.133.w2", "model.layers.7.block_sparse_moe.experts.134.w2", "model.layers.7.block_sparse_moe.experts.135.w2", "model.layers.7.block_sparse_moe.experts.136.w2", "model.layers.7.block_sparse_moe.experts.137.w2", "model.layers.7.block_sparse_moe.experts.138.w2", "model.layers.7.block_sparse_moe.experts.139.w2", "model.layers.7.block_sparse_moe.experts.140.w2", "model.layers.7.block_sparse_moe.experts.141.w2", "model.layers.7.block_sparse_moe.experts.142.w2", "model.layers.7.block_sparse_moe.experts.143.w2", "model.layers.7.block_sparse_moe.experts.144.w2", "model.layers.7.block_sparse_moe.experts.145.w2", "model.layers.7.block_sparse_moe.experts.146.w2", "model.layers.7.block_sparse_moe.experts.147.w2", "model.layers.7.block_sparse_moe.experts.148.w2", "model.layers.7.block_sparse_moe.experts.149.w2", "model.layers.7.block_sparse_moe.experts.150.w2", "model.layers.7.block_sparse_moe.experts.151.w2", "model.layers.7.block_sparse_moe.experts.152.w2", "model.layers.7.block_sparse_moe.experts.153.w2", "model.layers.7.block_sparse_moe.experts.154.w2", "model.layers.7.block_sparse_moe.experts.155.w2", "model.layers.7.block_sparse_moe.experts.156.w2", "model.layers.7.block_sparse_moe.experts.157.w2", "model.layers.7.block_sparse_moe.experts.158.w2", "model.layers.7.block_sparse_moe.experts.159.w2", "model.layers.7.block_sparse_moe.experts.160.w2", "model.layers.7.block_sparse_moe.experts.161.w2", "model.layers.7.block_sparse_moe.experts.162.w2", "model.layers.7.block_sparse_moe.experts.163.w2", "model.layers.7.block_sparse_moe.experts.164.w2", "model.layers.7.block_sparse_moe.experts.165.w2", "model.layers.7.block_sparse_moe.experts.166.w2", "model.layers.7.block_sparse_moe.experts.167.w2", "model.layers.7.block_sparse_moe.experts.168.w2", "model.layers.7.block_sparse_moe.experts.169.w2", "model.layers.7.block_sparse_moe.experts.170.w2", "model.layers.7.block_sparse_moe.experts.171.w2", "model.layers.7.block_sparse_moe.experts.172.w2", "model.layers.7.block_sparse_moe.experts.173.w2", "model.layers.7.block_sparse_moe.experts.174.w2", "model.layers.7.block_sparse_moe.experts.175.w2", "model.layers.7.block_sparse_moe.experts.176.w2", "model.layers.7.block_sparse_moe.experts.177.w2", "model.layers.7.block_sparse_moe.experts.178.w2", "model.layers.7.block_sparse_moe.experts.179.w2", "model.layers.7.block_sparse_moe.experts.180.w2", "model.layers.7.block_sparse_moe.experts.181.w2", "model.layers.7.block_sparse_moe.experts.182.w2", "model.layers.7.block_sparse_moe.experts.183.w2", "model.layers.7.block_sparse_moe.experts.184.w2", "model.layers.7.block_sparse_moe.experts.185.w2", "model.layers.7.block_sparse_moe.experts.186.w2", "model.layers.7.block_sparse_moe.experts.187.w2", "model.layers.7.block_sparse_moe.experts.188.w2", "model.layers.7.block_sparse_moe.experts.189.w2", "model.layers.7.block_sparse_moe.experts.190.w2", "model.layers.7.block_sparse_moe.experts.191.w2", "model.layers.7.block_sparse_moe.experts.192.w2", "model.layers.7.block_sparse_moe.experts.193.w2", "model.layers.7.block_sparse_moe.experts.194.w2", "model.layers.7.block_sparse_moe.experts.195.w2", "model.layers.7.block_sparse_moe.experts.196.w2", "model.layers.7.block_sparse_moe.experts.197.w2", "model.layers.7.block_sparse_moe.experts.198.w2", "model.layers.7.block_sparse_moe.experts.199.w2", "model.layers.7.block_sparse_moe.experts.200.w2", "model.layers.7.block_sparse_moe.experts.201.w2", "model.layers.7.block_sparse_moe.experts.202.w2", "model.layers.7.block_sparse_moe.experts.203.w2", "model.layers.7.block_sparse_moe.experts.204.w2", "model.layers.7.block_sparse_moe.experts.205.w2", "model.layers.7.block_sparse_moe.experts.206.w2", "model.layers.7.block_sparse_moe.experts.207.w2", "model.layers.7.block_sparse_moe.experts.208.w2", "model.layers.7.block_sparse_moe.experts.209.w2", "model.layers.7.block_sparse_moe.experts.210.w2", "model.layers.7.block_sparse_moe.experts.211.w2", "model.layers.7.block_sparse_moe.experts.212.w2", "model.layers.7.block_sparse_moe.experts.213.w2", "model.layers.7.block_sparse_moe.experts.214.w2", "model.layers.7.block_sparse_moe.experts.215.w2", "model.layers.7.block_sparse_moe.experts.216.w2", "model.layers.7.block_sparse_moe.experts.217.w2", "model.layers.7.block_sparse_moe.experts.218.w2", "model.layers.7.block_sparse_moe.experts.219.w2", "model.layers.7.block_sparse_moe.experts.220.w2", "model.layers.7.block_sparse_moe.experts.221.w2", "model.layers.7.block_sparse_moe.experts.222.w2", "model.layers.7.block_sparse_moe.experts.223.w2", "model.layers.7.block_sparse_moe.experts.224.w2", "model.layers.7.block_sparse_moe.experts.225.w2", "model.layers.7.block_sparse_moe.experts.226.w2", "model.layers.7.block_sparse_moe.experts.227.w2", "model.layers.7.block_sparse_moe.experts.228.w2", "model.layers.7.block_sparse_moe.experts.229.w2", "model.layers.7.block_sparse_moe.experts.230.w2", "model.layers.7.block_sparse_moe.experts.231.w2", "model.layers.7.block_sparse_moe.experts.232.w2", "model.layers.7.block_sparse_moe.experts.233.w2", "model.layers.7.block_sparse_moe.experts.234.w2", "model.layers.7.block_sparse_moe.experts.235.w2", "model.layers.7.block_sparse_moe.experts.236.w2", "model.layers.7.block_sparse_moe.experts.237.w2", "model.layers.7.block_sparse_moe.experts.238.w2", "model.layers.7.block_sparse_moe.experts.239.w2", "model.layers.7.block_sparse_moe.experts.240.w2", "model.layers.7.block_sparse_moe.experts.241.w2", "model.layers.7.block_sparse_moe.experts.242.w2", "model.layers.7.block_sparse_moe.experts.243.w2", "model.layers.7.block_sparse_moe.experts.244.w2", "model.layers.7.block_sparse_moe.experts.245.w2", "model.layers.7.block_sparse_moe.experts.246.w2", "model.layers.7.block_sparse_moe.experts.247.w2", "model.layers.7.block_sparse_moe.experts.248.w2", "model.layers.7.block_sparse_moe.experts.249.w2", "model.layers.7.block_sparse_moe.experts.250.w2", "model.layers.7.block_sparse_moe.experts.251.w2", "model.layers.7.block_sparse_moe.experts.252.w2", "model.layers.7.block_sparse_moe.experts.253.w2", "model.layers.7.block_sparse_moe.experts.254.w2", "model.layers.7.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.001611206308007207, "dbits": 1207959552 } ] }, { "idx": 40, "layers": [ "model.layers.8.self_attn.q_proj" ], "candidates": [ { "dkld": 0.00123676359653474, "dbits": 18874368 } ] }, { "idx": 41, "layers": [ "model.layers.8.self_attn.k_proj", "model.layers.8.self_attn.v_proj" ], "candidates": [ { "dkld": -0.006497730687260617, "dbits": 6291456 } ] }, { "idx": 42, "layers": [ "model.layers.8.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0020079679787158855, "dbits": 18874368 } ] }, { "idx": 43, "layers": [ "model.layers.8.block_sparse_moe.experts.0.w1", "model.layers.8.block_sparse_moe.experts.1.w1", "model.layers.8.block_sparse_moe.experts.2.w1", "model.layers.8.block_sparse_moe.experts.3.w1", "model.layers.8.block_sparse_moe.experts.4.w1", "model.layers.8.block_sparse_moe.experts.5.w1", "model.layers.8.block_sparse_moe.experts.6.w1", "model.layers.8.block_sparse_moe.experts.7.w1", "model.layers.8.block_sparse_moe.experts.8.w1", "model.layers.8.block_sparse_moe.experts.9.w1", "model.layers.8.block_sparse_moe.experts.10.w1", "model.layers.8.block_sparse_moe.experts.11.w1", "model.layers.8.block_sparse_moe.experts.12.w1", "model.layers.8.block_sparse_moe.experts.13.w1", "model.layers.8.block_sparse_moe.experts.14.w1", "model.layers.8.block_sparse_moe.experts.15.w1", "model.layers.8.block_sparse_moe.experts.16.w1", "model.layers.8.block_sparse_moe.experts.17.w1", "model.layers.8.block_sparse_moe.experts.18.w1", "model.layers.8.block_sparse_moe.experts.19.w1", "model.layers.8.block_sparse_moe.experts.20.w1", "model.layers.8.block_sparse_moe.experts.21.w1", "model.layers.8.block_sparse_moe.experts.22.w1", "model.layers.8.block_sparse_moe.experts.23.w1", "model.layers.8.block_sparse_moe.experts.24.w1", "model.layers.8.block_sparse_moe.experts.25.w1", "model.layers.8.block_sparse_moe.experts.26.w1", "model.layers.8.block_sparse_moe.experts.27.w1", "model.layers.8.block_sparse_moe.experts.28.w1", "model.layers.8.block_sparse_moe.experts.29.w1", "model.layers.8.block_sparse_moe.experts.30.w1", "model.layers.8.block_sparse_moe.experts.31.w1", "model.layers.8.block_sparse_moe.experts.32.w1", "model.layers.8.block_sparse_moe.experts.33.w1", "model.layers.8.block_sparse_moe.experts.34.w1", "model.layers.8.block_sparse_moe.experts.35.w1", "model.layers.8.block_sparse_moe.experts.36.w1", "model.layers.8.block_sparse_moe.experts.37.w1", "model.layers.8.block_sparse_moe.experts.38.w1", "model.layers.8.block_sparse_moe.experts.39.w1", "model.layers.8.block_sparse_moe.experts.40.w1", "model.layers.8.block_sparse_moe.experts.41.w1", "model.layers.8.block_sparse_moe.experts.42.w1", "model.layers.8.block_sparse_moe.experts.43.w1", "model.layers.8.block_sparse_moe.experts.44.w1", "model.layers.8.block_sparse_moe.experts.45.w1", "model.layers.8.block_sparse_moe.experts.46.w1", "model.layers.8.block_sparse_moe.experts.47.w1", "model.layers.8.block_sparse_moe.experts.48.w1", "model.layers.8.block_sparse_moe.experts.49.w1", "model.layers.8.block_sparse_moe.experts.50.w1", "model.layers.8.block_sparse_moe.experts.51.w1", "model.layers.8.block_sparse_moe.experts.52.w1", "model.layers.8.block_sparse_moe.experts.53.w1", "model.layers.8.block_sparse_moe.experts.54.w1", "model.layers.8.block_sparse_moe.experts.55.w1", "model.layers.8.block_sparse_moe.experts.56.w1", "model.layers.8.block_sparse_moe.experts.57.w1", "model.layers.8.block_sparse_moe.experts.58.w1", "model.layers.8.block_sparse_moe.experts.59.w1", "model.layers.8.block_sparse_moe.experts.60.w1", "model.layers.8.block_sparse_moe.experts.61.w1", "model.layers.8.block_sparse_moe.experts.62.w1", "model.layers.8.block_sparse_moe.experts.63.w1", "model.layers.8.block_sparse_moe.experts.64.w1", "model.layers.8.block_sparse_moe.experts.65.w1", "model.layers.8.block_sparse_moe.experts.66.w1", "model.layers.8.block_sparse_moe.experts.67.w1", "model.layers.8.block_sparse_moe.experts.68.w1", "model.layers.8.block_sparse_moe.experts.69.w1", "model.layers.8.block_sparse_moe.experts.70.w1", "model.layers.8.block_sparse_moe.experts.71.w1", "model.layers.8.block_sparse_moe.experts.72.w1", "model.layers.8.block_sparse_moe.experts.73.w1", "model.layers.8.block_sparse_moe.experts.74.w1", "model.layers.8.block_sparse_moe.experts.75.w1", "model.layers.8.block_sparse_moe.experts.76.w1", "model.layers.8.block_sparse_moe.experts.77.w1", "model.layers.8.block_sparse_moe.experts.78.w1", "model.layers.8.block_sparse_moe.experts.79.w1", "model.layers.8.block_sparse_moe.experts.80.w1", "model.layers.8.block_sparse_moe.experts.81.w1", "model.layers.8.block_sparse_moe.experts.82.w1", "model.layers.8.block_sparse_moe.experts.83.w1", "model.layers.8.block_sparse_moe.experts.84.w1", "model.layers.8.block_sparse_moe.experts.85.w1", "model.layers.8.block_sparse_moe.experts.86.w1", "model.layers.8.block_sparse_moe.experts.87.w1", "model.layers.8.block_sparse_moe.experts.88.w1", "model.layers.8.block_sparse_moe.experts.89.w1", "model.layers.8.block_sparse_moe.experts.90.w1", "model.layers.8.block_sparse_moe.experts.91.w1", "model.layers.8.block_sparse_moe.experts.92.w1", "model.layers.8.block_sparse_moe.experts.93.w1", "model.layers.8.block_sparse_moe.experts.94.w1", "model.layers.8.block_sparse_moe.experts.95.w1", "model.layers.8.block_sparse_moe.experts.96.w1", "model.layers.8.block_sparse_moe.experts.97.w1", "model.layers.8.block_sparse_moe.experts.98.w1", "model.layers.8.block_sparse_moe.experts.99.w1", "model.layers.8.block_sparse_moe.experts.100.w1", "model.layers.8.block_sparse_moe.experts.101.w1", "model.layers.8.block_sparse_moe.experts.102.w1", "model.layers.8.block_sparse_moe.experts.103.w1", "model.layers.8.block_sparse_moe.experts.104.w1", "model.layers.8.block_sparse_moe.experts.105.w1", "model.layers.8.block_sparse_moe.experts.106.w1", "model.layers.8.block_sparse_moe.experts.107.w1", "model.layers.8.block_sparse_moe.experts.108.w1", "model.layers.8.block_sparse_moe.experts.109.w1", "model.layers.8.block_sparse_moe.experts.110.w1", "model.layers.8.block_sparse_moe.experts.111.w1", "model.layers.8.block_sparse_moe.experts.112.w1", "model.layers.8.block_sparse_moe.experts.113.w1", "model.layers.8.block_sparse_moe.experts.114.w1", "model.layers.8.block_sparse_moe.experts.115.w1", "model.layers.8.block_sparse_moe.experts.116.w1", "model.layers.8.block_sparse_moe.experts.117.w1", "model.layers.8.block_sparse_moe.experts.118.w1", "model.layers.8.block_sparse_moe.experts.119.w1", "model.layers.8.block_sparse_moe.experts.120.w1", "model.layers.8.block_sparse_moe.experts.121.w1", "model.layers.8.block_sparse_moe.experts.122.w1", "model.layers.8.block_sparse_moe.experts.123.w1", "model.layers.8.block_sparse_moe.experts.124.w1", "model.layers.8.block_sparse_moe.experts.125.w1", "model.layers.8.block_sparse_moe.experts.126.w1", "model.layers.8.block_sparse_moe.experts.127.w1", "model.layers.8.block_sparse_moe.experts.128.w1", "model.layers.8.block_sparse_moe.experts.129.w1", "model.layers.8.block_sparse_moe.experts.130.w1", "model.layers.8.block_sparse_moe.experts.131.w1", "model.layers.8.block_sparse_moe.experts.132.w1", "model.layers.8.block_sparse_moe.experts.133.w1", "model.layers.8.block_sparse_moe.experts.134.w1", "model.layers.8.block_sparse_moe.experts.135.w1", "model.layers.8.block_sparse_moe.experts.136.w1", "model.layers.8.block_sparse_moe.experts.137.w1", "model.layers.8.block_sparse_moe.experts.138.w1", "model.layers.8.block_sparse_moe.experts.139.w1", "model.layers.8.block_sparse_moe.experts.140.w1", "model.layers.8.block_sparse_moe.experts.141.w1", "model.layers.8.block_sparse_moe.experts.142.w1", "model.layers.8.block_sparse_moe.experts.143.w1", "model.layers.8.block_sparse_moe.experts.144.w1", "model.layers.8.block_sparse_moe.experts.145.w1", "model.layers.8.block_sparse_moe.experts.146.w1", "model.layers.8.block_sparse_moe.experts.147.w1", "model.layers.8.block_sparse_moe.experts.148.w1", "model.layers.8.block_sparse_moe.experts.149.w1", "model.layers.8.block_sparse_moe.experts.150.w1", "model.layers.8.block_sparse_moe.experts.151.w1", "model.layers.8.block_sparse_moe.experts.152.w1", "model.layers.8.block_sparse_moe.experts.153.w1", "model.layers.8.block_sparse_moe.experts.154.w1", "model.layers.8.block_sparse_moe.experts.155.w1", "model.layers.8.block_sparse_moe.experts.156.w1", "model.layers.8.block_sparse_moe.experts.157.w1", "model.layers.8.block_sparse_moe.experts.158.w1", "model.layers.8.block_sparse_moe.experts.159.w1", "model.layers.8.block_sparse_moe.experts.160.w1", "model.layers.8.block_sparse_moe.experts.161.w1", "model.layers.8.block_sparse_moe.experts.162.w1", "model.layers.8.block_sparse_moe.experts.163.w1", "model.layers.8.block_sparse_moe.experts.164.w1", "model.layers.8.block_sparse_moe.experts.165.w1", "model.layers.8.block_sparse_moe.experts.166.w1", "model.layers.8.block_sparse_moe.experts.167.w1", "model.layers.8.block_sparse_moe.experts.168.w1", "model.layers.8.block_sparse_moe.experts.169.w1", "model.layers.8.block_sparse_moe.experts.170.w1", "model.layers.8.block_sparse_moe.experts.171.w1", "model.layers.8.block_sparse_moe.experts.172.w1", "model.layers.8.block_sparse_moe.experts.173.w1", "model.layers.8.block_sparse_moe.experts.174.w1", "model.layers.8.block_sparse_moe.experts.175.w1", "model.layers.8.block_sparse_moe.experts.176.w1", "model.layers.8.block_sparse_moe.experts.177.w1", "model.layers.8.block_sparse_moe.experts.178.w1", "model.layers.8.block_sparse_moe.experts.179.w1", "model.layers.8.block_sparse_moe.experts.180.w1", "model.layers.8.block_sparse_moe.experts.181.w1", "model.layers.8.block_sparse_moe.experts.182.w1", "model.layers.8.block_sparse_moe.experts.183.w1", "model.layers.8.block_sparse_moe.experts.184.w1", "model.layers.8.block_sparse_moe.experts.185.w1", "model.layers.8.block_sparse_moe.experts.186.w1", "model.layers.8.block_sparse_moe.experts.187.w1", "model.layers.8.block_sparse_moe.experts.188.w1", "model.layers.8.block_sparse_moe.experts.189.w1", "model.layers.8.block_sparse_moe.experts.190.w1", "model.layers.8.block_sparse_moe.experts.191.w1", "model.layers.8.block_sparse_moe.experts.192.w1", "model.layers.8.block_sparse_moe.experts.193.w1", "model.layers.8.block_sparse_moe.experts.194.w1", "model.layers.8.block_sparse_moe.experts.195.w1", "model.layers.8.block_sparse_moe.experts.196.w1", "model.layers.8.block_sparse_moe.experts.197.w1", "model.layers.8.block_sparse_moe.experts.198.w1", "model.layers.8.block_sparse_moe.experts.199.w1", "model.layers.8.block_sparse_moe.experts.200.w1", "model.layers.8.block_sparse_moe.experts.201.w1", "model.layers.8.block_sparse_moe.experts.202.w1", "model.layers.8.block_sparse_moe.experts.203.w1", "model.layers.8.block_sparse_moe.experts.204.w1", "model.layers.8.block_sparse_moe.experts.205.w1", "model.layers.8.block_sparse_moe.experts.206.w1", "model.layers.8.block_sparse_moe.experts.207.w1", "model.layers.8.block_sparse_moe.experts.208.w1", "model.layers.8.block_sparse_moe.experts.209.w1", "model.layers.8.block_sparse_moe.experts.210.w1", "model.layers.8.block_sparse_moe.experts.211.w1", "model.layers.8.block_sparse_moe.experts.212.w1", "model.layers.8.block_sparse_moe.experts.213.w1", "model.layers.8.block_sparse_moe.experts.214.w1", "model.layers.8.block_sparse_moe.experts.215.w1", "model.layers.8.block_sparse_moe.experts.216.w1", "model.layers.8.block_sparse_moe.experts.217.w1", "model.layers.8.block_sparse_moe.experts.218.w1", "model.layers.8.block_sparse_moe.experts.219.w1", "model.layers.8.block_sparse_moe.experts.220.w1", "model.layers.8.block_sparse_moe.experts.221.w1", "model.layers.8.block_sparse_moe.experts.222.w1", "model.layers.8.block_sparse_moe.experts.223.w1", "model.layers.8.block_sparse_moe.experts.224.w1", "model.layers.8.block_sparse_moe.experts.225.w1", "model.layers.8.block_sparse_moe.experts.226.w1", "model.layers.8.block_sparse_moe.experts.227.w1", "model.layers.8.block_sparse_moe.experts.228.w1", "model.layers.8.block_sparse_moe.experts.229.w1", "model.layers.8.block_sparse_moe.experts.230.w1", "model.layers.8.block_sparse_moe.experts.231.w1", "model.layers.8.block_sparse_moe.experts.232.w1", "model.layers.8.block_sparse_moe.experts.233.w1", "model.layers.8.block_sparse_moe.experts.234.w1", "model.layers.8.block_sparse_moe.experts.235.w1", "model.layers.8.block_sparse_moe.experts.236.w1", "model.layers.8.block_sparse_moe.experts.237.w1", "model.layers.8.block_sparse_moe.experts.238.w1", "model.layers.8.block_sparse_moe.experts.239.w1", "model.layers.8.block_sparse_moe.experts.240.w1", "model.layers.8.block_sparse_moe.experts.241.w1", "model.layers.8.block_sparse_moe.experts.242.w1", "model.layers.8.block_sparse_moe.experts.243.w1", "model.layers.8.block_sparse_moe.experts.244.w1", "model.layers.8.block_sparse_moe.experts.245.w1", "model.layers.8.block_sparse_moe.experts.246.w1", "model.layers.8.block_sparse_moe.experts.247.w1", "model.layers.8.block_sparse_moe.experts.248.w1", "model.layers.8.block_sparse_moe.experts.249.w1", "model.layers.8.block_sparse_moe.experts.250.w1", "model.layers.8.block_sparse_moe.experts.251.w1", "model.layers.8.block_sparse_moe.experts.252.w1", "model.layers.8.block_sparse_moe.experts.253.w1", "model.layers.8.block_sparse_moe.experts.254.w1", "model.layers.8.block_sparse_moe.experts.255.w1", "model.layers.8.block_sparse_moe.experts.0.w3", "model.layers.8.block_sparse_moe.experts.1.w3", "model.layers.8.block_sparse_moe.experts.2.w3", "model.layers.8.block_sparse_moe.experts.3.w3", "model.layers.8.block_sparse_moe.experts.4.w3", "model.layers.8.block_sparse_moe.experts.5.w3", "model.layers.8.block_sparse_moe.experts.6.w3", "model.layers.8.block_sparse_moe.experts.7.w3", "model.layers.8.block_sparse_moe.experts.8.w3", "model.layers.8.block_sparse_moe.experts.9.w3", "model.layers.8.block_sparse_moe.experts.10.w3", "model.layers.8.block_sparse_moe.experts.11.w3", "model.layers.8.block_sparse_moe.experts.12.w3", "model.layers.8.block_sparse_moe.experts.13.w3", "model.layers.8.block_sparse_moe.experts.14.w3", "model.layers.8.block_sparse_moe.experts.15.w3", "model.layers.8.block_sparse_moe.experts.16.w3", "model.layers.8.block_sparse_moe.experts.17.w3", "model.layers.8.block_sparse_moe.experts.18.w3", "model.layers.8.block_sparse_moe.experts.19.w3", "model.layers.8.block_sparse_moe.experts.20.w3", "model.layers.8.block_sparse_moe.experts.21.w3", "model.layers.8.block_sparse_moe.experts.22.w3", "model.layers.8.block_sparse_moe.experts.23.w3", "model.layers.8.block_sparse_moe.experts.24.w3", "model.layers.8.block_sparse_moe.experts.25.w3", "model.layers.8.block_sparse_moe.experts.26.w3", "model.layers.8.block_sparse_moe.experts.27.w3", "model.layers.8.block_sparse_moe.experts.28.w3", "model.layers.8.block_sparse_moe.experts.29.w3", "model.layers.8.block_sparse_moe.experts.30.w3", "model.layers.8.block_sparse_moe.experts.31.w3", "model.layers.8.block_sparse_moe.experts.32.w3", "model.layers.8.block_sparse_moe.experts.33.w3", "model.layers.8.block_sparse_moe.experts.34.w3", "model.layers.8.block_sparse_moe.experts.35.w3", "model.layers.8.block_sparse_moe.experts.36.w3", "model.layers.8.block_sparse_moe.experts.37.w3", "model.layers.8.block_sparse_moe.experts.38.w3", "model.layers.8.block_sparse_moe.experts.39.w3", "model.layers.8.block_sparse_moe.experts.40.w3", "model.layers.8.block_sparse_moe.experts.41.w3", "model.layers.8.block_sparse_moe.experts.42.w3", "model.layers.8.block_sparse_moe.experts.43.w3", "model.layers.8.block_sparse_moe.experts.44.w3", "model.layers.8.block_sparse_moe.experts.45.w3", "model.layers.8.block_sparse_moe.experts.46.w3", "model.layers.8.block_sparse_moe.experts.47.w3", "model.layers.8.block_sparse_moe.experts.48.w3", "model.layers.8.block_sparse_moe.experts.49.w3", "model.layers.8.block_sparse_moe.experts.50.w3", "model.layers.8.block_sparse_moe.experts.51.w3", "model.layers.8.block_sparse_moe.experts.52.w3", "model.layers.8.block_sparse_moe.experts.53.w3", "model.layers.8.block_sparse_moe.experts.54.w3", "model.layers.8.block_sparse_moe.experts.55.w3", "model.layers.8.block_sparse_moe.experts.56.w3", "model.layers.8.block_sparse_moe.experts.57.w3", "model.layers.8.block_sparse_moe.experts.58.w3", "model.layers.8.block_sparse_moe.experts.59.w3", "model.layers.8.block_sparse_moe.experts.60.w3", "model.layers.8.block_sparse_moe.experts.61.w3", "model.layers.8.block_sparse_moe.experts.62.w3", "model.layers.8.block_sparse_moe.experts.63.w3", "model.layers.8.block_sparse_moe.experts.64.w3", "model.layers.8.block_sparse_moe.experts.65.w3", "model.layers.8.block_sparse_moe.experts.66.w3", "model.layers.8.block_sparse_moe.experts.67.w3", "model.layers.8.block_sparse_moe.experts.68.w3", "model.layers.8.block_sparse_moe.experts.69.w3", "model.layers.8.block_sparse_moe.experts.70.w3", "model.layers.8.block_sparse_moe.experts.71.w3", "model.layers.8.block_sparse_moe.experts.72.w3", "model.layers.8.block_sparse_moe.experts.73.w3", "model.layers.8.block_sparse_moe.experts.74.w3", "model.layers.8.block_sparse_moe.experts.75.w3", "model.layers.8.block_sparse_moe.experts.76.w3", "model.layers.8.block_sparse_moe.experts.77.w3", "model.layers.8.block_sparse_moe.experts.78.w3", "model.layers.8.block_sparse_moe.experts.79.w3", "model.layers.8.block_sparse_moe.experts.80.w3", "model.layers.8.block_sparse_moe.experts.81.w3", "model.layers.8.block_sparse_moe.experts.82.w3", "model.layers.8.block_sparse_moe.experts.83.w3", "model.layers.8.block_sparse_moe.experts.84.w3", "model.layers.8.block_sparse_moe.experts.85.w3", "model.layers.8.block_sparse_moe.experts.86.w3", "model.layers.8.block_sparse_moe.experts.87.w3", "model.layers.8.block_sparse_moe.experts.88.w3", "model.layers.8.block_sparse_moe.experts.89.w3", "model.layers.8.block_sparse_moe.experts.90.w3", "model.layers.8.block_sparse_moe.experts.91.w3", "model.layers.8.block_sparse_moe.experts.92.w3", "model.layers.8.block_sparse_moe.experts.93.w3", "model.layers.8.block_sparse_moe.experts.94.w3", "model.layers.8.block_sparse_moe.experts.95.w3", "model.layers.8.block_sparse_moe.experts.96.w3", "model.layers.8.block_sparse_moe.experts.97.w3", "model.layers.8.block_sparse_moe.experts.98.w3", "model.layers.8.block_sparse_moe.experts.99.w3", "model.layers.8.block_sparse_moe.experts.100.w3", "model.layers.8.block_sparse_moe.experts.101.w3", "model.layers.8.block_sparse_moe.experts.102.w3", "model.layers.8.block_sparse_moe.experts.103.w3", "model.layers.8.block_sparse_moe.experts.104.w3", "model.layers.8.block_sparse_moe.experts.105.w3", "model.layers.8.block_sparse_moe.experts.106.w3", "model.layers.8.block_sparse_moe.experts.107.w3", "model.layers.8.block_sparse_moe.experts.108.w3", "model.layers.8.block_sparse_moe.experts.109.w3", "model.layers.8.block_sparse_moe.experts.110.w3", "model.layers.8.block_sparse_moe.experts.111.w3", "model.layers.8.block_sparse_moe.experts.112.w3", "model.layers.8.block_sparse_moe.experts.113.w3", "model.layers.8.block_sparse_moe.experts.114.w3", "model.layers.8.block_sparse_moe.experts.115.w3", "model.layers.8.block_sparse_moe.experts.116.w3", "model.layers.8.block_sparse_moe.experts.117.w3", "model.layers.8.block_sparse_moe.experts.118.w3", "model.layers.8.block_sparse_moe.experts.119.w3", "model.layers.8.block_sparse_moe.experts.120.w3", "model.layers.8.block_sparse_moe.experts.121.w3", "model.layers.8.block_sparse_moe.experts.122.w3", "model.layers.8.block_sparse_moe.experts.123.w3", "model.layers.8.block_sparse_moe.experts.124.w3", "model.layers.8.block_sparse_moe.experts.125.w3", "model.layers.8.block_sparse_moe.experts.126.w3", "model.layers.8.block_sparse_moe.experts.127.w3", "model.layers.8.block_sparse_moe.experts.128.w3", "model.layers.8.block_sparse_moe.experts.129.w3", "model.layers.8.block_sparse_moe.experts.130.w3", "model.layers.8.block_sparse_moe.experts.131.w3", "model.layers.8.block_sparse_moe.experts.132.w3", "model.layers.8.block_sparse_moe.experts.133.w3", "model.layers.8.block_sparse_moe.experts.134.w3", "model.layers.8.block_sparse_moe.experts.135.w3", "model.layers.8.block_sparse_moe.experts.136.w3", "model.layers.8.block_sparse_moe.experts.137.w3", "model.layers.8.block_sparse_moe.experts.138.w3", "model.layers.8.block_sparse_moe.experts.139.w3", "model.layers.8.block_sparse_moe.experts.140.w3", "model.layers.8.block_sparse_moe.experts.141.w3", "model.layers.8.block_sparse_moe.experts.142.w3", "model.layers.8.block_sparse_moe.experts.143.w3", "model.layers.8.block_sparse_moe.experts.144.w3", "model.layers.8.block_sparse_moe.experts.145.w3", "model.layers.8.block_sparse_moe.experts.146.w3", "model.layers.8.block_sparse_moe.experts.147.w3", "model.layers.8.block_sparse_moe.experts.148.w3", "model.layers.8.block_sparse_moe.experts.149.w3", "model.layers.8.block_sparse_moe.experts.150.w3", "model.layers.8.block_sparse_moe.experts.151.w3", "model.layers.8.block_sparse_moe.experts.152.w3", "model.layers.8.block_sparse_moe.experts.153.w3", "model.layers.8.block_sparse_moe.experts.154.w3", "model.layers.8.block_sparse_moe.experts.155.w3", "model.layers.8.block_sparse_moe.experts.156.w3", "model.layers.8.block_sparse_moe.experts.157.w3", "model.layers.8.block_sparse_moe.experts.158.w3", "model.layers.8.block_sparse_moe.experts.159.w3", "model.layers.8.block_sparse_moe.experts.160.w3", "model.layers.8.block_sparse_moe.experts.161.w3", "model.layers.8.block_sparse_moe.experts.162.w3", "model.layers.8.block_sparse_moe.experts.163.w3", "model.layers.8.block_sparse_moe.experts.164.w3", "model.layers.8.block_sparse_moe.experts.165.w3", "model.layers.8.block_sparse_moe.experts.166.w3", "model.layers.8.block_sparse_moe.experts.167.w3", "model.layers.8.block_sparse_moe.experts.168.w3", "model.layers.8.block_sparse_moe.experts.169.w3", "model.layers.8.block_sparse_moe.experts.170.w3", "model.layers.8.block_sparse_moe.experts.171.w3", "model.layers.8.block_sparse_moe.experts.172.w3", "model.layers.8.block_sparse_moe.experts.173.w3", "model.layers.8.block_sparse_moe.experts.174.w3", "model.layers.8.block_sparse_moe.experts.175.w3", "model.layers.8.block_sparse_moe.experts.176.w3", "model.layers.8.block_sparse_moe.experts.177.w3", "model.layers.8.block_sparse_moe.experts.178.w3", "model.layers.8.block_sparse_moe.experts.179.w3", "model.layers.8.block_sparse_moe.experts.180.w3", "model.layers.8.block_sparse_moe.experts.181.w3", "model.layers.8.block_sparse_moe.experts.182.w3", "model.layers.8.block_sparse_moe.experts.183.w3", "model.layers.8.block_sparse_moe.experts.184.w3", "model.layers.8.block_sparse_moe.experts.185.w3", "model.layers.8.block_sparse_moe.experts.186.w3", "model.layers.8.block_sparse_moe.experts.187.w3", "model.layers.8.block_sparse_moe.experts.188.w3", "model.layers.8.block_sparse_moe.experts.189.w3", "model.layers.8.block_sparse_moe.experts.190.w3", "model.layers.8.block_sparse_moe.experts.191.w3", "model.layers.8.block_sparse_moe.experts.192.w3", "model.layers.8.block_sparse_moe.experts.193.w3", "model.layers.8.block_sparse_moe.experts.194.w3", "model.layers.8.block_sparse_moe.experts.195.w3", "model.layers.8.block_sparse_moe.experts.196.w3", "model.layers.8.block_sparse_moe.experts.197.w3", "model.layers.8.block_sparse_moe.experts.198.w3", "model.layers.8.block_sparse_moe.experts.199.w3", "model.layers.8.block_sparse_moe.experts.200.w3", "model.layers.8.block_sparse_moe.experts.201.w3", "model.layers.8.block_sparse_moe.experts.202.w3", "model.layers.8.block_sparse_moe.experts.203.w3", "model.layers.8.block_sparse_moe.experts.204.w3", "model.layers.8.block_sparse_moe.experts.205.w3", "model.layers.8.block_sparse_moe.experts.206.w3", "model.layers.8.block_sparse_moe.experts.207.w3", "model.layers.8.block_sparse_moe.experts.208.w3", "model.layers.8.block_sparse_moe.experts.209.w3", "model.layers.8.block_sparse_moe.experts.210.w3", "model.layers.8.block_sparse_moe.experts.211.w3", "model.layers.8.block_sparse_moe.experts.212.w3", "model.layers.8.block_sparse_moe.experts.213.w3", "model.layers.8.block_sparse_moe.experts.214.w3", "model.layers.8.block_sparse_moe.experts.215.w3", "model.layers.8.block_sparse_moe.experts.216.w3", "model.layers.8.block_sparse_moe.experts.217.w3", "model.layers.8.block_sparse_moe.experts.218.w3", "model.layers.8.block_sparse_moe.experts.219.w3", "model.layers.8.block_sparse_moe.experts.220.w3", "model.layers.8.block_sparse_moe.experts.221.w3", "model.layers.8.block_sparse_moe.experts.222.w3", "model.layers.8.block_sparse_moe.experts.223.w3", "model.layers.8.block_sparse_moe.experts.224.w3", "model.layers.8.block_sparse_moe.experts.225.w3", "model.layers.8.block_sparse_moe.experts.226.w3", "model.layers.8.block_sparse_moe.experts.227.w3", "model.layers.8.block_sparse_moe.experts.228.w3", "model.layers.8.block_sparse_moe.experts.229.w3", "model.layers.8.block_sparse_moe.experts.230.w3", "model.layers.8.block_sparse_moe.experts.231.w3", "model.layers.8.block_sparse_moe.experts.232.w3", "model.layers.8.block_sparse_moe.experts.233.w3", "model.layers.8.block_sparse_moe.experts.234.w3", "model.layers.8.block_sparse_moe.experts.235.w3", "model.layers.8.block_sparse_moe.experts.236.w3", "model.layers.8.block_sparse_moe.experts.237.w3", "model.layers.8.block_sparse_moe.experts.238.w3", "model.layers.8.block_sparse_moe.experts.239.w3", "model.layers.8.block_sparse_moe.experts.240.w3", "model.layers.8.block_sparse_moe.experts.241.w3", "model.layers.8.block_sparse_moe.experts.242.w3", "model.layers.8.block_sparse_moe.experts.243.w3", "model.layers.8.block_sparse_moe.experts.244.w3", "model.layers.8.block_sparse_moe.experts.245.w3", "model.layers.8.block_sparse_moe.experts.246.w3", "model.layers.8.block_sparse_moe.experts.247.w3", "model.layers.8.block_sparse_moe.experts.248.w3", "model.layers.8.block_sparse_moe.experts.249.w3", "model.layers.8.block_sparse_moe.experts.250.w3", "model.layers.8.block_sparse_moe.experts.251.w3", "model.layers.8.block_sparse_moe.experts.252.w3", "model.layers.8.block_sparse_moe.experts.253.w3", "model.layers.8.block_sparse_moe.experts.254.w3", "model.layers.8.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.00013701021671297386, "dbits": 2415919104 } ] }, { "idx": 44, "layers": [ "model.layers.8.block_sparse_moe.experts.0.w2", "model.layers.8.block_sparse_moe.experts.1.w2", "model.layers.8.block_sparse_moe.experts.2.w2", "model.layers.8.block_sparse_moe.experts.3.w2", "model.layers.8.block_sparse_moe.experts.4.w2", "model.layers.8.block_sparse_moe.experts.5.w2", "model.layers.8.block_sparse_moe.experts.6.w2", "model.layers.8.block_sparse_moe.experts.7.w2", "model.layers.8.block_sparse_moe.experts.8.w2", "model.layers.8.block_sparse_moe.experts.9.w2", "model.layers.8.block_sparse_moe.experts.10.w2", "model.layers.8.block_sparse_moe.experts.11.w2", "model.layers.8.block_sparse_moe.experts.12.w2", "model.layers.8.block_sparse_moe.experts.13.w2", "model.layers.8.block_sparse_moe.experts.14.w2", "model.layers.8.block_sparse_moe.experts.15.w2", "model.layers.8.block_sparse_moe.experts.16.w2", "model.layers.8.block_sparse_moe.experts.17.w2", "model.layers.8.block_sparse_moe.experts.18.w2", "model.layers.8.block_sparse_moe.experts.19.w2", "model.layers.8.block_sparse_moe.experts.20.w2", "model.layers.8.block_sparse_moe.experts.21.w2", "model.layers.8.block_sparse_moe.experts.22.w2", "model.layers.8.block_sparse_moe.experts.23.w2", "model.layers.8.block_sparse_moe.experts.24.w2", "model.layers.8.block_sparse_moe.experts.25.w2", "model.layers.8.block_sparse_moe.experts.26.w2", "model.layers.8.block_sparse_moe.experts.27.w2", "model.layers.8.block_sparse_moe.experts.28.w2", "model.layers.8.block_sparse_moe.experts.29.w2", "model.layers.8.block_sparse_moe.experts.30.w2", "model.layers.8.block_sparse_moe.experts.31.w2", "model.layers.8.block_sparse_moe.experts.32.w2", "model.layers.8.block_sparse_moe.experts.33.w2", "model.layers.8.block_sparse_moe.experts.34.w2", "model.layers.8.block_sparse_moe.experts.35.w2", "model.layers.8.block_sparse_moe.experts.36.w2", "model.layers.8.block_sparse_moe.experts.37.w2", "model.layers.8.block_sparse_moe.experts.38.w2", "model.layers.8.block_sparse_moe.experts.39.w2", "model.layers.8.block_sparse_moe.experts.40.w2", "model.layers.8.block_sparse_moe.experts.41.w2", "model.layers.8.block_sparse_moe.experts.42.w2", "model.layers.8.block_sparse_moe.experts.43.w2", "model.layers.8.block_sparse_moe.experts.44.w2", "model.layers.8.block_sparse_moe.experts.45.w2", "model.layers.8.block_sparse_moe.experts.46.w2", "model.layers.8.block_sparse_moe.experts.47.w2", "model.layers.8.block_sparse_moe.experts.48.w2", "model.layers.8.block_sparse_moe.experts.49.w2", "model.layers.8.block_sparse_moe.experts.50.w2", "model.layers.8.block_sparse_moe.experts.51.w2", "model.layers.8.block_sparse_moe.experts.52.w2", "model.layers.8.block_sparse_moe.experts.53.w2", "model.layers.8.block_sparse_moe.experts.54.w2", "model.layers.8.block_sparse_moe.experts.55.w2", "model.layers.8.block_sparse_moe.experts.56.w2", "model.layers.8.block_sparse_moe.experts.57.w2", "model.layers.8.block_sparse_moe.experts.58.w2", "model.layers.8.block_sparse_moe.experts.59.w2", "model.layers.8.block_sparse_moe.experts.60.w2", "model.layers.8.block_sparse_moe.experts.61.w2", "model.layers.8.block_sparse_moe.experts.62.w2", "model.layers.8.block_sparse_moe.experts.63.w2", "model.layers.8.block_sparse_moe.experts.64.w2", "model.layers.8.block_sparse_moe.experts.65.w2", "model.layers.8.block_sparse_moe.experts.66.w2", "model.layers.8.block_sparse_moe.experts.67.w2", "model.layers.8.block_sparse_moe.experts.68.w2", "model.layers.8.block_sparse_moe.experts.69.w2", "model.layers.8.block_sparse_moe.experts.70.w2", "model.layers.8.block_sparse_moe.experts.71.w2", "model.layers.8.block_sparse_moe.experts.72.w2", "model.layers.8.block_sparse_moe.experts.73.w2", "model.layers.8.block_sparse_moe.experts.74.w2", "model.layers.8.block_sparse_moe.experts.75.w2", "model.layers.8.block_sparse_moe.experts.76.w2", "model.layers.8.block_sparse_moe.experts.77.w2", "model.layers.8.block_sparse_moe.experts.78.w2", "model.layers.8.block_sparse_moe.experts.79.w2", "model.layers.8.block_sparse_moe.experts.80.w2", "model.layers.8.block_sparse_moe.experts.81.w2", "model.layers.8.block_sparse_moe.experts.82.w2", "model.layers.8.block_sparse_moe.experts.83.w2", "model.layers.8.block_sparse_moe.experts.84.w2", "model.layers.8.block_sparse_moe.experts.85.w2", "model.layers.8.block_sparse_moe.experts.86.w2", "model.layers.8.block_sparse_moe.experts.87.w2", "model.layers.8.block_sparse_moe.experts.88.w2", "model.layers.8.block_sparse_moe.experts.89.w2", "model.layers.8.block_sparse_moe.experts.90.w2", "model.layers.8.block_sparse_moe.experts.91.w2", "model.layers.8.block_sparse_moe.experts.92.w2", "model.layers.8.block_sparse_moe.experts.93.w2", "model.layers.8.block_sparse_moe.experts.94.w2", "model.layers.8.block_sparse_moe.experts.95.w2", "model.layers.8.block_sparse_moe.experts.96.w2", "model.layers.8.block_sparse_moe.experts.97.w2", "model.layers.8.block_sparse_moe.experts.98.w2", "model.layers.8.block_sparse_moe.experts.99.w2", "model.layers.8.block_sparse_moe.experts.100.w2", "model.layers.8.block_sparse_moe.experts.101.w2", "model.layers.8.block_sparse_moe.experts.102.w2", "model.layers.8.block_sparse_moe.experts.103.w2", "model.layers.8.block_sparse_moe.experts.104.w2", "model.layers.8.block_sparse_moe.experts.105.w2", "model.layers.8.block_sparse_moe.experts.106.w2", "model.layers.8.block_sparse_moe.experts.107.w2", "model.layers.8.block_sparse_moe.experts.108.w2", "model.layers.8.block_sparse_moe.experts.109.w2", "model.layers.8.block_sparse_moe.experts.110.w2", "model.layers.8.block_sparse_moe.experts.111.w2", "model.layers.8.block_sparse_moe.experts.112.w2", "model.layers.8.block_sparse_moe.experts.113.w2", "model.layers.8.block_sparse_moe.experts.114.w2", "model.layers.8.block_sparse_moe.experts.115.w2", "model.layers.8.block_sparse_moe.experts.116.w2", "model.layers.8.block_sparse_moe.experts.117.w2", "model.layers.8.block_sparse_moe.experts.118.w2", "model.layers.8.block_sparse_moe.experts.119.w2", "model.layers.8.block_sparse_moe.experts.120.w2", "model.layers.8.block_sparse_moe.experts.121.w2", "model.layers.8.block_sparse_moe.experts.122.w2", "model.layers.8.block_sparse_moe.experts.123.w2", "model.layers.8.block_sparse_moe.experts.124.w2", "model.layers.8.block_sparse_moe.experts.125.w2", "model.layers.8.block_sparse_moe.experts.126.w2", "model.layers.8.block_sparse_moe.experts.127.w2", "model.layers.8.block_sparse_moe.experts.128.w2", "model.layers.8.block_sparse_moe.experts.129.w2", "model.layers.8.block_sparse_moe.experts.130.w2", "model.layers.8.block_sparse_moe.experts.131.w2", "model.layers.8.block_sparse_moe.experts.132.w2", "model.layers.8.block_sparse_moe.experts.133.w2", "model.layers.8.block_sparse_moe.experts.134.w2", "model.layers.8.block_sparse_moe.experts.135.w2", "model.layers.8.block_sparse_moe.experts.136.w2", "model.layers.8.block_sparse_moe.experts.137.w2", "model.layers.8.block_sparse_moe.experts.138.w2", "model.layers.8.block_sparse_moe.experts.139.w2", "model.layers.8.block_sparse_moe.experts.140.w2", "model.layers.8.block_sparse_moe.experts.141.w2", "model.layers.8.block_sparse_moe.experts.142.w2", "model.layers.8.block_sparse_moe.experts.143.w2", "model.layers.8.block_sparse_moe.experts.144.w2", "model.layers.8.block_sparse_moe.experts.145.w2", "model.layers.8.block_sparse_moe.experts.146.w2", "model.layers.8.block_sparse_moe.experts.147.w2", "model.layers.8.block_sparse_moe.experts.148.w2", "model.layers.8.block_sparse_moe.experts.149.w2", "model.layers.8.block_sparse_moe.experts.150.w2", "model.layers.8.block_sparse_moe.experts.151.w2", "model.layers.8.block_sparse_moe.experts.152.w2", "model.layers.8.block_sparse_moe.experts.153.w2", "model.layers.8.block_sparse_moe.experts.154.w2", "model.layers.8.block_sparse_moe.experts.155.w2", "model.layers.8.block_sparse_moe.experts.156.w2", "model.layers.8.block_sparse_moe.experts.157.w2", "model.layers.8.block_sparse_moe.experts.158.w2", "model.layers.8.block_sparse_moe.experts.159.w2", "model.layers.8.block_sparse_moe.experts.160.w2", "model.layers.8.block_sparse_moe.experts.161.w2", "model.layers.8.block_sparse_moe.experts.162.w2", "model.layers.8.block_sparse_moe.experts.163.w2", "model.layers.8.block_sparse_moe.experts.164.w2", "model.layers.8.block_sparse_moe.experts.165.w2", "model.layers.8.block_sparse_moe.experts.166.w2", "model.layers.8.block_sparse_moe.experts.167.w2", "model.layers.8.block_sparse_moe.experts.168.w2", "model.layers.8.block_sparse_moe.experts.169.w2", "model.layers.8.block_sparse_moe.experts.170.w2", "model.layers.8.block_sparse_moe.experts.171.w2", "model.layers.8.block_sparse_moe.experts.172.w2", "model.layers.8.block_sparse_moe.experts.173.w2", "model.layers.8.block_sparse_moe.experts.174.w2", "model.layers.8.block_sparse_moe.experts.175.w2", "model.layers.8.block_sparse_moe.experts.176.w2", "model.layers.8.block_sparse_moe.experts.177.w2", "model.layers.8.block_sparse_moe.experts.178.w2", "model.layers.8.block_sparse_moe.experts.179.w2", "model.layers.8.block_sparse_moe.experts.180.w2", "model.layers.8.block_sparse_moe.experts.181.w2", "model.layers.8.block_sparse_moe.experts.182.w2", "model.layers.8.block_sparse_moe.experts.183.w2", "model.layers.8.block_sparse_moe.experts.184.w2", "model.layers.8.block_sparse_moe.experts.185.w2", "model.layers.8.block_sparse_moe.experts.186.w2", "model.layers.8.block_sparse_moe.experts.187.w2", "model.layers.8.block_sparse_moe.experts.188.w2", "model.layers.8.block_sparse_moe.experts.189.w2", "model.layers.8.block_sparse_moe.experts.190.w2", "model.layers.8.block_sparse_moe.experts.191.w2", "model.layers.8.block_sparse_moe.experts.192.w2", "model.layers.8.block_sparse_moe.experts.193.w2", "model.layers.8.block_sparse_moe.experts.194.w2", "model.layers.8.block_sparse_moe.experts.195.w2", "model.layers.8.block_sparse_moe.experts.196.w2", "model.layers.8.block_sparse_moe.experts.197.w2", "model.layers.8.block_sparse_moe.experts.198.w2", "model.layers.8.block_sparse_moe.experts.199.w2", "model.layers.8.block_sparse_moe.experts.200.w2", "model.layers.8.block_sparse_moe.experts.201.w2", "model.layers.8.block_sparse_moe.experts.202.w2", "model.layers.8.block_sparse_moe.experts.203.w2", "model.layers.8.block_sparse_moe.experts.204.w2", "model.layers.8.block_sparse_moe.experts.205.w2", "model.layers.8.block_sparse_moe.experts.206.w2", "model.layers.8.block_sparse_moe.experts.207.w2", "model.layers.8.block_sparse_moe.experts.208.w2", "model.layers.8.block_sparse_moe.experts.209.w2", "model.layers.8.block_sparse_moe.experts.210.w2", "model.layers.8.block_sparse_moe.experts.211.w2", "model.layers.8.block_sparse_moe.experts.212.w2", "model.layers.8.block_sparse_moe.experts.213.w2", "model.layers.8.block_sparse_moe.experts.214.w2", "model.layers.8.block_sparse_moe.experts.215.w2", "model.layers.8.block_sparse_moe.experts.216.w2", "model.layers.8.block_sparse_moe.experts.217.w2", "model.layers.8.block_sparse_moe.experts.218.w2", "model.layers.8.block_sparse_moe.experts.219.w2", "model.layers.8.block_sparse_moe.experts.220.w2", "model.layers.8.block_sparse_moe.experts.221.w2", "model.layers.8.block_sparse_moe.experts.222.w2", "model.layers.8.block_sparse_moe.experts.223.w2", "model.layers.8.block_sparse_moe.experts.224.w2", "model.layers.8.block_sparse_moe.experts.225.w2", "model.layers.8.block_sparse_moe.experts.226.w2", "model.layers.8.block_sparse_moe.experts.227.w2", "model.layers.8.block_sparse_moe.experts.228.w2", "model.layers.8.block_sparse_moe.experts.229.w2", "model.layers.8.block_sparse_moe.experts.230.w2", "model.layers.8.block_sparse_moe.experts.231.w2", "model.layers.8.block_sparse_moe.experts.232.w2", "model.layers.8.block_sparse_moe.experts.233.w2", "model.layers.8.block_sparse_moe.experts.234.w2", "model.layers.8.block_sparse_moe.experts.235.w2", "model.layers.8.block_sparse_moe.experts.236.w2", "model.layers.8.block_sparse_moe.experts.237.w2", "model.layers.8.block_sparse_moe.experts.238.w2", "model.layers.8.block_sparse_moe.experts.239.w2", "model.layers.8.block_sparse_moe.experts.240.w2", "model.layers.8.block_sparse_moe.experts.241.w2", "model.layers.8.block_sparse_moe.experts.242.w2", "model.layers.8.block_sparse_moe.experts.243.w2", "model.layers.8.block_sparse_moe.experts.244.w2", "model.layers.8.block_sparse_moe.experts.245.w2", "model.layers.8.block_sparse_moe.experts.246.w2", "model.layers.8.block_sparse_moe.experts.247.w2", "model.layers.8.block_sparse_moe.experts.248.w2", "model.layers.8.block_sparse_moe.experts.249.w2", "model.layers.8.block_sparse_moe.experts.250.w2", "model.layers.8.block_sparse_moe.experts.251.w2", "model.layers.8.block_sparse_moe.experts.252.w2", "model.layers.8.block_sparse_moe.experts.253.w2", "model.layers.8.block_sparse_moe.experts.254.w2", "model.layers.8.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.00021866708993911743, "dbits": 1207959552 } ] }, { "idx": 45, "layers": [ "model.layers.9.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0014989968389272357, "dbits": 18874368 } ] }, { "idx": 46, "layers": [ "model.layers.9.self_attn.k_proj", "model.layers.9.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0008886419236660004, "dbits": 6291456 } ] }, { "idx": 47, "layers": [ "model.layers.9.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0011025510728359111, "dbits": 18874368 } ] }, { "idx": 48, "layers": [ "model.layers.9.block_sparse_moe.experts.0.w1", "model.layers.9.block_sparse_moe.experts.1.w1", "model.layers.9.block_sparse_moe.experts.2.w1", "model.layers.9.block_sparse_moe.experts.3.w1", "model.layers.9.block_sparse_moe.experts.4.w1", "model.layers.9.block_sparse_moe.experts.5.w1", "model.layers.9.block_sparse_moe.experts.6.w1", "model.layers.9.block_sparse_moe.experts.7.w1", "model.layers.9.block_sparse_moe.experts.8.w1", "model.layers.9.block_sparse_moe.experts.9.w1", "model.layers.9.block_sparse_moe.experts.10.w1", "model.layers.9.block_sparse_moe.experts.11.w1", "model.layers.9.block_sparse_moe.experts.12.w1", "model.layers.9.block_sparse_moe.experts.13.w1", "model.layers.9.block_sparse_moe.experts.14.w1", "model.layers.9.block_sparse_moe.experts.15.w1", "model.layers.9.block_sparse_moe.experts.16.w1", "model.layers.9.block_sparse_moe.experts.17.w1", "model.layers.9.block_sparse_moe.experts.18.w1", "model.layers.9.block_sparse_moe.experts.19.w1", "model.layers.9.block_sparse_moe.experts.20.w1", "model.layers.9.block_sparse_moe.experts.21.w1", "model.layers.9.block_sparse_moe.experts.22.w1", "model.layers.9.block_sparse_moe.experts.23.w1", "model.layers.9.block_sparse_moe.experts.24.w1", "model.layers.9.block_sparse_moe.experts.25.w1", "model.layers.9.block_sparse_moe.experts.26.w1", "model.layers.9.block_sparse_moe.experts.27.w1", "model.layers.9.block_sparse_moe.experts.28.w1", "model.layers.9.block_sparse_moe.experts.29.w1", "model.layers.9.block_sparse_moe.experts.30.w1", "model.layers.9.block_sparse_moe.experts.31.w1", "model.layers.9.block_sparse_moe.experts.32.w1", "model.layers.9.block_sparse_moe.experts.33.w1", "model.layers.9.block_sparse_moe.experts.34.w1", "model.layers.9.block_sparse_moe.experts.35.w1", "model.layers.9.block_sparse_moe.experts.36.w1", "model.layers.9.block_sparse_moe.experts.37.w1", "model.layers.9.block_sparse_moe.experts.38.w1", "model.layers.9.block_sparse_moe.experts.39.w1", "model.layers.9.block_sparse_moe.experts.40.w1", "model.layers.9.block_sparse_moe.experts.41.w1", "model.layers.9.block_sparse_moe.experts.42.w1", "model.layers.9.block_sparse_moe.experts.43.w1", "model.layers.9.block_sparse_moe.experts.44.w1", "model.layers.9.block_sparse_moe.experts.45.w1", "model.layers.9.block_sparse_moe.experts.46.w1", "model.layers.9.block_sparse_moe.experts.47.w1", "model.layers.9.block_sparse_moe.experts.48.w1", "model.layers.9.block_sparse_moe.experts.49.w1", "model.layers.9.block_sparse_moe.experts.50.w1", "model.layers.9.block_sparse_moe.experts.51.w1", "model.layers.9.block_sparse_moe.experts.52.w1", "model.layers.9.block_sparse_moe.experts.53.w1", "model.layers.9.block_sparse_moe.experts.54.w1", "model.layers.9.block_sparse_moe.experts.55.w1", "model.layers.9.block_sparse_moe.experts.56.w1", "model.layers.9.block_sparse_moe.experts.57.w1", "model.layers.9.block_sparse_moe.experts.58.w1", "model.layers.9.block_sparse_moe.experts.59.w1", "model.layers.9.block_sparse_moe.experts.60.w1", "model.layers.9.block_sparse_moe.experts.61.w1", "model.layers.9.block_sparse_moe.experts.62.w1", "model.layers.9.block_sparse_moe.experts.63.w1", "model.layers.9.block_sparse_moe.experts.64.w1", "model.layers.9.block_sparse_moe.experts.65.w1", "model.layers.9.block_sparse_moe.experts.66.w1", "model.layers.9.block_sparse_moe.experts.67.w1", "model.layers.9.block_sparse_moe.experts.68.w1", "model.layers.9.block_sparse_moe.experts.69.w1", "model.layers.9.block_sparse_moe.experts.70.w1", "model.layers.9.block_sparse_moe.experts.71.w1", "model.layers.9.block_sparse_moe.experts.72.w1", "model.layers.9.block_sparse_moe.experts.73.w1", "model.layers.9.block_sparse_moe.experts.74.w1", "model.layers.9.block_sparse_moe.experts.75.w1", "model.layers.9.block_sparse_moe.experts.76.w1", "model.layers.9.block_sparse_moe.experts.77.w1", "model.layers.9.block_sparse_moe.experts.78.w1", "model.layers.9.block_sparse_moe.experts.79.w1", "model.layers.9.block_sparse_moe.experts.80.w1", "model.layers.9.block_sparse_moe.experts.81.w1", "model.layers.9.block_sparse_moe.experts.82.w1", "model.layers.9.block_sparse_moe.experts.83.w1", "model.layers.9.block_sparse_moe.experts.84.w1", "model.layers.9.block_sparse_moe.experts.85.w1", "model.layers.9.block_sparse_moe.experts.86.w1", "model.layers.9.block_sparse_moe.experts.87.w1", "model.layers.9.block_sparse_moe.experts.88.w1", "model.layers.9.block_sparse_moe.experts.89.w1", "model.layers.9.block_sparse_moe.experts.90.w1", "model.layers.9.block_sparse_moe.experts.91.w1", "model.layers.9.block_sparse_moe.experts.92.w1", "model.layers.9.block_sparse_moe.experts.93.w1", "model.layers.9.block_sparse_moe.experts.94.w1", "model.layers.9.block_sparse_moe.experts.95.w1", "model.layers.9.block_sparse_moe.experts.96.w1", "model.layers.9.block_sparse_moe.experts.97.w1", "model.layers.9.block_sparse_moe.experts.98.w1", "model.layers.9.block_sparse_moe.experts.99.w1", "model.layers.9.block_sparse_moe.experts.100.w1", "model.layers.9.block_sparse_moe.experts.101.w1", "model.layers.9.block_sparse_moe.experts.102.w1", "model.layers.9.block_sparse_moe.experts.103.w1", "model.layers.9.block_sparse_moe.experts.104.w1", "model.layers.9.block_sparse_moe.experts.105.w1", "model.layers.9.block_sparse_moe.experts.106.w1", "model.layers.9.block_sparse_moe.experts.107.w1", "model.layers.9.block_sparse_moe.experts.108.w1", "model.layers.9.block_sparse_moe.experts.109.w1", "model.layers.9.block_sparse_moe.experts.110.w1", "model.layers.9.block_sparse_moe.experts.111.w1", "model.layers.9.block_sparse_moe.experts.112.w1", "model.layers.9.block_sparse_moe.experts.113.w1", "model.layers.9.block_sparse_moe.experts.114.w1", "model.layers.9.block_sparse_moe.experts.115.w1", "model.layers.9.block_sparse_moe.experts.116.w1", "model.layers.9.block_sparse_moe.experts.117.w1", "model.layers.9.block_sparse_moe.experts.118.w1", "model.layers.9.block_sparse_moe.experts.119.w1", "model.layers.9.block_sparse_moe.experts.120.w1", "model.layers.9.block_sparse_moe.experts.121.w1", "model.layers.9.block_sparse_moe.experts.122.w1", "model.layers.9.block_sparse_moe.experts.123.w1", "model.layers.9.block_sparse_moe.experts.124.w1", "model.layers.9.block_sparse_moe.experts.125.w1", "model.layers.9.block_sparse_moe.experts.126.w1", "model.layers.9.block_sparse_moe.experts.127.w1", "model.layers.9.block_sparse_moe.experts.128.w1", "model.layers.9.block_sparse_moe.experts.129.w1", "model.layers.9.block_sparse_moe.experts.130.w1", "model.layers.9.block_sparse_moe.experts.131.w1", "model.layers.9.block_sparse_moe.experts.132.w1", "model.layers.9.block_sparse_moe.experts.133.w1", "model.layers.9.block_sparse_moe.experts.134.w1", "model.layers.9.block_sparse_moe.experts.135.w1", "model.layers.9.block_sparse_moe.experts.136.w1", "model.layers.9.block_sparse_moe.experts.137.w1", "model.layers.9.block_sparse_moe.experts.138.w1", "model.layers.9.block_sparse_moe.experts.139.w1", "model.layers.9.block_sparse_moe.experts.140.w1", "model.layers.9.block_sparse_moe.experts.141.w1", "model.layers.9.block_sparse_moe.experts.142.w1", "model.layers.9.block_sparse_moe.experts.143.w1", "model.layers.9.block_sparse_moe.experts.144.w1", "model.layers.9.block_sparse_moe.experts.145.w1", "model.layers.9.block_sparse_moe.experts.146.w1", "model.layers.9.block_sparse_moe.experts.147.w1", "model.layers.9.block_sparse_moe.experts.148.w1", "model.layers.9.block_sparse_moe.experts.149.w1", "model.layers.9.block_sparse_moe.experts.150.w1", "model.layers.9.block_sparse_moe.experts.151.w1", "model.layers.9.block_sparse_moe.experts.152.w1", "model.layers.9.block_sparse_moe.experts.153.w1", "model.layers.9.block_sparse_moe.experts.154.w1", "model.layers.9.block_sparse_moe.experts.155.w1", "model.layers.9.block_sparse_moe.experts.156.w1", "model.layers.9.block_sparse_moe.experts.157.w1", "model.layers.9.block_sparse_moe.experts.158.w1", "model.layers.9.block_sparse_moe.experts.159.w1", "model.layers.9.block_sparse_moe.experts.160.w1", "model.layers.9.block_sparse_moe.experts.161.w1", "model.layers.9.block_sparse_moe.experts.162.w1", "model.layers.9.block_sparse_moe.experts.163.w1", "model.layers.9.block_sparse_moe.experts.164.w1", "model.layers.9.block_sparse_moe.experts.165.w1", "model.layers.9.block_sparse_moe.experts.166.w1", "model.layers.9.block_sparse_moe.experts.167.w1", "model.layers.9.block_sparse_moe.experts.168.w1", "model.layers.9.block_sparse_moe.experts.169.w1", "model.layers.9.block_sparse_moe.experts.170.w1", "model.layers.9.block_sparse_moe.experts.171.w1", "model.layers.9.block_sparse_moe.experts.172.w1", "model.layers.9.block_sparse_moe.experts.173.w1", "model.layers.9.block_sparse_moe.experts.174.w1", "model.layers.9.block_sparse_moe.experts.175.w1", "model.layers.9.block_sparse_moe.experts.176.w1", "model.layers.9.block_sparse_moe.experts.177.w1", "model.layers.9.block_sparse_moe.experts.178.w1", "model.layers.9.block_sparse_moe.experts.179.w1", "model.layers.9.block_sparse_moe.experts.180.w1", "model.layers.9.block_sparse_moe.experts.181.w1", "model.layers.9.block_sparse_moe.experts.182.w1", "model.layers.9.block_sparse_moe.experts.183.w1", "model.layers.9.block_sparse_moe.experts.184.w1", "model.layers.9.block_sparse_moe.experts.185.w1", "model.layers.9.block_sparse_moe.experts.186.w1", "model.layers.9.block_sparse_moe.experts.187.w1", "model.layers.9.block_sparse_moe.experts.188.w1", "model.layers.9.block_sparse_moe.experts.189.w1", "model.layers.9.block_sparse_moe.experts.190.w1", "model.layers.9.block_sparse_moe.experts.191.w1", "model.layers.9.block_sparse_moe.experts.192.w1", "model.layers.9.block_sparse_moe.experts.193.w1", "model.layers.9.block_sparse_moe.experts.194.w1", "model.layers.9.block_sparse_moe.experts.195.w1", "model.layers.9.block_sparse_moe.experts.196.w1", "model.layers.9.block_sparse_moe.experts.197.w1", "model.layers.9.block_sparse_moe.experts.198.w1", "model.layers.9.block_sparse_moe.experts.199.w1", "model.layers.9.block_sparse_moe.experts.200.w1", "model.layers.9.block_sparse_moe.experts.201.w1", "model.layers.9.block_sparse_moe.experts.202.w1", "model.layers.9.block_sparse_moe.experts.203.w1", "model.layers.9.block_sparse_moe.experts.204.w1", "model.layers.9.block_sparse_moe.experts.205.w1", "model.layers.9.block_sparse_moe.experts.206.w1", "model.layers.9.block_sparse_moe.experts.207.w1", "model.layers.9.block_sparse_moe.experts.208.w1", "model.layers.9.block_sparse_moe.experts.209.w1", "model.layers.9.block_sparse_moe.experts.210.w1", "model.layers.9.block_sparse_moe.experts.211.w1", "model.layers.9.block_sparse_moe.experts.212.w1", "model.layers.9.block_sparse_moe.experts.213.w1", "model.layers.9.block_sparse_moe.experts.214.w1", "model.layers.9.block_sparse_moe.experts.215.w1", "model.layers.9.block_sparse_moe.experts.216.w1", "model.layers.9.block_sparse_moe.experts.217.w1", "model.layers.9.block_sparse_moe.experts.218.w1", "model.layers.9.block_sparse_moe.experts.219.w1", "model.layers.9.block_sparse_moe.experts.220.w1", "model.layers.9.block_sparse_moe.experts.221.w1", "model.layers.9.block_sparse_moe.experts.222.w1", "model.layers.9.block_sparse_moe.experts.223.w1", "model.layers.9.block_sparse_moe.experts.224.w1", "model.layers.9.block_sparse_moe.experts.225.w1", "model.layers.9.block_sparse_moe.experts.226.w1", "model.layers.9.block_sparse_moe.experts.227.w1", "model.layers.9.block_sparse_moe.experts.228.w1", "model.layers.9.block_sparse_moe.experts.229.w1", "model.layers.9.block_sparse_moe.experts.230.w1", "model.layers.9.block_sparse_moe.experts.231.w1", "model.layers.9.block_sparse_moe.experts.232.w1", "model.layers.9.block_sparse_moe.experts.233.w1", "model.layers.9.block_sparse_moe.experts.234.w1", "model.layers.9.block_sparse_moe.experts.235.w1", "model.layers.9.block_sparse_moe.experts.236.w1", "model.layers.9.block_sparse_moe.experts.237.w1", "model.layers.9.block_sparse_moe.experts.238.w1", "model.layers.9.block_sparse_moe.experts.239.w1", "model.layers.9.block_sparse_moe.experts.240.w1", "model.layers.9.block_sparse_moe.experts.241.w1", "model.layers.9.block_sparse_moe.experts.242.w1", "model.layers.9.block_sparse_moe.experts.243.w1", "model.layers.9.block_sparse_moe.experts.244.w1", "model.layers.9.block_sparse_moe.experts.245.w1", "model.layers.9.block_sparse_moe.experts.246.w1", "model.layers.9.block_sparse_moe.experts.247.w1", "model.layers.9.block_sparse_moe.experts.248.w1", "model.layers.9.block_sparse_moe.experts.249.w1", "model.layers.9.block_sparse_moe.experts.250.w1", "model.layers.9.block_sparse_moe.experts.251.w1", "model.layers.9.block_sparse_moe.experts.252.w1", "model.layers.9.block_sparse_moe.experts.253.w1", "model.layers.9.block_sparse_moe.experts.254.w1", "model.layers.9.block_sparse_moe.experts.255.w1", "model.layers.9.block_sparse_moe.experts.0.w3", "model.layers.9.block_sparse_moe.experts.1.w3", "model.layers.9.block_sparse_moe.experts.2.w3", "model.layers.9.block_sparse_moe.experts.3.w3", "model.layers.9.block_sparse_moe.experts.4.w3", "model.layers.9.block_sparse_moe.experts.5.w3", "model.layers.9.block_sparse_moe.experts.6.w3", "model.layers.9.block_sparse_moe.experts.7.w3", "model.layers.9.block_sparse_moe.experts.8.w3", "model.layers.9.block_sparse_moe.experts.9.w3", "model.layers.9.block_sparse_moe.experts.10.w3", "model.layers.9.block_sparse_moe.experts.11.w3", "model.layers.9.block_sparse_moe.experts.12.w3", "model.layers.9.block_sparse_moe.experts.13.w3", "model.layers.9.block_sparse_moe.experts.14.w3", "model.layers.9.block_sparse_moe.experts.15.w3", "model.layers.9.block_sparse_moe.experts.16.w3", "model.layers.9.block_sparse_moe.experts.17.w3", "model.layers.9.block_sparse_moe.experts.18.w3", "model.layers.9.block_sparse_moe.experts.19.w3", "model.layers.9.block_sparse_moe.experts.20.w3", "model.layers.9.block_sparse_moe.experts.21.w3", "model.layers.9.block_sparse_moe.experts.22.w3", "model.layers.9.block_sparse_moe.experts.23.w3", "model.layers.9.block_sparse_moe.experts.24.w3", "model.layers.9.block_sparse_moe.experts.25.w3", "model.layers.9.block_sparse_moe.experts.26.w3", "model.layers.9.block_sparse_moe.experts.27.w3", "model.layers.9.block_sparse_moe.experts.28.w3", "model.layers.9.block_sparse_moe.experts.29.w3", "model.layers.9.block_sparse_moe.experts.30.w3", "model.layers.9.block_sparse_moe.experts.31.w3", "model.layers.9.block_sparse_moe.experts.32.w3", "model.layers.9.block_sparse_moe.experts.33.w3", "model.layers.9.block_sparse_moe.experts.34.w3", "model.layers.9.block_sparse_moe.experts.35.w3", "model.layers.9.block_sparse_moe.experts.36.w3", "model.layers.9.block_sparse_moe.experts.37.w3", "model.layers.9.block_sparse_moe.experts.38.w3", "model.layers.9.block_sparse_moe.experts.39.w3", "model.layers.9.block_sparse_moe.experts.40.w3", "model.layers.9.block_sparse_moe.experts.41.w3", "model.layers.9.block_sparse_moe.experts.42.w3", "model.layers.9.block_sparse_moe.experts.43.w3", "model.layers.9.block_sparse_moe.experts.44.w3", "model.layers.9.block_sparse_moe.experts.45.w3", "model.layers.9.block_sparse_moe.experts.46.w3", "model.layers.9.block_sparse_moe.experts.47.w3", "model.layers.9.block_sparse_moe.experts.48.w3", "model.layers.9.block_sparse_moe.experts.49.w3", "model.layers.9.block_sparse_moe.experts.50.w3", "model.layers.9.block_sparse_moe.experts.51.w3", "model.layers.9.block_sparse_moe.experts.52.w3", "model.layers.9.block_sparse_moe.experts.53.w3", "model.layers.9.block_sparse_moe.experts.54.w3", "model.layers.9.block_sparse_moe.experts.55.w3", "model.layers.9.block_sparse_moe.experts.56.w3", "model.layers.9.block_sparse_moe.experts.57.w3", "model.layers.9.block_sparse_moe.experts.58.w3", "model.layers.9.block_sparse_moe.experts.59.w3", "model.layers.9.block_sparse_moe.experts.60.w3", "model.layers.9.block_sparse_moe.experts.61.w3", "model.layers.9.block_sparse_moe.experts.62.w3", "model.layers.9.block_sparse_moe.experts.63.w3", "model.layers.9.block_sparse_moe.experts.64.w3", "model.layers.9.block_sparse_moe.experts.65.w3", "model.layers.9.block_sparse_moe.experts.66.w3", "model.layers.9.block_sparse_moe.experts.67.w3", "model.layers.9.block_sparse_moe.experts.68.w3", "model.layers.9.block_sparse_moe.experts.69.w3", "model.layers.9.block_sparse_moe.experts.70.w3", "model.layers.9.block_sparse_moe.experts.71.w3", "model.layers.9.block_sparse_moe.experts.72.w3", "model.layers.9.block_sparse_moe.experts.73.w3", "model.layers.9.block_sparse_moe.experts.74.w3", "model.layers.9.block_sparse_moe.experts.75.w3", "model.layers.9.block_sparse_moe.experts.76.w3", "model.layers.9.block_sparse_moe.experts.77.w3", "model.layers.9.block_sparse_moe.experts.78.w3", "model.layers.9.block_sparse_moe.experts.79.w3", "model.layers.9.block_sparse_moe.experts.80.w3", "model.layers.9.block_sparse_moe.experts.81.w3", "model.layers.9.block_sparse_moe.experts.82.w3", "model.layers.9.block_sparse_moe.experts.83.w3", "model.layers.9.block_sparse_moe.experts.84.w3", "model.layers.9.block_sparse_moe.experts.85.w3", "model.layers.9.block_sparse_moe.experts.86.w3", "model.layers.9.block_sparse_moe.experts.87.w3", "model.layers.9.block_sparse_moe.experts.88.w3", "model.layers.9.block_sparse_moe.experts.89.w3", "model.layers.9.block_sparse_moe.experts.90.w3", "model.layers.9.block_sparse_moe.experts.91.w3", "model.layers.9.block_sparse_moe.experts.92.w3", "model.layers.9.block_sparse_moe.experts.93.w3", "model.layers.9.block_sparse_moe.experts.94.w3", "model.layers.9.block_sparse_moe.experts.95.w3", "model.layers.9.block_sparse_moe.experts.96.w3", "model.layers.9.block_sparse_moe.experts.97.w3", "model.layers.9.block_sparse_moe.experts.98.w3", "model.layers.9.block_sparse_moe.experts.99.w3", "model.layers.9.block_sparse_moe.experts.100.w3", "model.layers.9.block_sparse_moe.experts.101.w3", "model.layers.9.block_sparse_moe.experts.102.w3", "model.layers.9.block_sparse_moe.experts.103.w3", "model.layers.9.block_sparse_moe.experts.104.w3", "model.layers.9.block_sparse_moe.experts.105.w3", "model.layers.9.block_sparse_moe.experts.106.w3", "model.layers.9.block_sparse_moe.experts.107.w3", "model.layers.9.block_sparse_moe.experts.108.w3", "model.layers.9.block_sparse_moe.experts.109.w3", "model.layers.9.block_sparse_moe.experts.110.w3", "model.layers.9.block_sparse_moe.experts.111.w3", "model.layers.9.block_sparse_moe.experts.112.w3", "model.layers.9.block_sparse_moe.experts.113.w3", "model.layers.9.block_sparse_moe.experts.114.w3", "model.layers.9.block_sparse_moe.experts.115.w3", "model.layers.9.block_sparse_moe.experts.116.w3", "model.layers.9.block_sparse_moe.experts.117.w3", "model.layers.9.block_sparse_moe.experts.118.w3", "model.layers.9.block_sparse_moe.experts.119.w3", "model.layers.9.block_sparse_moe.experts.120.w3", "model.layers.9.block_sparse_moe.experts.121.w3", "model.layers.9.block_sparse_moe.experts.122.w3", "model.layers.9.block_sparse_moe.experts.123.w3", "model.layers.9.block_sparse_moe.experts.124.w3", "model.layers.9.block_sparse_moe.experts.125.w3", "model.layers.9.block_sparse_moe.experts.126.w3", "model.layers.9.block_sparse_moe.experts.127.w3", "model.layers.9.block_sparse_moe.experts.128.w3", "model.layers.9.block_sparse_moe.experts.129.w3", "model.layers.9.block_sparse_moe.experts.130.w3", "model.layers.9.block_sparse_moe.experts.131.w3", "model.layers.9.block_sparse_moe.experts.132.w3", "model.layers.9.block_sparse_moe.experts.133.w3", "model.layers.9.block_sparse_moe.experts.134.w3", "model.layers.9.block_sparse_moe.experts.135.w3", "model.layers.9.block_sparse_moe.experts.136.w3", "model.layers.9.block_sparse_moe.experts.137.w3", "model.layers.9.block_sparse_moe.experts.138.w3", "model.layers.9.block_sparse_moe.experts.139.w3", "model.layers.9.block_sparse_moe.experts.140.w3", "model.layers.9.block_sparse_moe.experts.141.w3", "model.layers.9.block_sparse_moe.experts.142.w3", "model.layers.9.block_sparse_moe.experts.143.w3", "model.layers.9.block_sparse_moe.experts.144.w3", "model.layers.9.block_sparse_moe.experts.145.w3", "model.layers.9.block_sparse_moe.experts.146.w3", "model.layers.9.block_sparse_moe.experts.147.w3", "model.layers.9.block_sparse_moe.experts.148.w3", "model.layers.9.block_sparse_moe.experts.149.w3", "model.layers.9.block_sparse_moe.experts.150.w3", "model.layers.9.block_sparse_moe.experts.151.w3", "model.layers.9.block_sparse_moe.experts.152.w3", "model.layers.9.block_sparse_moe.experts.153.w3", "model.layers.9.block_sparse_moe.experts.154.w3", "model.layers.9.block_sparse_moe.experts.155.w3", "model.layers.9.block_sparse_moe.experts.156.w3", "model.layers.9.block_sparse_moe.experts.157.w3", "model.layers.9.block_sparse_moe.experts.158.w3", "model.layers.9.block_sparse_moe.experts.159.w3", "model.layers.9.block_sparse_moe.experts.160.w3", "model.layers.9.block_sparse_moe.experts.161.w3", "model.layers.9.block_sparse_moe.experts.162.w3", "model.layers.9.block_sparse_moe.experts.163.w3", "model.layers.9.block_sparse_moe.experts.164.w3", "model.layers.9.block_sparse_moe.experts.165.w3", "model.layers.9.block_sparse_moe.experts.166.w3", "model.layers.9.block_sparse_moe.experts.167.w3", "model.layers.9.block_sparse_moe.experts.168.w3", "model.layers.9.block_sparse_moe.experts.169.w3", "model.layers.9.block_sparse_moe.experts.170.w3", "model.layers.9.block_sparse_moe.experts.171.w3", "model.layers.9.block_sparse_moe.experts.172.w3", "model.layers.9.block_sparse_moe.experts.173.w3", "model.layers.9.block_sparse_moe.experts.174.w3", "model.layers.9.block_sparse_moe.experts.175.w3", "model.layers.9.block_sparse_moe.experts.176.w3", "model.layers.9.block_sparse_moe.experts.177.w3", "model.layers.9.block_sparse_moe.experts.178.w3", "model.layers.9.block_sparse_moe.experts.179.w3", "model.layers.9.block_sparse_moe.experts.180.w3", "model.layers.9.block_sparse_moe.experts.181.w3", "model.layers.9.block_sparse_moe.experts.182.w3", "model.layers.9.block_sparse_moe.experts.183.w3", "model.layers.9.block_sparse_moe.experts.184.w3", "model.layers.9.block_sparse_moe.experts.185.w3", "model.layers.9.block_sparse_moe.experts.186.w3", "model.layers.9.block_sparse_moe.experts.187.w3", "model.layers.9.block_sparse_moe.experts.188.w3", "model.layers.9.block_sparse_moe.experts.189.w3", "model.layers.9.block_sparse_moe.experts.190.w3", "model.layers.9.block_sparse_moe.experts.191.w3", "model.layers.9.block_sparse_moe.experts.192.w3", "model.layers.9.block_sparse_moe.experts.193.w3", "model.layers.9.block_sparse_moe.experts.194.w3", "model.layers.9.block_sparse_moe.experts.195.w3", "model.layers.9.block_sparse_moe.experts.196.w3", "model.layers.9.block_sparse_moe.experts.197.w3", "model.layers.9.block_sparse_moe.experts.198.w3", "model.layers.9.block_sparse_moe.experts.199.w3", "model.layers.9.block_sparse_moe.experts.200.w3", "model.layers.9.block_sparse_moe.experts.201.w3", "model.layers.9.block_sparse_moe.experts.202.w3", "model.layers.9.block_sparse_moe.experts.203.w3", "model.layers.9.block_sparse_moe.experts.204.w3", "model.layers.9.block_sparse_moe.experts.205.w3", "model.layers.9.block_sparse_moe.experts.206.w3", "model.layers.9.block_sparse_moe.experts.207.w3", "model.layers.9.block_sparse_moe.experts.208.w3", "model.layers.9.block_sparse_moe.experts.209.w3", "model.layers.9.block_sparse_moe.experts.210.w3", "model.layers.9.block_sparse_moe.experts.211.w3", "model.layers.9.block_sparse_moe.experts.212.w3", "model.layers.9.block_sparse_moe.experts.213.w3", "model.layers.9.block_sparse_moe.experts.214.w3", "model.layers.9.block_sparse_moe.experts.215.w3", "model.layers.9.block_sparse_moe.experts.216.w3", "model.layers.9.block_sparse_moe.experts.217.w3", "model.layers.9.block_sparse_moe.experts.218.w3", "model.layers.9.block_sparse_moe.experts.219.w3", "model.layers.9.block_sparse_moe.experts.220.w3", "model.layers.9.block_sparse_moe.experts.221.w3", "model.layers.9.block_sparse_moe.experts.222.w3", "model.layers.9.block_sparse_moe.experts.223.w3", "model.layers.9.block_sparse_moe.experts.224.w3", "model.layers.9.block_sparse_moe.experts.225.w3", "model.layers.9.block_sparse_moe.experts.226.w3", "model.layers.9.block_sparse_moe.experts.227.w3", "model.layers.9.block_sparse_moe.experts.228.w3", "model.layers.9.block_sparse_moe.experts.229.w3", "model.layers.9.block_sparse_moe.experts.230.w3", "model.layers.9.block_sparse_moe.experts.231.w3", "model.layers.9.block_sparse_moe.experts.232.w3", "model.layers.9.block_sparse_moe.experts.233.w3", "model.layers.9.block_sparse_moe.experts.234.w3", "model.layers.9.block_sparse_moe.experts.235.w3", "model.layers.9.block_sparse_moe.experts.236.w3", "model.layers.9.block_sparse_moe.experts.237.w3", "model.layers.9.block_sparse_moe.experts.238.w3", "model.layers.9.block_sparse_moe.experts.239.w3", "model.layers.9.block_sparse_moe.experts.240.w3", "model.layers.9.block_sparse_moe.experts.241.w3", "model.layers.9.block_sparse_moe.experts.242.w3", "model.layers.9.block_sparse_moe.experts.243.w3", "model.layers.9.block_sparse_moe.experts.244.w3", "model.layers.9.block_sparse_moe.experts.245.w3", "model.layers.9.block_sparse_moe.experts.246.w3", "model.layers.9.block_sparse_moe.experts.247.w3", "model.layers.9.block_sparse_moe.experts.248.w3", "model.layers.9.block_sparse_moe.experts.249.w3", "model.layers.9.block_sparse_moe.experts.250.w3", "model.layers.9.block_sparse_moe.experts.251.w3", "model.layers.9.block_sparse_moe.experts.252.w3", "model.layers.9.block_sparse_moe.experts.253.w3", "model.layers.9.block_sparse_moe.experts.254.w3", "model.layers.9.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.0015568241477012523, "dbits": 2415919104 } ] }, { "idx": 49, "layers": [ "model.layers.9.block_sparse_moe.experts.0.w2", "model.layers.9.block_sparse_moe.experts.1.w2", "model.layers.9.block_sparse_moe.experts.2.w2", "model.layers.9.block_sparse_moe.experts.3.w2", "model.layers.9.block_sparse_moe.experts.4.w2", "model.layers.9.block_sparse_moe.experts.5.w2", "model.layers.9.block_sparse_moe.experts.6.w2", "model.layers.9.block_sparse_moe.experts.7.w2", "model.layers.9.block_sparse_moe.experts.8.w2", "model.layers.9.block_sparse_moe.experts.9.w2", "model.layers.9.block_sparse_moe.experts.10.w2", "model.layers.9.block_sparse_moe.experts.11.w2", "model.layers.9.block_sparse_moe.experts.12.w2", "model.layers.9.block_sparse_moe.experts.13.w2", "model.layers.9.block_sparse_moe.experts.14.w2", "model.layers.9.block_sparse_moe.experts.15.w2", "model.layers.9.block_sparse_moe.experts.16.w2", "model.layers.9.block_sparse_moe.experts.17.w2", "model.layers.9.block_sparse_moe.experts.18.w2", "model.layers.9.block_sparse_moe.experts.19.w2", "model.layers.9.block_sparse_moe.experts.20.w2", "model.layers.9.block_sparse_moe.experts.21.w2", "model.layers.9.block_sparse_moe.experts.22.w2", "model.layers.9.block_sparse_moe.experts.23.w2", "model.layers.9.block_sparse_moe.experts.24.w2", "model.layers.9.block_sparse_moe.experts.25.w2", "model.layers.9.block_sparse_moe.experts.26.w2", "model.layers.9.block_sparse_moe.experts.27.w2", "model.layers.9.block_sparse_moe.experts.28.w2", "model.layers.9.block_sparse_moe.experts.29.w2", "model.layers.9.block_sparse_moe.experts.30.w2", "model.layers.9.block_sparse_moe.experts.31.w2", "model.layers.9.block_sparse_moe.experts.32.w2", "model.layers.9.block_sparse_moe.experts.33.w2", "model.layers.9.block_sparse_moe.experts.34.w2", "model.layers.9.block_sparse_moe.experts.35.w2", "model.layers.9.block_sparse_moe.experts.36.w2", "model.layers.9.block_sparse_moe.experts.37.w2", "model.layers.9.block_sparse_moe.experts.38.w2", "model.layers.9.block_sparse_moe.experts.39.w2", "model.layers.9.block_sparse_moe.experts.40.w2", "model.layers.9.block_sparse_moe.experts.41.w2", "model.layers.9.block_sparse_moe.experts.42.w2", "model.layers.9.block_sparse_moe.experts.43.w2", "model.layers.9.block_sparse_moe.experts.44.w2", "model.layers.9.block_sparse_moe.experts.45.w2", "model.layers.9.block_sparse_moe.experts.46.w2", "model.layers.9.block_sparse_moe.experts.47.w2", "model.layers.9.block_sparse_moe.experts.48.w2", "model.layers.9.block_sparse_moe.experts.49.w2", "model.layers.9.block_sparse_moe.experts.50.w2", "model.layers.9.block_sparse_moe.experts.51.w2", "model.layers.9.block_sparse_moe.experts.52.w2", "model.layers.9.block_sparse_moe.experts.53.w2", "model.layers.9.block_sparse_moe.experts.54.w2", "model.layers.9.block_sparse_moe.experts.55.w2", "model.layers.9.block_sparse_moe.experts.56.w2", "model.layers.9.block_sparse_moe.experts.57.w2", "model.layers.9.block_sparse_moe.experts.58.w2", "model.layers.9.block_sparse_moe.experts.59.w2", "model.layers.9.block_sparse_moe.experts.60.w2", "model.layers.9.block_sparse_moe.experts.61.w2", "model.layers.9.block_sparse_moe.experts.62.w2", "model.layers.9.block_sparse_moe.experts.63.w2", "model.layers.9.block_sparse_moe.experts.64.w2", "model.layers.9.block_sparse_moe.experts.65.w2", "model.layers.9.block_sparse_moe.experts.66.w2", "model.layers.9.block_sparse_moe.experts.67.w2", "model.layers.9.block_sparse_moe.experts.68.w2", "model.layers.9.block_sparse_moe.experts.69.w2", "model.layers.9.block_sparse_moe.experts.70.w2", "model.layers.9.block_sparse_moe.experts.71.w2", "model.layers.9.block_sparse_moe.experts.72.w2", "model.layers.9.block_sparse_moe.experts.73.w2", "model.layers.9.block_sparse_moe.experts.74.w2", "model.layers.9.block_sparse_moe.experts.75.w2", "model.layers.9.block_sparse_moe.experts.76.w2", "model.layers.9.block_sparse_moe.experts.77.w2", "model.layers.9.block_sparse_moe.experts.78.w2", "model.layers.9.block_sparse_moe.experts.79.w2", "model.layers.9.block_sparse_moe.experts.80.w2", "model.layers.9.block_sparse_moe.experts.81.w2", "model.layers.9.block_sparse_moe.experts.82.w2", "model.layers.9.block_sparse_moe.experts.83.w2", "model.layers.9.block_sparse_moe.experts.84.w2", "model.layers.9.block_sparse_moe.experts.85.w2", "model.layers.9.block_sparse_moe.experts.86.w2", "model.layers.9.block_sparse_moe.experts.87.w2", "model.layers.9.block_sparse_moe.experts.88.w2", "model.layers.9.block_sparse_moe.experts.89.w2", "model.layers.9.block_sparse_moe.experts.90.w2", "model.layers.9.block_sparse_moe.experts.91.w2", "model.layers.9.block_sparse_moe.experts.92.w2", "model.layers.9.block_sparse_moe.experts.93.w2", "model.layers.9.block_sparse_moe.experts.94.w2", "model.layers.9.block_sparse_moe.experts.95.w2", "model.layers.9.block_sparse_moe.experts.96.w2", "model.layers.9.block_sparse_moe.experts.97.w2", "model.layers.9.block_sparse_moe.experts.98.w2", "model.layers.9.block_sparse_moe.experts.99.w2", "model.layers.9.block_sparse_moe.experts.100.w2", "model.layers.9.block_sparse_moe.experts.101.w2", "model.layers.9.block_sparse_moe.experts.102.w2", "model.layers.9.block_sparse_moe.experts.103.w2", "model.layers.9.block_sparse_moe.experts.104.w2", "model.layers.9.block_sparse_moe.experts.105.w2", "model.layers.9.block_sparse_moe.experts.106.w2", "model.layers.9.block_sparse_moe.experts.107.w2", "model.layers.9.block_sparse_moe.experts.108.w2", "model.layers.9.block_sparse_moe.experts.109.w2", "model.layers.9.block_sparse_moe.experts.110.w2", "model.layers.9.block_sparse_moe.experts.111.w2", "model.layers.9.block_sparse_moe.experts.112.w2", "model.layers.9.block_sparse_moe.experts.113.w2", "model.layers.9.block_sparse_moe.experts.114.w2", "model.layers.9.block_sparse_moe.experts.115.w2", "model.layers.9.block_sparse_moe.experts.116.w2", "model.layers.9.block_sparse_moe.experts.117.w2", "model.layers.9.block_sparse_moe.experts.118.w2", "model.layers.9.block_sparse_moe.experts.119.w2", "model.layers.9.block_sparse_moe.experts.120.w2", "model.layers.9.block_sparse_moe.experts.121.w2", "model.layers.9.block_sparse_moe.experts.122.w2", "model.layers.9.block_sparse_moe.experts.123.w2", "model.layers.9.block_sparse_moe.experts.124.w2", "model.layers.9.block_sparse_moe.experts.125.w2", "model.layers.9.block_sparse_moe.experts.126.w2", "model.layers.9.block_sparse_moe.experts.127.w2", "model.layers.9.block_sparse_moe.experts.128.w2", "model.layers.9.block_sparse_moe.experts.129.w2", "model.layers.9.block_sparse_moe.experts.130.w2", "model.layers.9.block_sparse_moe.experts.131.w2", "model.layers.9.block_sparse_moe.experts.132.w2", "model.layers.9.block_sparse_moe.experts.133.w2", "model.layers.9.block_sparse_moe.experts.134.w2", "model.layers.9.block_sparse_moe.experts.135.w2", "model.layers.9.block_sparse_moe.experts.136.w2", "model.layers.9.block_sparse_moe.experts.137.w2", "model.layers.9.block_sparse_moe.experts.138.w2", "model.layers.9.block_sparse_moe.experts.139.w2", "model.layers.9.block_sparse_moe.experts.140.w2", "model.layers.9.block_sparse_moe.experts.141.w2", "model.layers.9.block_sparse_moe.experts.142.w2", "model.layers.9.block_sparse_moe.experts.143.w2", "model.layers.9.block_sparse_moe.experts.144.w2", "model.layers.9.block_sparse_moe.experts.145.w2", "model.layers.9.block_sparse_moe.experts.146.w2", "model.layers.9.block_sparse_moe.experts.147.w2", "model.layers.9.block_sparse_moe.experts.148.w2", "model.layers.9.block_sparse_moe.experts.149.w2", "model.layers.9.block_sparse_moe.experts.150.w2", "model.layers.9.block_sparse_moe.experts.151.w2", "model.layers.9.block_sparse_moe.experts.152.w2", "model.layers.9.block_sparse_moe.experts.153.w2", "model.layers.9.block_sparse_moe.experts.154.w2", "model.layers.9.block_sparse_moe.experts.155.w2", "model.layers.9.block_sparse_moe.experts.156.w2", "model.layers.9.block_sparse_moe.experts.157.w2", "model.layers.9.block_sparse_moe.experts.158.w2", "model.layers.9.block_sparse_moe.experts.159.w2", "model.layers.9.block_sparse_moe.experts.160.w2", "model.layers.9.block_sparse_moe.experts.161.w2", "model.layers.9.block_sparse_moe.experts.162.w2", "model.layers.9.block_sparse_moe.experts.163.w2", "model.layers.9.block_sparse_moe.experts.164.w2", "model.layers.9.block_sparse_moe.experts.165.w2", "model.layers.9.block_sparse_moe.experts.166.w2", "model.layers.9.block_sparse_moe.experts.167.w2", "model.layers.9.block_sparse_moe.experts.168.w2", "model.layers.9.block_sparse_moe.experts.169.w2", "model.layers.9.block_sparse_moe.experts.170.w2", "model.layers.9.block_sparse_moe.experts.171.w2", "model.layers.9.block_sparse_moe.experts.172.w2", "model.layers.9.block_sparse_moe.experts.173.w2", "model.layers.9.block_sparse_moe.experts.174.w2", "model.layers.9.block_sparse_moe.experts.175.w2", "model.layers.9.block_sparse_moe.experts.176.w2", "model.layers.9.block_sparse_moe.experts.177.w2", "model.layers.9.block_sparse_moe.experts.178.w2", "model.layers.9.block_sparse_moe.experts.179.w2", "model.layers.9.block_sparse_moe.experts.180.w2", "model.layers.9.block_sparse_moe.experts.181.w2", "model.layers.9.block_sparse_moe.experts.182.w2", "model.layers.9.block_sparse_moe.experts.183.w2", "model.layers.9.block_sparse_moe.experts.184.w2", "model.layers.9.block_sparse_moe.experts.185.w2", "model.layers.9.block_sparse_moe.experts.186.w2", "model.layers.9.block_sparse_moe.experts.187.w2", "model.layers.9.block_sparse_moe.experts.188.w2", "model.layers.9.block_sparse_moe.experts.189.w2", "model.layers.9.block_sparse_moe.experts.190.w2", "model.layers.9.block_sparse_moe.experts.191.w2", "model.layers.9.block_sparse_moe.experts.192.w2", "model.layers.9.block_sparse_moe.experts.193.w2", "model.layers.9.block_sparse_moe.experts.194.w2", "model.layers.9.block_sparse_moe.experts.195.w2", "model.layers.9.block_sparse_moe.experts.196.w2", "model.layers.9.block_sparse_moe.experts.197.w2", "model.layers.9.block_sparse_moe.experts.198.w2", "model.layers.9.block_sparse_moe.experts.199.w2", "model.layers.9.block_sparse_moe.experts.200.w2", "model.layers.9.block_sparse_moe.experts.201.w2", "model.layers.9.block_sparse_moe.experts.202.w2", "model.layers.9.block_sparse_moe.experts.203.w2", "model.layers.9.block_sparse_moe.experts.204.w2", "model.layers.9.block_sparse_moe.experts.205.w2", "model.layers.9.block_sparse_moe.experts.206.w2", "model.layers.9.block_sparse_moe.experts.207.w2", "model.layers.9.block_sparse_moe.experts.208.w2", "model.layers.9.block_sparse_moe.experts.209.w2", "model.layers.9.block_sparse_moe.experts.210.w2", "model.layers.9.block_sparse_moe.experts.211.w2", "model.layers.9.block_sparse_moe.experts.212.w2", "model.layers.9.block_sparse_moe.experts.213.w2", "model.layers.9.block_sparse_moe.experts.214.w2", "model.layers.9.block_sparse_moe.experts.215.w2", "model.layers.9.block_sparse_moe.experts.216.w2", "model.layers.9.block_sparse_moe.experts.217.w2", "model.layers.9.block_sparse_moe.experts.218.w2", "model.layers.9.block_sparse_moe.experts.219.w2", "model.layers.9.block_sparse_moe.experts.220.w2", "model.layers.9.block_sparse_moe.experts.221.w2", "model.layers.9.block_sparse_moe.experts.222.w2", "model.layers.9.block_sparse_moe.experts.223.w2", "model.layers.9.block_sparse_moe.experts.224.w2", "model.layers.9.block_sparse_moe.experts.225.w2", "model.layers.9.block_sparse_moe.experts.226.w2", "model.layers.9.block_sparse_moe.experts.227.w2", "model.layers.9.block_sparse_moe.experts.228.w2", "model.layers.9.block_sparse_moe.experts.229.w2", "model.layers.9.block_sparse_moe.experts.230.w2", "model.layers.9.block_sparse_moe.experts.231.w2", "model.layers.9.block_sparse_moe.experts.232.w2", "model.layers.9.block_sparse_moe.experts.233.w2", "model.layers.9.block_sparse_moe.experts.234.w2", "model.layers.9.block_sparse_moe.experts.235.w2", "model.layers.9.block_sparse_moe.experts.236.w2", "model.layers.9.block_sparse_moe.experts.237.w2", "model.layers.9.block_sparse_moe.experts.238.w2", "model.layers.9.block_sparse_moe.experts.239.w2", "model.layers.9.block_sparse_moe.experts.240.w2", "model.layers.9.block_sparse_moe.experts.241.w2", "model.layers.9.block_sparse_moe.experts.242.w2", "model.layers.9.block_sparse_moe.experts.243.w2", "model.layers.9.block_sparse_moe.experts.244.w2", "model.layers.9.block_sparse_moe.experts.245.w2", "model.layers.9.block_sparse_moe.experts.246.w2", "model.layers.9.block_sparse_moe.experts.247.w2", "model.layers.9.block_sparse_moe.experts.248.w2", "model.layers.9.block_sparse_moe.experts.249.w2", "model.layers.9.block_sparse_moe.experts.250.w2", "model.layers.9.block_sparse_moe.experts.251.w2", "model.layers.9.block_sparse_moe.experts.252.w2", "model.layers.9.block_sparse_moe.experts.253.w2", "model.layers.9.block_sparse_moe.experts.254.w2", "model.layers.9.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.00036046430468561486, "dbits": 1207959552 } ] }, { "idx": 50, "layers": [ "model.layers.10.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0012539085000753403, "dbits": 18874368 } ] }, { "idx": 51, "layers": [ "model.layers.10.self_attn.k_proj", "model.layers.10.self_attn.v_proj" ], "candidates": [ { "dkld": 0.0012636095285415427, "dbits": 6291456 } ] }, { "idx": 52, "layers": [ "model.layers.10.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0010110981762408988, "dbits": 18874368 } ] }, { "idx": 53, "layers": [ "model.layers.10.block_sparse_moe.experts.0.w1", "model.layers.10.block_sparse_moe.experts.1.w1", "model.layers.10.block_sparse_moe.experts.2.w1", "model.layers.10.block_sparse_moe.experts.3.w1", "model.layers.10.block_sparse_moe.experts.4.w1", "model.layers.10.block_sparse_moe.experts.5.w1", "model.layers.10.block_sparse_moe.experts.6.w1", "model.layers.10.block_sparse_moe.experts.7.w1", "model.layers.10.block_sparse_moe.experts.8.w1", "model.layers.10.block_sparse_moe.experts.9.w1", "model.layers.10.block_sparse_moe.experts.10.w1", "model.layers.10.block_sparse_moe.experts.11.w1", "model.layers.10.block_sparse_moe.experts.12.w1", "model.layers.10.block_sparse_moe.experts.13.w1", "model.layers.10.block_sparse_moe.experts.14.w1", "model.layers.10.block_sparse_moe.experts.15.w1", "model.layers.10.block_sparse_moe.experts.16.w1", "model.layers.10.block_sparse_moe.experts.17.w1", "model.layers.10.block_sparse_moe.experts.18.w1", "model.layers.10.block_sparse_moe.experts.19.w1", "model.layers.10.block_sparse_moe.experts.20.w1", "model.layers.10.block_sparse_moe.experts.21.w1", "model.layers.10.block_sparse_moe.experts.22.w1", "model.layers.10.block_sparse_moe.experts.23.w1", "model.layers.10.block_sparse_moe.experts.24.w1", "model.layers.10.block_sparse_moe.experts.25.w1", "model.layers.10.block_sparse_moe.experts.26.w1", "model.layers.10.block_sparse_moe.experts.27.w1", "model.layers.10.block_sparse_moe.experts.28.w1", "model.layers.10.block_sparse_moe.experts.29.w1", "model.layers.10.block_sparse_moe.experts.30.w1", "model.layers.10.block_sparse_moe.experts.31.w1", "model.layers.10.block_sparse_moe.experts.32.w1", "model.layers.10.block_sparse_moe.experts.33.w1", "model.layers.10.block_sparse_moe.experts.34.w1", "model.layers.10.block_sparse_moe.experts.35.w1", "model.layers.10.block_sparse_moe.experts.36.w1", "model.layers.10.block_sparse_moe.experts.37.w1", "model.layers.10.block_sparse_moe.experts.38.w1", "model.layers.10.block_sparse_moe.experts.39.w1", "model.layers.10.block_sparse_moe.experts.40.w1", "model.layers.10.block_sparse_moe.experts.41.w1", "model.layers.10.block_sparse_moe.experts.42.w1", "model.layers.10.block_sparse_moe.experts.43.w1", "model.layers.10.block_sparse_moe.experts.44.w1", "model.layers.10.block_sparse_moe.experts.45.w1", "model.layers.10.block_sparse_moe.experts.46.w1", "model.layers.10.block_sparse_moe.experts.47.w1", "model.layers.10.block_sparse_moe.experts.48.w1", "model.layers.10.block_sparse_moe.experts.49.w1", "model.layers.10.block_sparse_moe.experts.50.w1", "model.layers.10.block_sparse_moe.experts.51.w1", "model.layers.10.block_sparse_moe.experts.52.w1", "model.layers.10.block_sparse_moe.experts.53.w1", "model.layers.10.block_sparse_moe.experts.54.w1", "model.layers.10.block_sparse_moe.experts.55.w1", "model.layers.10.block_sparse_moe.experts.56.w1", "model.layers.10.block_sparse_moe.experts.57.w1", "model.layers.10.block_sparse_moe.experts.58.w1", "model.layers.10.block_sparse_moe.experts.59.w1", "model.layers.10.block_sparse_moe.experts.60.w1", "model.layers.10.block_sparse_moe.experts.61.w1", "model.layers.10.block_sparse_moe.experts.62.w1", "model.layers.10.block_sparse_moe.experts.63.w1", "model.layers.10.block_sparse_moe.experts.64.w1", "model.layers.10.block_sparse_moe.experts.65.w1", "model.layers.10.block_sparse_moe.experts.66.w1", "model.layers.10.block_sparse_moe.experts.67.w1", "model.layers.10.block_sparse_moe.experts.68.w1", "model.layers.10.block_sparse_moe.experts.69.w1", "model.layers.10.block_sparse_moe.experts.70.w1", "model.layers.10.block_sparse_moe.experts.71.w1", "model.layers.10.block_sparse_moe.experts.72.w1", "model.layers.10.block_sparse_moe.experts.73.w1", "model.layers.10.block_sparse_moe.experts.74.w1", "model.layers.10.block_sparse_moe.experts.75.w1", "model.layers.10.block_sparse_moe.experts.76.w1", "model.layers.10.block_sparse_moe.experts.77.w1", "model.layers.10.block_sparse_moe.experts.78.w1", "model.layers.10.block_sparse_moe.experts.79.w1", "model.layers.10.block_sparse_moe.experts.80.w1", "model.layers.10.block_sparse_moe.experts.81.w1", "model.layers.10.block_sparse_moe.experts.82.w1", "model.layers.10.block_sparse_moe.experts.83.w1", "model.layers.10.block_sparse_moe.experts.84.w1", "model.layers.10.block_sparse_moe.experts.85.w1", "model.layers.10.block_sparse_moe.experts.86.w1", "model.layers.10.block_sparse_moe.experts.87.w1", "model.layers.10.block_sparse_moe.experts.88.w1", "model.layers.10.block_sparse_moe.experts.89.w1", "model.layers.10.block_sparse_moe.experts.90.w1", "model.layers.10.block_sparse_moe.experts.91.w1", "model.layers.10.block_sparse_moe.experts.92.w1", "model.layers.10.block_sparse_moe.experts.93.w1", "model.layers.10.block_sparse_moe.experts.94.w1", "model.layers.10.block_sparse_moe.experts.95.w1", "model.layers.10.block_sparse_moe.experts.96.w1", "model.layers.10.block_sparse_moe.experts.97.w1", "model.layers.10.block_sparse_moe.experts.98.w1", "model.layers.10.block_sparse_moe.experts.99.w1", "model.layers.10.block_sparse_moe.experts.100.w1", "model.layers.10.block_sparse_moe.experts.101.w1", "model.layers.10.block_sparse_moe.experts.102.w1", "model.layers.10.block_sparse_moe.experts.103.w1", "model.layers.10.block_sparse_moe.experts.104.w1", "model.layers.10.block_sparse_moe.experts.105.w1", "model.layers.10.block_sparse_moe.experts.106.w1", "model.layers.10.block_sparse_moe.experts.107.w1", "model.layers.10.block_sparse_moe.experts.108.w1", "model.layers.10.block_sparse_moe.experts.109.w1", "model.layers.10.block_sparse_moe.experts.110.w1", "model.layers.10.block_sparse_moe.experts.111.w1", "model.layers.10.block_sparse_moe.experts.112.w1", "model.layers.10.block_sparse_moe.experts.113.w1", "model.layers.10.block_sparse_moe.experts.114.w1", "model.layers.10.block_sparse_moe.experts.115.w1", "model.layers.10.block_sparse_moe.experts.116.w1", "model.layers.10.block_sparse_moe.experts.117.w1", "model.layers.10.block_sparse_moe.experts.118.w1", "model.layers.10.block_sparse_moe.experts.119.w1", "model.layers.10.block_sparse_moe.experts.120.w1", "model.layers.10.block_sparse_moe.experts.121.w1", "model.layers.10.block_sparse_moe.experts.122.w1", "model.layers.10.block_sparse_moe.experts.123.w1", "model.layers.10.block_sparse_moe.experts.124.w1", "model.layers.10.block_sparse_moe.experts.125.w1", "model.layers.10.block_sparse_moe.experts.126.w1", "model.layers.10.block_sparse_moe.experts.127.w1", "model.layers.10.block_sparse_moe.experts.128.w1", "model.layers.10.block_sparse_moe.experts.129.w1", "model.layers.10.block_sparse_moe.experts.130.w1", "model.layers.10.block_sparse_moe.experts.131.w1", "model.layers.10.block_sparse_moe.experts.132.w1", "model.layers.10.block_sparse_moe.experts.133.w1", "model.layers.10.block_sparse_moe.experts.134.w1", "model.layers.10.block_sparse_moe.experts.135.w1", "model.layers.10.block_sparse_moe.experts.136.w1", "model.layers.10.block_sparse_moe.experts.137.w1", "model.layers.10.block_sparse_moe.experts.138.w1", "model.layers.10.block_sparse_moe.experts.139.w1", "model.layers.10.block_sparse_moe.experts.140.w1", "model.layers.10.block_sparse_moe.experts.141.w1", "model.layers.10.block_sparse_moe.experts.142.w1", "model.layers.10.block_sparse_moe.experts.143.w1", "model.layers.10.block_sparse_moe.experts.144.w1", "model.layers.10.block_sparse_moe.experts.145.w1", "model.layers.10.block_sparse_moe.experts.146.w1", "model.layers.10.block_sparse_moe.experts.147.w1", "model.layers.10.block_sparse_moe.experts.148.w1", "model.layers.10.block_sparse_moe.experts.149.w1", "model.layers.10.block_sparse_moe.experts.150.w1", "model.layers.10.block_sparse_moe.experts.151.w1", "model.layers.10.block_sparse_moe.experts.152.w1", "model.layers.10.block_sparse_moe.experts.153.w1", "model.layers.10.block_sparse_moe.experts.154.w1", "model.layers.10.block_sparse_moe.experts.155.w1", "model.layers.10.block_sparse_moe.experts.156.w1", "model.layers.10.block_sparse_moe.experts.157.w1", "model.layers.10.block_sparse_moe.experts.158.w1", "model.layers.10.block_sparse_moe.experts.159.w1", "model.layers.10.block_sparse_moe.experts.160.w1", "model.layers.10.block_sparse_moe.experts.161.w1", "model.layers.10.block_sparse_moe.experts.162.w1", "model.layers.10.block_sparse_moe.experts.163.w1", "model.layers.10.block_sparse_moe.experts.164.w1", "model.layers.10.block_sparse_moe.experts.165.w1", "model.layers.10.block_sparse_moe.experts.166.w1", "model.layers.10.block_sparse_moe.experts.167.w1", "model.layers.10.block_sparse_moe.experts.168.w1", "model.layers.10.block_sparse_moe.experts.169.w1", "model.layers.10.block_sparse_moe.experts.170.w1", "model.layers.10.block_sparse_moe.experts.171.w1", "model.layers.10.block_sparse_moe.experts.172.w1", "model.layers.10.block_sparse_moe.experts.173.w1", "model.layers.10.block_sparse_moe.experts.174.w1", "model.layers.10.block_sparse_moe.experts.175.w1", "model.layers.10.block_sparse_moe.experts.176.w1", "model.layers.10.block_sparse_moe.experts.177.w1", "model.layers.10.block_sparse_moe.experts.178.w1", "model.layers.10.block_sparse_moe.experts.179.w1", "model.layers.10.block_sparse_moe.experts.180.w1", "model.layers.10.block_sparse_moe.experts.181.w1", "model.layers.10.block_sparse_moe.experts.182.w1", "model.layers.10.block_sparse_moe.experts.183.w1", "model.layers.10.block_sparse_moe.experts.184.w1", "model.layers.10.block_sparse_moe.experts.185.w1", "model.layers.10.block_sparse_moe.experts.186.w1", "model.layers.10.block_sparse_moe.experts.187.w1", "model.layers.10.block_sparse_moe.experts.188.w1", "model.layers.10.block_sparse_moe.experts.189.w1", "model.layers.10.block_sparse_moe.experts.190.w1", "model.layers.10.block_sparse_moe.experts.191.w1", "model.layers.10.block_sparse_moe.experts.192.w1", "model.layers.10.block_sparse_moe.experts.193.w1", "model.layers.10.block_sparse_moe.experts.194.w1", "model.layers.10.block_sparse_moe.experts.195.w1", "model.layers.10.block_sparse_moe.experts.196.w1", "model.layers.10.block_sparse_moe.experts.197.w1", "model.layers.10.block_sparse_moe.experts.198.w1", "model.layers.10.block_sparse_moe.experts.199.w1", "model.layers.10.block_sparse_moe.experts.200.w1", "model.layers.10.block_sparse_moe.experts.201.w1", "model.layers.10.block_sparse_moe.experts.202.w1", "model.layers.10.block_sparse_moe.experts.203.w1", "model.layers.10.block_sparse_moe.experts.204.w1", "model.layers.10.block_sparse_moe.experts.205.w1", "model.layers.10.block_sparse_moe.experts.206.w1", "model.layers.10.block_sparse_moe.experts.207.w1", "model.layers.10.block_sparse_moe.experts.208.w1", "model.layers.10.block_sparse_moe.experts.209.w1", "model.layers.10.block_sparse_moe.experts.210.w1", "model.layers.10.block_sparse_moe.experts.211.w1", "model.layers.10.block_sparse_moe.experts.212.w1", "model.layers.10.block_sparse_moe.experts.213.w1", "model.layers.10.block_sparse_moe.experts.214.w1", "model.layers.10.block_sparse_moe.experts.215.w1", "model.layers.10.block_sparse_moe.experts.216.w1", "model.layers.10.block_sparse_moe.experts.217.w1", "model.layers.10.block_sparse_moe.experts.218.w1", "model.layers.10.block_sparse_moe.experts.219.w1", "model.layers.10.block_sparse_moe.experts.220.w1", "model.layers.10.block_sparse_moe.experts.221.w1", "model.layers.10.block_sparse_moe.experts.222.w1", "model.layers.10.block_sparse_moe.experts.223.w1", "model.layers.10.block_sparse_moe.experts.224.w1", "model.layers.10.block_sparse_moe.experts.225.w1", "model.layers.10.block_sparse_moe.experts.226.w1", "model.layers.10.block_sparse_moe.experts.227.w1", "model.layers.10.block_sparse_moe.experts.228.w1", "model.layers.10.block_sparse_moe.experts.229.w1", "model.layers.10.block_sparse_moe.experts.230.w1", "model.layers.10.block_sparse_moe.experts.231.w1", "model.layers.10.block_sparse_moe.experts.232.w1", "model.layers.10.block_sparse_moe.experts.233.w1", "model.layers.10.block_sparse_moe.experts.234.w1", "model.layers.10.block_sparse_moe.experts.235.w1", "model.layers.10.block_sparse_moe.experts.236.w1", "model.layers.10.block_sparse_moe.experts.237.w1", "model.layers.10.block_sparse_moe.experts.238.w1", "model.layers.10.block_sparse_moe.experts.239.w1", "model.layers.10.block_sparse_moe.experts.240.w1", "model.layers.10.block_sparse_moe.experts.241.w1", "model.layers.10.block_sparse_moe.experts.242.w1", "model.layers.10.block_sparse_moe.experts.243.w1", "model.layers.10.block_sparse_moe.experts.244.w1", "model.layers.10.block_sparse_moe.experts.245.w1", "model.layers.10.block_sparse_moe.experts.246.w1", "model.layers.10.block_sparse_moe.experts.247.w1", "model.layers.10.block_sparse_moe.experts.248.w1", "model.layers.10.block_sparse_moe.experts.249.w1", "model.layers.10.block_sparse_moe.experts.250.w1", "model.layers.10.block_sparse_moe.experts.251.w1", "model.layers.10.block_sparse_moe.experts.252.w1", "model.layers.10.block_sparse_moe.experts.253.w1", "model.layers.10.block_sparse_moe.experts.254.w1", "model.layers.10.block_sparse_moe.experts.255.w1", "model.layers.10.block_sparse_moe.experts.0.w3", "model.layers.10.block_sparse_moe.experts.1.w3", "model.layers.10.block_sparse_moe.experts.2.w3", "model.layers.10.block_sparse_moe.experts.3.w3", "model.layers.10.block_sparse_moe.experts.4.w3", "model.layers.10.block_sparse_moe.experts.5.w3", "model.layers.10.block_sparse_moe.experts.6.w3", "model.layers.10.block_sparse_moe.experts.7.w3", "model.layers.10.block_sparse_moe.experts.8.w3", "model.layers.10.block_sparse_moe.experts.9.w3", "model.layers.10.block_sparse_moe.experts.10.w3", "model.layers.10.block_sparse_moe.experts.11.w3", "model.layers.10.block_sparse_moe.experts.12.w3", "model.layers.10.block_sparse_moe.experts.13.w3", "model.layers.10.block_sparse_moe.experts.14.w3", "model.layers.10.block_sparse_moe.experts.15.w3", "model.layers.10.block_sparse_moe.experts.16.w3", "model.layers.10.block_sparse_moe.experts.17.w3", "model.layers.10.block_sparse_moe.experts.18.w3", "model.layers.10.block_sparse_moe.experts.19.w3", "model.layers.10.block_sparse_moe.experts.20.w3", "model.layers.10.block_sparse_moe.experts.21.w3", "model.layers.10.block_sparse_moe.experts.22.w3", "model.layers.10.block_sparse_moe.experts.23.w3", "model.layers.10.block_sparse_moe.experts.24.w3", "model.layers.10.block_sparse_moe.experts.25.w3", "model.layers.10.block_sparse_moe.experts.26.w3", "model.layers.10.block_sparse_moe.experts.27.w3", "model.layers.10.block_sparse_moe.experts.28.w3", "model.layers.10.block_sparse_moe.experts.29.w3", "model.layers.10.block_sparse_moe.experts.30.w3", "model.layers.10.block_sparse_moe.experts.31.w3", "model.layers.10.block_sparse_moe.experts.32.w3", "model.layers.10.block_sparse_moe.experts.33.w3", "model.layers.10.block_sparse_moe.experts.34.w3", "model.layers.10.block_sparse_moe.experts.35.w3", "model.layers.10.block_sparse_moe.experts.36.w3", "model.layers.10.block_sparse_moe.experts.37.w3", "model.layers.10.block_sparse_moe.experts.38.w3", "model.layers.10.block_sparse_moe.experts.39.w3", "model.layers.10.block_sparse_moe.experts.40.w3", "model.layers.10.block_sparse_moe.experts.41.w3", "model.layers.10.block_sparse_moe.experts.42.w3", "model.layers.10.block_sparse_moe.experts.43.w3", "model.layers.10.block_sparse_moe.experts.44.w3", "model.layers.10.block_sparse_moe.experts.45.w3", "model.layers.10.block_sparse_moe.experts.46.w3", "model.layers.10.block_sparse_moe.experts.47.w3", "model.layers.10.block_sparse_moe.experts.48.w3", "model.layers.10.block_sparse_moe.experts.49.w3", "model.layers.10.block_sparse_moe.experts.50.w3", "model.layers.10.block_sparse_moe.experts.51.w3", "model.layers.10.block_sparse_moe.experts.52.w3", "model.layers.10.block_sparse_moe.experts.53.w3", "model.layers.10.block_sparse_moe.experts.54.w3", "model.layers.10.block_sparse_moe.experts.55.w3", "model.layers.10.block_sparse_moe.experts.56.w3", "model.layers.10.block_sparse_moe.experts.57.w3", "model.layers.10.block_sparse_moe.experts.58.w3", "model.layers.10.block_sparse_moe.experts.59.w3", "model.layers.10.block_sparse_moe.experts.60.w3", "model.layers.10.block_sparse_moe.experts.61.w3", "model.layers.10.block_sparse_moe.experts.62.w3", "model.layers.10.block_sparse_moe.experts.63.w3", "model.layers.10.block_sparse_moe.experts.64.w3", "model.layers.10.block_sparse_moe.experts.65.w3", "model.layers.10.block_sparse_moe.experts.66.w3", "model.layers.10.block_sparse_moe.experts.67.w3", "model.layers.10.block_sparse_moe.experts.68.w3", "model.layers.10.block_sparse_moe.experts.69.w3", "model.layers.10.block_sparse_moe.experts.70.w3", "model.layers.10.block_sparse_moe.experts.71.w3", "model.layers.10.block_sparse_moe.experts.72.w3", "model.layers.10.block_sparse_moe.experts.73.w3", "model.layers.10.block_sparse_moe.experts.74.w3", "model.layers.10.block_sparse_moe.experts.75.w3", "model.layers.10.block_sparse_moe.experts.76.w3", "model.layers.10.block_sparse_moe.experts.77.w3", "model.layers.10.block_sparse_moe.experts.78.w3", "model.layers.10.block_sparse_moe.experts.79.w3", "model.layers.10.block_sparse_moe.experts.80.w3", "model.layers.10.block_sparse_moe.experts.81.w3", "model.layers.10.block_sparse_moe.experts.82.w3", "model.layers.10.block_sparse_moe.experts.83.w3", "model.layers.10.block_sparse_moe.experts.84.w3", "model.layers.10.block_sparse_moe.experts.85.w3", "model.layers.10.block_sparse_moe.experts.86.w3", "model.layers.10.block_sparse_moe.experts.87.w3", "model.layers.10.block_sparse_moe.experts.88.w3", "model.layers.10.block_sparse_moe.experts.89.w3", "model.layers.10.block_sparse_moe.experts.90.w3", "model.layers.10.block_sparse_moe.experts.91.w3", "model.layers.10.block_sparse_moe.experts.92.w3", "model.layers.10.block_sparse_moe.experts.93.w3", "model.layers.10.block_sparse_moe.experts.94.w3", "model.layers.10.block_sparse_moe.experts.95.w3", "model.layers.10.block_sparse_moe.experts.96.w3", "model.layers.10.block_sparse_moe.experts.97.w3", "model.layers.10.block_sparse_moe.experts.98.w3", "model.layers.10.block_sparse_moe.experts.99.w3", "model.layers.10.block_sparse_moe.experts.100.w3", "model.layers.10.block_sparse_moe.experts.101.w3", "model.layers.10.block_sparse_moe.experts.102.w3", "model.layers.10.block_sparse_moe.experts.103.w3", "model.layers.10.block_sparse_moe.experts.104.w3", "model.layers.10.block_sparse_moe.experts.105.w3", "model.layers.10.block_sparse_moe.experts.106.w3", "model.layers.10.block_sparse_moe.experts.107.w3", "model.layers.10.block_sparse_moe.experts.108.w3", "model.layers.10.block_sparse_moe.experts.109.w3", "model.layers.10.block_sparse_moe.experts.110.w3", "model.layers.10.block_sparse_moe.experts.111.w3", "model.layers.10.block_sparse_moe.experts.112.w3", "model.layers.10.block_sparse_moe.experts.113.w3", "model.layers.10.block_sparse_moe.experts.114.w3", "model.layers.10.block_sparse_moe.experts.115.w3", "model.layers.10.block_sparse_moe.experts.116.w3", "model.layers.10.block_sparse_moe.experts.117.w3", "model.layers.10.block_sparse_moe.experts.118.w3", "model.layers.10.block_sparse_moe.experts.119.w3", "model.layers.10.block_sparse_moe.experts.120.w3", "model.layers.10.block_sparse_moe.experts.121.w3", "model.layers.10.block_sparse_moe.experts.122.w3", "model.layers.10.block_sparse_moe.experts.123.w3", "model.layers.10.block_sparse_moe.experts.124.w3", "model.layers.10.block_sparse_moe.experts.125.w3", "model.layers.10.block_sparse_moe.experts.126.w3", "model.layers.10.block_sparse_moe.experts.127.w3", "model.layers.10.block_sparse_moe.experts.128.w3", "model.layers.10.block_sparse_moe.experts.129.w3", "model.layers.10.block_sparse_moe.experts.130.w3", "model.layers.10.block_sparse_moe.experts.131.w3", "model.layers.10.block_sparse_moe.experts.132.w3", "model.layers.10.block_sparse_moe.experts.133.w3", "model.layers.10.block_sparse_moe.experts.134.w3", "model.layers.10.block_sparse_moe.experts.135.w3", "model.layers.10.block_sparse_moe.experts.136.w3", "model.layers.10.block_sparse_moe.experts.137.w3", "model.layers.10.block_sparse_moe.experts.138.w3", "model.layers.10.block_sparse_moe.experts.139.w3", "model.layers.10.block_sparse_moe.experts.140.w3", "model.layers.10.block_sparse_moe.experts.141.w3", "model.layers.10.block_sparse_moe.experts.142.w3", "model.layers.10.block_sparse_moe.experts.143.w3", "model.layers.10.block_sparse_moe.experts.144.w3", "model.layers.10.block_sparse_moe.experts.145.w3", "model.layers.10.block_sparse_moe.experts.146.w3", "model.layers.10.block_sparse_moe.experts.147.w3", "model.layers.10.block_sparse_moe.experts.148.w3", "model.layers.10.block_sparse_moe.experts.149.w3", "model.layers.10.block_sparse_moe.experts.150.w3", "model.layers.10.block_sparse_moe.experts.151.w3", "model.layers.10.block_sparse_moe.experts.152.w3", "model.layers.10.block_sparse_moe.experts.153.w3", "model.layers.10.block_sparse_moe.experts.154.w3", "model.layers.10.block_sparse_moe.experts.155.w3", "model.layers.10.block_sparse_moe.experts.156.w3", "model.layers.10.block_sparse_moe.experts.157.w3", "model.layers.10.block_sparse_moe.experts.158.w3", "model.layers.10.block_sparse_moe.experts.159.w3", "model.layers.10.block_sparse_moe.experts.160.w3", "model.layers.10.block_sparse_moe.experts.161.w3", "model.layers.10.block_sparse_moe.experts.162.w3", "model.layers.10.block_sparse_moe.experts.163.w3", "model.layers.10.block_sparse_moe.experts.164.w3", "model.layers.10.block_sparse_moe.experts.165.w3", "model.layers.10.block_sparse_moe.experts.166.w3", "model.layers.10.block_sparse_moe.experts.167.w3", "model.layers.10.block_sparse_moe.experts.168.w3", "model.layers.10.block_sparse_moe.experts.169.w3", "model.layers.10.block_sparse_moe.experts.170.w3", "model.layers.10.block_sparse_moe.experts.171.w3", "model.layers.10.block_sparse_moe.experts.172.w3", "model.layers.10.block_sparse_moe.experts.173.w3", "model.layers.10.block_sparse_moe.experts.174.w3", "model.layers.10.block_sparse_moe.experts.175.w3", "model.layers.10.block_sparse_moe.experts.176.w3", "model.layers.10.block_sparse_moe.experts.177.w3", "model.layers.10.block_sparse_moe.experts.178.w3", "model.layers.10.block_sparse_moe.experts.179.w3", "model.layers.10.block_sparse_moe.experts.180.w3", "model.layers.10.block_sparse_moe.experts.181.w3", "model.layers.10.block_sparse_moe.experts.182.w3", "model.layers.10.block_sparse_moe.experts.183.w3", "model.layers.10.block_sparse_moe.experts.184.w3", "model.layers.10.block_sparse_moe.experts.185.w3", "model.layers.10.block_sparse_moe.experts.186.w3", "model.layers.10.block_sparse_moe.experts.187.w3", "model.layers.10.block_sparse_moe.experts.188.w3", "model.layers.10.block_sparse_moe.experts.189.w3", "model.layers.10.block_sparse_moe.experts.190.w3", "model.layers.10.block_sparse_moe.experts.191.w3", "model.layers.10.block_sparse_moe.experts.192.w3", "model.layers.10.block_sparse_moe.experts.193.w3", "model.layers.10.block_sparse_moe.experts.194.w3", "model.layers.10.block_sparse_moe.experts.195.w3", "model.layers.10.block_sparse_moe.experts.196.w3", "model.layers.10.block_sparse_moe.experts.197.w3", "model.layers.10.block_sparse_moe.experts.198.w3", "model.layers.10.block_sparse_moe.experts.199.w3", "model.layers.10.block_sparse_moe.experts.200.w3", "model.layers.10.block_sparse_moe.experts.201.w3", "model.layers.10.block_sparse_moe.experts.202.w3", "model.layers.10.block_sparse_moe.experts.203.w3", "model.layers.10.block_sparse_moe.experts.204.w3", "model.layers.10.block_sparse_moe.experts.205.w3", "model.layers.10.block_sparse_moe.experts.206.w3", "model.layers.10.block_sparse_moe.experts.207.w3", "model.layers.10.block_sparse_moe.experts.208.w3", "model.layers.10.block_sparse_moe.experts.209.w3", "model.layers.10.block_sparse_moe.experts.210.w3", "model.layers.10.block_sparse_moe.experts.211.w3", "model.layers.10.block_sparse_moe.experts.212.w3", "model.layers.10.block_sparse_moe.experts.213.w3", "model.layers.10.block_sparse_moe.experts.214.w3", "model.layers.10.block_sparse_moe.experts.215.w3", "model.layers.10.block_sparse_moe.experts.216.w3", "model.layers.10.block_sparse_moe.experts.217.w3", "model.layers.10.block_sparse_moe.experts.218.w3", "model.layers.10.block_sparse_moe.experts.219.w3", "model.layers.10.block_sparse_moe.experts.220.w3", "model.layers.10.block_sparse_moe.experts.221.w3", "model.layers.10.block_sparse_moe.experts.222.w3", "model.layers.10.block_sparse_moe.experts.223.w3", "model.layers.10.block_sparse_moe.experts.224.w3", "model.layers.10.block_sparse_moe.experts.225.w3", "model.layers.10.block_sparse_moe.experts.226.w3", "model.layers.10.block_sparse_moe.experts.227.w3", "model.layers.10.block_sparse_moe.experts.228.w3", "model.layers.10.block_sparse_moe.experts.229.w3", "model.layers.10.block_sparse_moe.experts.230.w3", "model.layers.10.block_sparse_moe.experts.231.w3", "model.layers.10.block_sparse_moe.experts.232.w3", "model.layers.10.block_sparse_moe.experts.233.w3", "model.layers.10.block_sparse_moe.experts.234.w3", "model.layers.10.block_sparse_moe.experts.235.w3", "model.layers.10.block_sparse_moe.experts.236.w3", "model.layers.10.block_sparse_moe.experts.237.w3", "model.layers.10.block_sparse_moe.experts.238.w3", "model.layers.10.block_sparse_moe.experts.239.w3", "model.layers.10.block_sparse_moe.experts.240.w3", "model.layers.10.block_sparse_moe.experts.241.w3", "model.layers.10.block_sparse_moe.experts.242.w3", "model.layers.10.block_sparse_moe.experts.243.w3", "model.layers.10.block_sparse_moe.experts.244.w3", "model.layers.10.block_sparse_moe.experts.245.w3", "model.layers.10.block_sparse_moe.experts.246.w3", "model.layers.10.block_sparse_moe.experts.247.w3", "model.layers.10.block_sparse_moe.experts.248.w3", "model.layers.10.block_sparse_moe.experts.249.w3", "model.layers.10.block_sparse_moe.experts.250.w3", "model.layers.10.block_sparse_moe.experts.251.w3", "model.layers.10.block_sparse_moe.experts.252.w3", "model.layers.10.block_sparse_moe.experts.253.w3", "model.layers.10.block_sparse_moe.experts.254.w3", "model.layers.10.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0008305799216032361, "dbits": 2415919104 } ] }, { "idx": 54, "layers": [ "model.layers.10.block_sparse_moe.experts.0.w2", "model.layers.10.block_sparse_moe.experts.1.w2", "model.layers.10.block_sparse_moe.experts.2.w2", "model.layers.10.block_sparse_moe.experts.3.w2", "model.layers.10.block_sparse_moe.experts.4.w2", "model.layers.10.block_sparse_moe.experts.5.w2", "model.layers.10.block_sparse_moe.experts.6.w2", "model.layers.10.block_sparse_moe.experts.7.w2", "model.layers.10.block_sparse_moe.experts.8.w2", "model.layers.10.block_sparse_moe.experts.9.w2", "model.layers.10.block_sparse_moe.experts.10.w2", "model.layers.10.block_sparse_moe.experts.11.w2", "model.layers.10.block_sparse_moe.experts.12.w2", "model.layers.10.block_sparse_moe.experts.13.w2", "model.layers.10.block_sparse_moe.experts.14.w2", "model.layers.10.block_sparse_moe.experts.15.w2", "model.layers.10.block_sparse_moe.experts.16.w2", "model.layers.10.block_sparse_moe.experts.17.w2", "model.layers.10.block_sparse_moe.experts.18.w2", "model.layers.10.block_sparse_moe.experts.19.w2", "model.layers.10.block_sparse_moe.experts.20.w2", "model.layers.10.block_sparse_moe.experts.21.w2", "model.layers.10.block_sparse_moe.experts.22.w2", "model.layers.10.block_sparse_moe.experts.23.w2", "model.layers.10.block_sparse_moe.experts.24.w2", "model.layers.10.block_sparse_moe.experts.25.w2", "model.layers.10.block_sparse_moe.experts.26.w2", "model.layers.10.block_sparse_moe.experts.27.w2", "model.layers.10.block_sparse_moe.experts.28.w2", "model.layers.10.block_sparse_moe.experts.29.w2", "model.layers.10.block_sparse_moe.experts.30.w2", "model.layers.10.block_sparse_moe.experts.31.w2", "model.layers.10.block_sparse_moe.experts.32.w2", "model.layers.10.block_sparse_moe.experts.33.w2", "model.layers.10.block_sparse_moe.experts.34.w2", "model.layers.10.block_sparse_moe.experts.35.w2", "model.layers.10.block_sparse_moe.experts.36.w2", "model.layers.10.block_sparse_moe.experts.37.w2", "model.layers.10.block_sparse_moe.experts.38.w2", "model.layers.10.block_sparse_moe.experts.39.w2", "model.layers.10.block_sparse_moe.experts.40.w2", "model.layers.10.block_sparse_moe.experts.41.w2", "model.layers.10.block_sparse_moe.experts.42.w2", "model.layers.10.block_sparse_moe.experts.43.w2", "model.layers.10.block_sparse_moe.experts.44.w2", "model.layers.10.block_sparse_moe.experts.45.w2", "model.layers.10.block_sparse_moe.experts.46.w2", "model.layers.10.block_sparse_moe.experts.47.w2", "model.layers.10.block_sparse_moe.experts.48.w2", "model.layers.10.block_sparse_moe.experts.49.w2", "model.layers.10.block_sparse_moe.experts.50.w2", "model.layers.10.block_sparse_moe.experts.51.w2", "model.layers.10.block_sparse_moe.experts.52.w2", "model.layers.10.block_sparse_moe.experts.53.w2", "model.layers.10.block_sparse_moe.experts.54.w2", "model.layers.10.block_sparse_moe.experts.55.w2", "model.layers.10.block_sparse_moe.experts.56.w2", "model.layers.10.block_sparse_moe.experts.57.w2", "model.layers.10.block_sparse_moe.experts.58.w2", "model.layers.10.block_sparse_moe.experts.59.w2", "model.layers.10.block_sparse_moe.experts.60.w2", "model.layers.10.block_sparse_moe.experts.61.w2", "model.layers.10.block_sparse_moe.experts.62.w2", "model.layers.10.block_sparse_moe.experts.63.w2", "model.layers.10.block_sparse_moe.experts.64.w2", "model.layers.10.block_sparse_moe.experts.65.w2", "model.layers.10.block_sparse_moe.experts.66.w2", "model.layers.10.block_sparse_moe.experts.67.w2", "model.layers.10.block_sparse_moe.experts.68.w2", "model.layers.10.block_sparse_moe.experts.69.w2", "model.layers.10.block_sparse_moe.experts.70.w2", "model.layers.10.block_sparse_moe.experts.71.w2", "model.layers.10.block_sparse_moe.experts.72.w2", "model.layers.10.block_sparse_moe.experts.73.w2", "model.layers.10.block_sparse_moe.experts.74.w2", "model.layers.10.block_sparse_moe.experts.75.w2", "model.layers.10.block_sparse_moe.experts.76.w2", "model.layers.10.block_sparse_moe.experts.77.w2", "model.layers.10.block_sparse_moe.experts.78.w2", "model.layers.10.block_sparse_moe.experts.79.w2", "model.layers.10.block_sparse_moe.experts.80.w2", "model.layers.10.block_sparse_moe.experts.81.w2", "model.layers.10.block_sparse_moe.experts.82.w2", "model.layers.10.block_sparse_moe.experts.83.w2", "model.layers.10.block_sparse_moe.experts.84.w2", "model.layers.10.block_sparse_moe.experts.85.w2", "model.layers.10.block_sparse_moe.experts.86.w2", "model.layers.10.block_sparse_moe.experts.87.w2", "model.layers.10.block_sparse_moe.experts.88.w2", "model.layers.10.block_sparse_moe.experts.89.w2", "model.layers.10.block_sparse_moe.experts.90.w2", "model.layers.10.block_sparse_moe.experts.91.w2", "model.layers.10.block_sparse_moe.experts.92.w2", "model.layers.10.block_sparse_moe.experts.93.w2", "model.layers.10.block_sparse_moe.experts.94.w2", "model.layers.10.block_sparse_moe.experts.95.w2", "model.layers.10.block_sparse_moe.experts.96.w2", "model.layers.10.block_sparse_moe.experts.97.w2", "model.layers.10.block_sparse_moe.experts.98.w2", "model.layers.10.block_sparse_moe.experts.99.w2", "model.layers.10.block_sparse_moe.experts.100.w2", "model.layers.10.block_sparse_moe.experts.101.w2", "model.layers.10.block_sparse_moe.experts.102.w2", "model.layers.10.block_sparse_moe.experts.103.w2", "model.layers.10.block_sparse_moe.experts.104.w2", "model.layers.10.block_sparse_moe.experts.105.w2", "model.layers.10.block_sparse_moe.experts.106.w2", "model.layers.10.block_sparse_moe.experts.107.w2", "model.layers.10.block_sparse_moe.experts.108.w2", "model.layers.10.block_sparse_moe.experts.109.w2", "model.layers.10.block_sparse_moe.experts.110.w2", "model.layers.10.block_sparse_moe.experts.111.w2", "model.layers.10.block_sparse_moe.experts.112.w2", "model.layers.10.block_sparse_moe.experts.113.w2", "model.layers.10.block_sparse_moe.experts.114.w2", "model.layers.10.block_sparse_moe.experts.115.w2", "model.layers.10.block_sparse_moe.experts.116.w2", "model.layers.10.block_sparse_moe.experts.117.w2", "model.layers.10.block_sparse_moe.experts.118.w2", "model.layers.10.block_sparse_moe.experts.119.w2", "model.layers.10.block_sparse_moe.experts.120.w2", "model.layers.10.block_sparse_moe.experts.121.w2", "model.layers.10.block_sparse_moe.experts.122.w2", "model.layers.10.block_sparse_moe.experts.123.w2", "model.layers.10.block_sparse_moe.experts.124.w2", "model.layers.10.block_sparse_moe.experts.125.w2", "model.layers.10.block_sparse_moe.experts.126.w2", "model.layers.10.block_sparse_moe.experts.127.w2", "model.layers.10.block_sparse_moe.experts.128.w2", "model.layers.10.block_sparse_moe.experts.129.w2", "model.layers.10.block_sparse_moe.experts.130.w2", "model.layers.10.block_sparse_moe.experts.131.w2", "model.layers.10.block_sparse_moe.experts.132.w2", "model.layers.10.block_sparse_moe.experts.133.w2", "model.layers.10.block_sparse_moe.experts.134.w2", "model.layers.10.block_sparse_moe.experts.135.w2", "model.layers.10.block_sparse_moe.experts.136.w2", "model.layers.10.block_sparse_moe.experts.137.w2", "model.layers.10.block_sparse_moe.experts.138.w2", "model.layers.10.block_sparse_moe.experts.139.w2", "model.layers.10.block_sparse_moe.experts.140.w2", "model.layers.10.block_sparse_moe.experts.141.w2", "model.layers.10.block_sparse_moe.experts.142.w2", "model.layers.10.block_sparse_moe.experts.143.w2", "model.layers.10.block_sparse_moe.experts.144.w2", "model.layers.10.block_sparse_moe.experts.145.w2", "model.layers.10.block_sparse_moe.experts.146.w2", "model.layers.10.block_sparse_moe.experts.147.w2", "model.layers.10.block_sparse_moe.experts.148.w2", "model.layers.10.block_sparse_moe.experts.149.w2", "model.layers.10.block_sparse_moe.experts.150.w2", "model.layers.10.block_sparse_moe.experts.151.w2", "model.layers.10.block_sparse_moe.experts.152.w2", "model.layers.10.block_sparse_moe.experts.153.w2", "model.layers.10.block_sparse_moe.experts.154.w2", "model.layers.10.block_sparse_moe.experts.155.w2", "model.layers.10.block_sparse_moe.experts.156.w2", "model.layers.10.block_sparse_moe.experts.157.w2", "model.layers.10.block_sparse_moe.experts.158.w2", "model.layers.10.block_sparse_moe.experts.159.w2", "model.layers.10.block_sparse_moe.experts.160.w2", "model.layers.10.block_sparse_moe.experts.161.w2", "model.layers.10.block_sparse_moe.experts.162.w2", "model.layers.10.block_sparse_moe.experts.163.w2", "model.layers.10.block_sparse_moe.experts.164.w2", "model.layers.10.block_sparse_moe.experts.165.w2", "model.layers.10.block_sparse_moe.experts.166.w2", "model.layers.10.block_sparse_moe.experts.167.w2", "model.layers.10.block_sparse_moe.experts.168.w2", "model.layers.10.block_sparse_moe.experts.169.w2", "model.layers.10.block_sparse_moe.experts.170.w2", "model.layers.10.block_sparse_moe.experts.171.w2", "model.layers.10.block_sparse_moe.experts.172.w2", "model.layers.10.block_sparse_moe.experts.173.w2", "model.layers.10.block_sparse_moe.experts.174.w2", "model.layers.10.block_sparse_moe.experts.175.w2", "model.layers.10.block_sparse_moe.experts.176.w2", "model.layers.10.block_sparse_moe.experts.177.w2", "model.layers.10.block_sparse_moe.experts.178.w2", "model.layers.10.block_sparse_moe.experts.179.w2", "model.layers.10.block_sparse_moe.experts.180.w2", "model.layers.10.block_sparse_moe.experts.181.w2", "model.layers.10.block_sparse_moe.experts.182.w2", "model.layers.10.block_sparse_moe.experts.183.w2", "model.layers.10.block_sparse_moe.experts.184.w2", "model.layers.10.block_sparse_moe.experts.185.w2", "model.layers.10.block_sparse_moe.experts.186.w2", "model.layers.10.block_sparse_moe.experts.187.w2", "model.layers.10.block_sparse_moe.experts.188.w2", "model.layers.10.block_sparse_moe.experts.189.w2", "model.layers.10.block_sparse_moe.experts.190.w2", "model.layers.10.block_sparse_moe.experts.191.w2", "model.layers.10.block_sparse_moe.experts.192.w2", "model.layers.10.block_sparse_moe.experts.193.w2", "model.layers.10.block_sparse_moe.experts.194.w2", "model.layers.10.block_sparse_moe.experts.195.w2", "model.layers.10.block_sparse_moe.experts.196.w2", "model.layers.10.block_sparse_moe.experts.197.w2", "model.layers.10.block_sparse_moe.experts.198.w2", "model.layers.10.block_sparse_moe.experts.199.w2", "model.layers.10.block_sparse_moe.experts.200.w2", "model.layers.10.block_sparse_moe.experts.201.w2", "model.layers.10.block_sparse_moe.experts.202.w2", "model.layers.10.block_sparse_moe.experts.203.w2", "model.layers.10.block_sparse_moe.experts.204.w2", "model.layers.10.block_sparse_moe.experts.205.w2", "model.layers.10.block_sparse_moe.experts.206.w2", "model.layers.10.block_sparse_moe.experts.207.w2", "model.layers.10.block_sparse_moe.experts.208.w2", "model.layers.10.block_sparse_moe.experts.209.w2", "model.layers.10.block_sparse_moe.experts.210.w2", "model.layers.10.block_sparse_moe.experts.211.w2", "model.layers.10.block_sparse_moe.experts.212.w2", "model.layers.10.block_sparse_moe.experts.213.w2", "model.layers.10.block_sparse_moe.experts.214.w2", "model.layers.10.block_sparse_moe.experts.215.w2", "model.layers.10.block_sparse_moe.experts.216.w2", "model.layers.10.block_sparse_moe.experts.217.w2", "model.layers.10.block_sparse_moe.experts.218.w2", "model.layers.10.block_sparse_moe.experts.219.w2", "model.layers.10.block_sparse_moe.experts.220.w2", "model.layers.10.block_sparse_moe.experts.221.w2", "model.layers.10.block_sparse_moe.experts.222.w2", "model.layers.10.block_sparse_moe.experts.223.w2", "model.layers.10.block_sparse_moe.experts.224.w2", "model.layers.10.block_sparse_moe.experts.225.w2", "model.layers.10.block_sparse_moe.experts.226.w2", "model.layers.10.block_sparse_moe.experts.227.w2", "model.layers.10.block_sparse_moe.experts.228.w2", "model.layers.10.block_sparse_moe.experts.229.w2", "model.layers.10.block_sparse_moe.experts.230.w2", "model.layers.10.block_sparse_moe.experts.231.w2", "model.layers.10.block_sparse_moe.experts.232.w2", "model.layers.10.block_sparse_moe.experts.233.w2", "model.layers.10.block_sparse_moe.experts.234.w2", "model.layers.10.block_sparse_moe.experts.235.w2", "model.layers.10.block_sparse_moe.experts.236.w2", "model.layers.10.block_sparse_moe.experts.237.w2", "model.layers.10.block_sparse_moe.experts.238.w2", "model.layers.10.block_sparse_moe.experts.239.w2", "model.layers.10.block_sparse_moe.experts.240.w2", "model.layers.10.block_sparse_moe.experts.241.w2", "model.layers.10.block_sparse_moe.experts.242.w2", "model.layers.10.block_sparse_moe.experts.243.w2", "model.layers.10.block_sparse_moe.experts.244.w2", "model.layers.10.block_sparse_moe.experts.245.w2", "model.layers.10.block_sparse_moe.experts.246.w2", "model.layers.10.block_sparse_moe.experts.247.w2", "model.layers.10.block_sparse_moe.experts.248.w2", "model.layers.10.block_sparse_moe.experts.249.w2", "model.layers.10.block_sparse_moe.experts.250.w2", "model.layers.10.block_sparse_moe.experts.251.w2", "model.layers.10.block_sparse_moe.experts.252.w2", "model.layers.10.block_sparse_moe.experts.253.w2", "model.layers.10.block_sparse_moe.experts.254.w2", "model.layers.10.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0013918962329625972, "dbits": 1207959552 } ] }, { "idx": 55, "layers": [ "model.layers.11.self_attn.q_proj" ], "candidates": [ { "dkld": -0.001124504953622818, "dbits": 18874368 } ] }, { "idx": 56, "layers": [ "model.layers.11.self_attn.k_proj", "model.layers.11.self_attn.v_proj" ], "candidates": [ { "dkld": -2.561137080192566e-06, "dbits": 6291456 } ] }, { "idx": 57, "layers": [ "model.layers.11.self_attn.o_proj" ], "candidates": [ { "dkld": -0.005573122575879108, "dbits": 18874368 } ] }, { "idx": 58, "layers": [ "model.layers.11.block_sparse_moe.experts.0.w1", "model.layers.11.block_sparse_moe.experts.1.w1", "model.layers.11.block_sparse_moe.experts.2.w1", "model.layers.11.block_sparse_moe.experts.3.w1", "model.layers.11.block_sparse_moe.experts.4.w1", "model.layers.11.block_sparse_moe.experts.5.w1", "model.layers.11.block_sparse_moe.experts.6.w1", "model.layers.11.block_sparse_moe.experts.7.w1", "model.layers.11.block_sparse_moe.experts.8.w1", "model.layers.11.block_sparse_moe.experts.9.w1", "model.layers.11.block_sparse_moe.experts.10.w1", "model.layers.11.block_sparse_moe.experts.11.w1", "model.layers.11.block_sparse_moe.experts.12.w1", "model.layers.11.block_sparse_moe.experts.13.w1", "model.layers.11.block_sparse_moe.experts.14.w1", "model.layers.11.block_sparse_moe.experts.15.w1", "model.layers.11.block_sparse_moe.experts.16.w1", "model.layers.11.block_sparse_moe.experts.17.w1", "model.layers.11.block_sparse_moe.experts.18.w1", "model.layers.11.block_sparse_moe.experts.19.w1", "model.layers.11.block_sparse_moe.experts.20.w1", "model.layers.11.block_sparse_moe.experts.21.w1", "model.layers.11.block_sparse_moe.experts.22.w1", "model.layers.11.block_sparse_moe.experts.23.w1", "model.layers.11.block_sparse_moe.experts.24.w1", "model.layers.11.block_sparse_moe.experts.25.w1", "model.layers.11.block_sparse_moe.experts.26.w1", "model.layers.11.block_sparse_moe.experts.27.w1", "model.layers.11.block_sparse_moe.experts.28.w1", "model.layers.11.block_sparse_moe.experts.29.w1", "model.layers.11.block_sparse_moe.experts.30.w1", "model.layers.11.block_sparse_moe.experts.31.w1", "model.layers.11.block_sparse_moe.experts.32.w1", "model.layers.11.block_sparse_moe.experts.33.w1", "model.layers.11.block_sparse_moe.experts.34.w1", "model.layers.11.block_sparse_moe.experts.35.w1", "model.layers.11.block_sparse_moe.experts.36.w1", "model.layers.11.block_sparse_moe.experts.37.w1", "model.layers.11.block_sparse_moe.experts.38.w1", "model.layers.11.block_sparse_moe.experts.39.w1", "model.layers.11.block_sparse_moe.experts.40.w1", "model.layers.11.block_sparse_moe.experts.41.w1", "model.layers.11.block_sparse_moe.experts.42.w1", "model.layers.11.block_sparse_moe.experts.43.w1", "model.layers.11.block_sparse_moe.experts.44.w1", "model.layers.11.block_sparse_moe.experts.45.w1", "model.layers.11.block_sparse_moe.experts.46.w1", "model.layers.11.block_sparse_moe.experts.47.w1", "model.layers.11.block_sparse_moe.experts.48.w1", "model.layers.11.block_sparse_moe.experts.49.w1", "model.layers.11.block_sparse_moe.experts.50.w1", "model.layers.11.block_sparse_moe.experts.51.w1", "model.layers.11.block_sparse_moe.experts.52.w1", "model.layers.11.block_sparse_moe.experts.53.w1", "model.layers.11.block_sparse_moe.experts.54.w1", "model.layers.11.block_sparse_moe.experts.55.w1", "model.layers.11.block_sparse_moe.experts.56.w1", "model.layers.11.block_sparse_moe.experts.57.w1", "model.layers.11.block_sparse_moe.experts.58.w1", "model.layers.11.block_sparse_moe.experts.59.w1", "model.layers.11.block_sparse_moe.experts.60.w1", "model.layers.11.block_sparse_moe.experts.61.w1", "model.layers.11.block_sparse_moe.experts.62.w1", "model.layers.11.block_sparse_moe.experts.63.w1", "model.layers.11.block_sparse_moe.experts.64.w1", "model.layers.11.block_sparse_moe.experts.65.w1", "model.layers.11.block_sparse_moe.experts.66.w1", "model.layers.11.block_sparse_moe.experts.67.w1", "model.layers.11.block_sparse_moe.experts.68.w1", "model.layers.11.block_sparse_moe.experts.69.w1", "model.layers.11.block_sparse_moe.experts.70.w1", "model.layers.11.block_sparse_moe.experts.71.w1", "model.layers.11.block_sparse_moe.experts.72.w1", "model.layers.11.block_sparse_moe.experts.73.w1", "model.layers.11.block_sparse_moe.experts.74.w1", "model.layers.11.block_sparse_moe.experts.75.w1", "model.layers.11.block_sparse_moe.experts.76.w1", "model.layers.11.block_sparse_moe.experts.77.w1", "model.layers.11.block_sparse_moe.experts.78.w1", "model.layers.11.block_sparse_moe.experts.79.w1", "model.layers.11.block_sparse_moe.experts.80.w1", "model.layers.11.block_sparse_moe.experts.81.w1", "model.layers.11.block_sparse_moe.experts.82.w1", "model.layers.11.block_sparse_moe.experts.83.w1", "model.layers.11.block_sparse_moe.experts.84.w1", "model.layers.11.block_sparse_moe.experts.85.w1", "model.layers.11.block_sparse_moe.experts.86.w1", "model.layers.11.block_sparse_moe.experts.87.w1", "model.layers.11.block_sparse_moe.experts.88.w1", "model.layers.11.block_sparse_moe.experts.89.w1", "model.layers.11.block_sparse_moe.experts.90.w1", "model.layers.11.block_sparse_moe.experts.91.w1", "model.layers.11.block_sparse_moe.experts.92.w1", "model.layers.11.block_sparse_moe.experts.93.w1", "model.layers.11.block_sparse_moe.experts.94.w1", "model.layers.11.block_sparse_moe.experts.95.w1", "model.layers.11.block_sparse_moe.experts.96.w1", "model.layers.11.block_sparse_moe.experts.97.w1", "model.layers.11.block_sparse_moe.experts.98.w1", "model.layers.11.block_sparse_moe.experts.99.w1", "model.layers.11.block_sparse_moe.experts.100.w1", "model.layers.11.block_sparse_moe.experts.101.w1", "model.layers.11.block_sparse_moe.experts.102.w1", "model.layers.11.block_sparse_moe.experts.103.w1", "model.layers.11.block_sparse_moe.experts.104.w1", "model.layers.11.block_sparse_moe.experts.105.w1", "model.layers.11.block_sparse_moe.experts.106.w1", "model.layers.11.block_sparse_moe.experts.107.w1", "model.layers.11.block_sparse_moe.experts.108.w1", "model.layers.11.block_sparse_moe.experts.109.w1", "model.layers.11.block_sparse_moe.experts.110.w1", "model.layers.11.block_sparse_moe.experts.111.w1", "model.layers.11.block_sparse_moe.experts.112.w1", "model.layers.11.block_sparse_moe.experts.113.w1", "model.layers.11.block_sparse_moe.experts.114.w1", "model.layers.11.block_sparse_moe.experts.115.w1", "model.layers.11.block_sparse_moe.experts.116.w1", "model.layers.11.block_sparse_moe.experts.117.w1", "model.layers.11.block_sparse_moe.experts.118.w1", "model.layers.11.block_sparse_moe.experts.119.w1", "model.layers.11.block_sparse_moe.experts.120.w1", "model.layers.11.block_sparse_moe.experts.121.w1", "model.layers.11.block_sparse_moe.experts.122.w1", "model.layers.11.block_sparse_moe.experts.123.w1", "model.layers.11.block_sparse_moe.experts.124.w1", "model.layers.11.block_sparse_moe.experts.125.w1", "model.layers.11.block_sparse_moe.experts.126.w1", "model.layers.11.block_sparse_moe.experts.127.w1", "model.layers.11.block_sparse_moe.experts.128.w1", "model.layers.11.block_sparse_moe.experts.129.w1", "model.layers.11.block_sparse_moe.experts.130.w1", "model.layers.11.block_sparse_moe.experts.131.w1", "model.layers.11.block_sparse_moe.experts.132.w1", "model.layers.11.block_sparse_moe.experts.133.w1", "model.layers.11.block_sparse_moe.experts.134.w1", "model.layers.11.block_sparse_moe.experts.135.w1", "model.layers.11.block_sparse_moe.experts.136.w1", "model.layers.11.block_sparse_moe.experts.137.w1", "model.layers.11.block_sparse_moe.experts.138.w1", "model.layers.11.block_sparse_moe.experts.139.w1", "model.layers.11.block_sparse_moe.experts.140.w1", "model.layers.11.block_sparse_moe.experts.141.w1", "model.layers.11.block_sparse_moe.experts.142.w1", "model.layers.11.block_sparse_moe.experts.143.w1", "model.layers.11.block_sparse_moe.experts.144.w1", "model.layers.11.block_sparse_moe.experts.145.w1", "model.layers.11.block_sparse_moe.experts.146.w1", "model.layers.11.block_sparse_moe.experts.147.w1", "model.layers.11.block_sparse_moe.experts.148.w1", "model.layers.11.block_sparse_moe.experts.149.w1", "model.layers.11.block_sparse_moe.experts.150.w1", "model.layers.11.block_sparse_moe.experts.151.w1", "model.layers.11.block_sparse_moe.experts.152.w1", "model.layers.11.block_sparse_moe.experts.153.w1", "model.layers.11.block_sparse_moe.experts.154.w1", "model.layers.11.block_sparse_moe.experts.155.w1", "model.layers.11.block_sparse_moe.experts.156.w1", "model.layers.11.block_sparse_moe.experts.157.w1", "model.layers.11.block_sparse_moe.experts.158.w1", "model.layers.11.block_sparse_moe.experts.159.w1", "model.layers.11.block_sparse_moe.experts.160.w1", "model.layers.11.block_sparse_moe.experts.161.w1", "model.layers.11.block_sparse_moe.experts.162.w1", "model.layers.11.block_sparse_moe.experts.163.w1", "model.layers.11.block_sparse_moe.experts.164.w1", "model.layers.11.block_sparse_moe.experts.165.w1", "model.layers.11.block_sparse_moe.experts.166.w1", "model.layers.11.block_sparse_moe.experts.167.w1", "model.layers.11.block_sparse_moe.experts.168.w1", "model.layers.11.block_sparse_moe.experts.169.w1", "model.layers.11.block_sparse_moe.experts.170.w1", "model.layers.11.block_sparse_moe.experts.171.w1", "model.layers.11.block_sparse_moe.experts.172.w1", "model.layers.11.block_sparse_moe.experts.173.w1", "model.layers.11.block_sparse_moe.experts.174.w1", "model.layers.11.block_sparse_moe.experts.175.w1", "model.layers.11.block_sparse_moe.experts.176.w1", "model.layers.11.block_sparse_moe.experts.177.w1", "model.layers.11.block_sparse_moe.experts.178.w1", "model.layers.11.block_sparse_moe.experts.179.w1", "model.layers.11.block_sparse_moe.experts.180.w1", "model.layers.11.block_sparse_moe.experts.181.w1", "model.layers.11.block_sparse_moe.experts.182.w1", "model.layers.11.block_sparse_moe.experts.183.w1", "model.layers.11.block_sparse_moe.experts.184.w1", "model.layers.11.block_sparse_moe.experts.185.w1", "model.layers.11.block_sparse_moe.experts.186.w1", "model.layers.11.block_sparse_moe.experts.187.w1", "model.layers.11.block_sparse_moe.experts.188.w1", "model.layers.11.block_sparse_moe.experts.189.w1", "model.layers.11.block_sparse_moe.experts.190.w1", "model.layers.11.block_sparse_moe.experts.191.w1", "model.layers.11.block_sparse_moe.experts.192.w1", "model.layers.11.block_sparse_moe.experts.193.w1", "model.layers.11.block_sparse_moe.experts.194.w1", "model.layers.11.block_sparse_moe.experts.195.w1", "model.layers.11.block_sparse_moe.experts.196.w1", "model.layers.11.block_sparse_moe.experts.197.w1", "model.layers.11.block_sparse_moe.experts.198.w1", "model.layers.11.block_sparse_moe.experts.199.w1", "model.layers.11.block_sparse_moe.experts.200.w1", "model.layers.11.block_sparse_moe.experts.201.w1", "model.layers.11.block_sparse_moe.experts.202.w1", "model.layers.11.block_sparse_moe.experts.203.w1", "model.layers.11.block_sparse_moe.experts.204.w1", "model.layers.11.block_sparse_moe.experts.205.w1", "model.layers.11.block_sparse_moe.experts.206.w1", "model.layers.11.block_sparse_moe.experts.207.w1", "model.layers.11.block_sparse_moe.experts.208.w1", "model.layers.11.block_sparse_moe.experts.209.w1", "model.layers.11.block_sparse_moe.experts.210.w1", "model.layers.11.block_sparse_moe.experts.211.w1", "model.layers.11.block_sparse_moe.experts.212.w1", "model.layers.11.block_sparse_moe.experts.213.w1", "model.layers.11.block_sparse_moe.experts.214.w1", "model.layers.11.block_sparse_moe.experts.215.w1", "model.layers.11.block_sparse_moe.experts.216.w1", "model.layers.11.block_sparse_moe.experts.217.w1", "model.layers.11.block_sparse_moe.experts.218.w1", "model.layers.11.block_sparse_moe.experts.219.w1", "model.layers.11.block_sparse_moe.experts.220.w1", "model.layers.11.block_sparse_moe.experts.221.w1", "model.layers.11.block_sparse_moe.experts.222.w1", "model.layers.11.block_sparse_moe.experts.223.w1", "model.layers.11.block_sparse_moe.experts.224.w1", "model.layers.11.block_sparse_moe.experts.225.w1", "model.layers.11.block_sparse_moe.experts.226.w1", "model.layers.11.block_sparse_moe.experts.227.w1", "model.layers.11.block_sparse_moe.experts.228.w1", "model.layers.11.block_sparse_moe.experts.229.w1", "model.layers.11.block_sparse_moe.experts.230.w1", "model.layers.11.block_sparse_moe.experts.231.w1", "model.layers.11.block_sparse_moe.experts.232.w1", "model.layers.11.block_sparse_moe.experts.233.w1", "model.layers.11.block_sparse_moe.experts.234.w1", "model.layers.11.block_sparse_moe.experts.235.w1", "model.layers.11.block_sparse_moe.experts.236.w1", "model.layers.11.block_sparse_moe.experts.237.w1", "model.layers.11.block_sparse_moe.experts.238.w1", "model.layers.11.block_sparse_moe.experts.239.w1", "model.layers.11.block_sparse_moe.experts.240.w1", "model.layers.11.block_sparse_moe.experts.241.w1", "model.layers.11.block_sparse_moe.experts.242.w1", "model.layers.11.block_sparse_moe.experts.243.w1", "model.layers.11.block_sparse_moe.experts.244.w1", "model.layers.11.block_sparse_moe.experts.245.w1", "model.layers.11.block_sparse_moe.experts.246.w1", "model.layers.11.block_sparse_moe.experts.247.w1", "model.layers.11.block_sparse_moe.experts.248.w1", "model.layers.11.block_sparse_moe.experts.249.w1", "model.layers.11.block_sparse_moe.experts.250.w1", "model.layers.11.block_sparse_moe.experts.251.w1", "model.layers.11.block_sparse_moe.experts.252.w1", "model.layers.11.block_sparse_moe.experts.253.w1", "model.layers.11.block_sparse_moe.experts.254.w1", "model.layers.11.block_sparse_moe.experts.255.w1", "model.layers.11.block_sparse_moe.experts.0.w3", "model.layers.11.block_sparse_moe.experts.1.w3", "model.layers.11.block_sparse_moe.experts.2.w3", "model.layers.11.block_sparse_moe.experts.3.w3", "model.layers.11.block_sparse_moe.experts.4.w3", "model.layers.11.block_sparse_moe.experts.5.w3", "model.layers.11.block_sparse_moe.experts.6.w3", "model.layers.11.block_sparse_moe.experts.7.w3", "model.layers.11.block_sparse_moe.experts.8.w3", "model.layers.11.block_sparse_moe.experts.9.w3", "model.layers.11.block_sparse_moe.experts.10.w3", "model.layers.11.block_sparse_moe.experts.11.w3", "model.layers.11.block_sparse_moe.experts.12.w3", "model.layers.11.block_sparse_moe.experts.13.w3", "model.layers.11.block_sparse_moe.experts.14.w3", "model.layers.11.block_sparse_moe.experts.15.w3", "model.layers.11.block_sparse_moe.experts.16.w3", "model.layers.11.block_sparse_moe.experts.17.w3", "model.layers.11.block_sparse_moe.experts.18.w3", "model.layers.11.block_sparse_moe.experts.19.w3", "model.layers.11.block_sparse_moe.experts.20.w3", "model.layers.11.block_sparse_moe.experts.21.w3", "model.layers.11.block_sparse_moe.experts.22.w3", "model.layers.11.block_sparse_moe.experts.23.w3", "model.layers.11.block_sparse_moe.experts.24.w3", "model.layers.11.block_sparse_moe.experts.25.w3", "model.layers.11.block_sparse_moe.experts.26.w3", "model.layers.11.block_sparse_moe.experts.27.w3", "model.layers.11.block_sparse_moe.experts.28.w3", "model.layers.11.block_sparse_moe.experts.29.w3", "model.layers.11.block_sparse_moe.experts.30.w3", "model.layers.11.block_sparse_moe.experts.31.w3", "model.layers.11.block_sparse_moe.experts.32.w3", "model.layers.11.block_sparse_moe.experts.33.w3", "model.layers.11.block_sparse_moe.experts.34.w3", "model.layers.11.block_sparse_moe.experts.35.w3", "model.layers.11.block_sparse_moe.experts.36.w3", "model.layers.11.block_sparse_moe.experts.37.w3", "model.layers.11.block_sparse_moe.experts.38.w3", "model.layers.11.block_sparse_moe.experts.39.w3", "model.layers.11.block_sparse_moe.experts.40.w3", "model.layers.11.block_sparse_moe.experts.41.w3", "model.layers.11.block_sparse_moe.experts.42.w3", "model.layers.11.block_sparse_moe.experts.43.w3", "model.layers.11.block_sparse_moe.experts.44.w3", "model.layers.11.block_sparse_moe.experts.45.w3", "model.layers.11.block_sparse_moe.experts.46.w3", "model.layers.11.block_sparse_moe.experts.47.w3", "model.layers.11.block_sparse_moe.experts.48.w3", "model.layers.11.block_sparse_moe.experts.49.w3", "model.layers.11.block_sparse_moe.experts.50.w3", "model.layers.11.block_sparse_moe.experts.51.w3", "model.layers.11.block_sparse_moe.experts.52.w3", "model.layers.11.block_sparse_moe.experts.53.w3", "model.layers.11.block_sparse_moe.experts.54.w3", "model.layers.11.block_sparse_moe.experts.55.w3", "model.layers.11.block_sparse_moe.experts.56.w3", "model.layers.11.block_sparse_moe.experts.57.w3", "model.layers.11.block_sparse_moe.experts.58.w3", "model.layers.11.block_sparse_moe.experts.59.w3", "model.layers.11.block_sparse_moe.experts.60.w3", "model.layers.11.block_sparse_moe.experts.61.w3", "model.layers.11.block_sparse_moe.experts.62.w3", "model.layers.11.block_sparse_moe.experts.63.w3", "model.layers.11.block_sparse_moe.experts.64.w3", "model.layers.11.block_sparse_moe.experts.65.w3", "model.layers.11.block_sparse_moe.experts.66.w3", "model.layers.11.block_sparse_moe.experts.67.w3", "model.layers.11.block_sparse_moe.experts.68.w3", "model.layers.11.block_sparse_moe.experts.69.w3", "model.layers.11.block_sparse_moe.experts.70.w3", "model.layers.11.block_sparse_moe.experts.71.w3", "model.layers.11.block_sparse_moe.experts.72.w3", "model.layers.11.block_sparse_moe.experts.73.w3", "model.layers.11.block_sparse_moe.experts.74.w3", "model.layers.11.block_sparse_moe.experts.75.w3", "model.layers.11.block_sparse_moe.experts.76.w3", "model.layers.11.block_sparse_moe.experts.77.w3", "model.layers.11.block_sparse_moe.experts.78.w3", "model.layers.11.block_sparse_moe.experts.79.w3", "model.layers.11.block_sparse_moe.experts.80.w3", "model.layers.11.block_sparse_moe.experts.81.w3", "model.layers.11.block_sparse_moe.experts.82.w3", "model.layers.11.block_sparse_moe.experts.83.w3", "model.layers.11.block_sparse_moe.experts.84.w3", "model.layers.11.block_sparse_moe.experts.85.w3", "model.layers.11.block_sparse_moe.experts.86.w3", "model.layers.11.block_sparse_moe.experts.87.w3", "model.layers.11.block_sparse_moe.experts.88.w3", "model.layers.11.block_sparse_moe.experts.89.w3", "model.layers.11.block_sparse_moe.experts.90.w3", "model.layers.11.block_sparse_moe.experts.91.w3", "model.layers.11.block_sparse_moe.experts.92.w3", "model.layers.11.block_sparse_moe.experts.93.w3", "model.layers.11.block_sparse_moe.experts.94.w3", "model.layers.11.block_sparse_moe.experts.95.w3", "model.layers.11.block_sparse_moe.experts.96.w3", "model.layers.11.block_sparse_moe.experts.97.w3", "model.layers.11.block_sparse_moe.experts.98.w3", "model.layers.11.block_sparse_moe.experts.99.w3", "model.layers.11.block_sparse_moe.experts.100.w3", "model.layers.11.block_sparse_moe.experts.101.w3", "model.layers.11.block_sparse_moe.experts.102.w3", "model.layers.11.block_sparse_moe.experts.103.w3", "model.layers.11.block_sparse_moe.experts.104.w3", "model.layers.11.block_sparse_moe.experts.105.w3", "model.layers.11.block_sparse_moe.experts.106.w3", "model.layers.11.block_sparse_moe.experts.107.w3", "model.layers.11.block_sparse_moe.experts.108.w3", "model.layers.11.block_sparse_moe.experts.109.w3", "model.layers.11.block_sparse_moe.experts.110.w3", "model.layers.11.block_sparse_moe.experts.111.w3", "model.layers.11.block_sparse_moe.experts.112.w3", "model.layers.11.block_sparse_moe.experts.113.w3", "model.layers.11.block_sparse_moe.experts.114.w3", "model.layers.11.block_sparse_moe.experts.115.w3", "model.layers.11.block_sparse_moe.experts.116.w3", "model.layers.11.block_sparse_moe.experts.117.w3", "model.layers.11.block_sparse_moe.experts.118.w3", "model.layers.11.block_sparse_moe.experts.119.w3", "model.layers.11.block_sparse_moe.experts.120.w3", "model.layers.11.block_sparse_moe.experts.121.w3", "model.layers.11.block_sparse_moe.experts.122.w3", "model.layers.11.block_sparse_moe.experts.123.w3", "model.layers.11.block_sparse_moe.experts.124.w3", "model.layers.11.block_sparse_moe.experts.125.w3", "model.layers.11.block_sparse_moe.experts.126.w3", "model.layers.11.block_sparse_moe.experts.127.w3", "model.layers.11.block_sparse_moe.experts.128.w3", "model.layers.11.block_sparse_moe.experts.129.w3", "model.layers.11.block_sparse_moe.experts.130.w3", "model.layers.11.block_sparse_moe.experts.131.w3", "model.layers.11.block_sparse_moe.experts.132.w3", "model.layers.11.block_sparse_moe.experts.133.w3", "model.layers.11.block_sparse_moe.experts.134.w3", "model.layers.11.block_sparse_moe.experts.135.w3", "model.layers.11.block_sparse_moe.experts.136.w3", "model.layers.11.block_sparse_moe.experts.137.w3", "model.layers.11.block_sparse_moe.experts.138.w3", "model.layers.11.block_sparse_moe.experts.139.w3", "model.layers.11.block_sparse_moe.experts.140.w3", "model.layers.11.block_sparse_moe.experts.141.w3", "model.layers.11.block_sparse_moe.experts.142.w3", "model.layers.11.block_sparse_moe.experts.143.w3", "model.layers.11.block_sparse_moe.experts.144.w3", "model.layers.11.block_sparse_moe.experts.145.w3", "model.layers.11.block_sparse_moe.experts.146.w3", "model.layers.11.block_sparse_moe.experts.147.w3", "model.layers.11.block_sparse_moe.experts.148.w3", "model.layers.11.block_sparse_moe.experts.149.w3", "model.layers.11.block_sparse_moe.experts.150.w3", "model.layers.11.block_sparse_moe.experts.151.w3", "model.layers.11.block_sparse_moe.experts.152.w3", "model.layers.11.block_sparse_moe.experts.153.w3", "model.layers.11.block_sparse_moe.experts.154.w3", "model.layers.11.block_sparse_moe.experts.155.w3", "model.layers.11.block_sparse_moe.experts.156.w3", "model.layers.11.block_sparse_moe.experts.157.w3", "model.layers.11.block_sparse_moe.experts.158.w3", "model.layers.11.block_sparse_moe.experts.159.w3", "model.layers.11.block_sparse_moe.experts.160.w3", "model.layers.11.block_sparse_moe.experts.161.w3", "model.layers.11.block_sparse_moe.experts.162.w3", "model.layers.11.block_sparse_moe.experts.163.w3", "model.layers.11.block_sparse_moe.experts.164.w3", "model.layers.11.block_sparse_moe.experts.165.w3", "model.layers.11.block_sparse_moe.experts.166.w3", "model.layers.11.block_sparse_moe.experts.167.w3", "model.layers.11.block_sparse_moe.experts.168.w3", "model.layers.11.block_sparse_moe.experts.169.w3", "model.layers.11.block_sparse_moe.experts.170.w3", "model.layers.11.block_sparse_moe.experts.171.w3", "model.layers.11.block_sparse_moe.experts.172.w3", "model.layers.11.block_sparse_moe.experts.173.w3", "model.layers.11.block_sparse_moe.experts.174.w3", "model.layers.11.block_sparse_moe.experts.175.w3", "model.layers.11.block_sparse_moe.experts.176.w3", "model.layers.11.block_sparse_moe.experts.177.w3", "model.layers.11.block_sparse_moe.experts.178.w3", "model.layers.11.block_sparse_moe.experts.179.w3", "model.layers.11.block_sparse_moe.experts.180.w3", "model.layers.11.block_sparse_moe.experts.181.w3", "model.layers.11.block_sparse_moe.experts.182.w3", "model.layers.11.block_sparse_moe.experts.183.w3", "model.layers.11.block_sparse_moe.experts.184.w3", "model.layers.11.block_sparse_moe.experts.185.w3", "model.layers.11.block_sparse_moe.experts.186.w3", "model.layers.11.block_sparse_moe.experts.187.w3", "model.layers.11.block_sparse_moe.experts.188.w3", "model.layers.11.block_sparse_moe.experts.189.w3", "model.layers.11.block_sparse_moe.experts.190.w3", "model.layers.11.block_sparse_moe.experts.191.w3", "model.layers.11.block_sparse_moe.experts.192.w3", "model.layers.11.block_sparse_moe.experts.193.w3", "model.layers.11.block_sparse_moe.experts.194.w3", "model.layers.11.block_sparse_moe.experts.195.w3", "model.layers.11.block_sparse_moe.experts.196.w3", "model.layers.11.block_sparse_moe.experts.197.w3", "model.layers.11.block_sparse_moe.experts.198.w3", "model.layers.11.block_sparse_moe.experts.199.w3", "model.layers.11.block_sparse_moe.experts.200.w3", "model.layers.11.block_sparse_moe.experts.201.w3", "model.layers.11.block_sparse_moe.experts.202.w3", "model.layers.11.block_sparse_moe.experts.203.w3", "model.layers.11.block_sparse_moe.experts.204.w3", "model.layers.11.block_sparse_moe.experts.205.w3", "model.layers.11.block_sparse_moe.experts.206.w3", "model.layers.11.block_sparse_moe.experts.207.w3", "model.layers.11.block_sparse_moe.experts.208.w3", "model.layers.11.block_sparse_moe.experts.209.w3", "model.layers.11.block_sparse_moe.experts.210.w3", "model.layers.11.block_sparse_moe.experts.211.w3", "model.layers.11.block_sparse_moe.experts.212.w3", "model.layers.11.block_sparse_moe.experts.213.w3", "model.layers.11.block_sparse_moe.experts.214.w3", "model.layers.11.block_sparse_moe.experts.215.w3", "model.layers.11.block_sparse_moe.experts.216.w3", "model.layers.11.block_sparse_moe.experts.217.w3", "model.layers.11.block_sparse_moe.experts.218.w3", "model.layers.11.block_sparse_moe.experts.219.w3", "model.layers.11.block_sparse_moe.experts.220.w3", "model.layers.11.block_sparse_moe.experts.221.w3", "model.layers.11.block_sparse_moe.experts.222.w3", "model.layers.11.block_sparse_moe.experts.223.w3", "model.layers.11.block_sparse_moe.experts.224.w3", "model.layers.11.block_sparse_moe.experts.225.w3", "model.layers.11.block_sparse_moe.experts.226.w3", "model.layers.11.block_sparse_moe.experts.227.w3", "model.layers.11.block_sparse_moe.experts.228.w3", "model.layers.11.block_sparse_moe.experts.229.w3", "model.layers.11.block_sparse_moe.experts.230.w3", "model.layers.11.block_sparse_moe.experts.231.w3", "model.layers.11.block_sparse_moe.experts.232.w3", "model.layers.11.block_sparse_moe.experts.233.w3", "model.layers.11.block_sparse_moe.experts.234.w3", "model.layers.11.block_sparse_moe.experts.235.w3", "model.layers.11.block_sparse_moe.experts.236.w3", "model.layers.11.block_sparse_moe.experts.237.w3", "model.layers.11.block_sparse_moe.experts.238.w3", "model.layers.11.block_sparse_moe.experts.239.w3", "model.layers.11.block_sparse_moe.experts.240.w3", "model.layers.11.block_sparse_moe.experts.241.w3", "model.layers.11.block_sparse_moe.experts.242.w3", "model.layers.11.block_sparse_moe.experts.243.w3", "model.layers.11.block_sparse_moe.experts.244.w3", "model.layers.11.block_sparse_moe.experts.245.w3", "model.layers.11.block_sparse_moe.experts.246.w3", "model.layers.11.block_sparse_moe.experts.247.w3", "model.layers.11.block_sparse_moe.experts.248.w3", "model.layers.11.block_sparse_moe.experts.249.w3", "model.layers.11.block_sparse_moe.experts.250.w3", "model.layers.11.block_sparse_moe.experts.251.w3", "model.layers.11.block_sparse_moe.experts.252.w3", "model.layers.11.block_sparse_moe.experts.253.w3", "model.layers.11.block_sparse_moe.experts.254.w3", "model.layers.11.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.00040031224489212036, "dbits": 2415919104 } ] }, { "idx": 59, "layers": [ "model.layers.11.block_sparse_moe.experts.0.w2", "model.layers.11.block_sparse_moe.experts.1.w2", "model.layers.11.block_sparse_moe.experts.2.w2", "model.layers.11.block_sparse_moe.experts.3.w2", "model.layers.11.block_sparse_moe.experts.4.w2", "model.layers.11.block_sparse_moe.experts.5.w2", "model.layers.11.block_sparse_moe.experts.6.w2", "model.layers.11.block_sparse_moe.experts.7.w2", "model.layers.11.block_sparse_moe.experts.8.w2", "model.layers.11.block_sparse_moe.experts.9.w2", "model.layers.11.block_sparse_moe.experts.10.w2", "model.layers.11.block_sparse_moe.experts.11.w2", "model.layers.11.block_sparse_moe.experts.12.w2", "model.layers.11.block_sparse_moe.experts.13.w2", "model.layers.11.block_sparse_moe.experts.14.w2", "model.layers.11.block_sparse_moe.experts.15.w2", "model.layers.11.block_sparse_moe.experts.16.w2", "model.layers.11.block_sparse_moe.experts.17.w2", "model.layers.11.block_sparse_moe.experts.18.w2", "model.layers.11.block_sparse_moe.experts.19.w2", "model.layers.11.block_sparse_moe.experts.20.w2", "model.layers.11.block_sparse_moe.experts.21.w2", "model.layers.11.block_sparse_moe.experts.22.w2", "model.layers.11.block_sparse_moe.experts.23.w2", "model.layers.11.block_sparse_moe.experts.24.w2", "model.layers.11.block_sparse_moe.experts.25.w2", "model.layers.11.block_sparse_moe.experts.26.w2", "model.layers.11.block_sparse_moe.experts.27.w2", "model.layers.11.block_sparse_moe.experts.28.w2", "model.layers.11.block_sparse_moe.experts.29.w2", "model.layers.11.block_sparse_moe.experts.30.w2", "model.layers.11.block_sparse_moe.experts.31.w2", "model.layers.11.block_sparse_moe.experts.32.w2", "model.layers.11.block_sparse_moe.experts.33.w2", "model.layers.11.block_sparse_moe.experts.34.w2", "model.layers.11.block_sparse_moe.experts.35.w2", "model.layers.11.block_sparse_moe.experts.36.w2", "model.layers.11.block_sparse_moe.experts.37.w2", "model.layers.11.block_sparse_moe.experts.38.w2", "model.layers.11.block_sparse_moe.experts.39.w2", "model.layers.11.block_sparse_moe.experts.40.w2", "model.layers.11.block_sparse_moe.experts.41.w2", "model.layers.11.block_sparse_moe.experts.42.w2", "model.layers.11.block_sparse_moe.experts.43.w2", "model.layers.11.block_sparse_moe.experts.44.w2", "model.layers.11.block_sparse_moe.experts.45.w2", "model.layers.11.block_sparse_moe.experts.46.w2", "model.layers.11.block_sparse_moe.experts.47.w2", "model.layers.11.block_sparse_moe.experts.48.w2", "model.layers.11.block_sparse_moe.experts.49.w2", "model.layers.11.block_sparse_moe.experts.50.w2", "model.layers.11.block_sparse_moe.experts.51.w2", "model.layers.11.block_sparse_moe.experts.52.w2", "model.layers.11.block_sparse_moe.experts.53.w2", "model.layers.11.block_sparse_moe.experts.54.w2", "model.layers.11.block_sparse_moe.experts.55.w2", "model.layers.11.block_sparse_moe.experts.56.w2", "model.layers.11.block_sparse_moe.experts.57.w2", "model.layers.11.block_sparse_moe.experts.58.w2", "model.layers.11.block_sparse_moe.experts.59.w2", "model.layers.11.block_sparse_moe.experts.60.w2", "model.layers.11.block_sparse_moe.experts.61.w2", "model.layers.11.block_sparse_moe.experts.62.w2", "model.layers.11.block_sparse_moe.experts.63.w2", "model.layers.11.block_sparse_moe.experts.64.w2", "model.layers.11.block_sparse_moe.experts.65.w2", "model.layers.11.block_sparse_moe.experts.66.w2", "model.layers.11.block_sparse_moe.experts.67.w2", "model.layers.11.block_sparse_moe.experts.68.w2", "model.layers.11.block_sparse_moe.experts.69.w2", "model.layers.11.block_sparse_moe.experts.70.w2", "model.layers.11.block_sparse_moe.experts.71.w2", "model.layers.11.block_sparse_moe.experts.72.w2", "model.layers.11.block_sparse_moe.experts.73.w2", "model.layers.11.block_sparse_moe.experts.74.w2", "model.layers.11.block_sparse_moe.experts.75.w2", "model.layers.11.block_sparse_moe.experts.76.w2", "model.layers.11.block_sparse_moe.experts.77.w2", "model.layers.11.block_sparse_moe.experts.78.w2", "model.layers.11.block_sparse_moe.experts.79.w2", "model.layers.11.block_sparse_moe.experts.80.w2", "model.layers.11.block_sparse_moe.experts.81.w2", "model.layers.11.block_sparse_moe.experts.82.w2", "model.layers.11.block_sparse_moe.experts.83.w2", "model.layers.11.block_sparse_moe.experts.84.w2", "model.layers.11.block_sparse_moe.experts.85.w2", "model.layers.11.block_sparse_moe.experts.86.w2", "model.layers.11.block_sparse_moe.experts.87.w2", "model.layers.11.block_sparse_moe.experts.88.w2", "model.layers.11.block_sparse_moe.experts.89.w2", "model.layers.11.block_sparse_moe.experts.90.w2", "model.layers.11.block_sparse_moe.experts.91.w2", "model.layers.11.block_sparse_moe.experts.92.w2", "model.layers.11.block_sparse_moe.experts.93.w2", "model.layers.11.block_sparse_moe.experts.94.w2", "model.layers.11.block_sparse_moe.experts.95.w2", "model.layers.11.block_sparse_moe.experts.96.w2", "model.layers.11.block_sparse_moe.experts.97.w2", "model.layers.11.block_sparse_moe.experts.98.w2", "model.layers.11.block_sparse_moe.experts.99.w2", "model.layers.11.block_sparse_moe.experts.100.w2", "model.layers.11.block_sparse_moe.experts.101.w2", "model.layers.11.block_sparse_moe.experts.102.w2", "model.layers.11.block_sparse_moe.experts.103.w2", "model.layers.11.block_sparse_moe.experts.104.w2", "model.layers.11.block_sparse_moe.experts.105.w2", "model.layers.11.block_sparse_moe.experts.106.w2", "model.layers.11.block_sparse_moe.experts.107.w2", "model.layers.11.block_sparse_moe.experts.108.w2", "model.layers.11.block_sparse_moe.experts.109.w2", "model.layers.11.block_sparse_moe.experts.110.w2", "model.layers.11.block_sparse_moe.experts.111.w2", "model.layers.11.block_sparse_moe.experts.112.w2", "model.layers.11.block_sparse_moe.experts.113.w2", "model.layers.11.block_sparse_moe.experts.114.w2", "model.layers.11.block_sparse_moe.experts.115.w2", "model.layers.11.block_sparse_moe.experts.116.w2", "model.layers.11.block_sparse_moe.experts.117.w2", "model.layers.11.block_sparse_moe.experts.118.w2", "model.layers.11.block_sparse_moe.experts.119.w2", "model.layers.11.block_sparse_moe.experts.120.w2", "model.layers.11.block_sparse_moe.experts.121.w2", "model.layers.11.block_sparse_moe.experts.122.w2", "model.layers.11.block_sparse_moe.experts.123.w2", "model.layers.11.block_sparse_moe.experts.124.w2", "model.layers.11.block_sparse_moe.experts.125.w2", "model.layers.11.block_sparse_moe.experts.126.w2", "model.layers.11.block_sparse_moe.experts.127.w2", "model.layers.11.block_sparse_moe.experts.128.w2", "model.layers.11.block_sparse_moe.experts.129.w2", "model.layers.11.block_sparse_moe.experts.130.w2", "model.layers.11.block_sparse_moe.experts.131.w2", "model.layers.11.block_sparse_moe.experts.132.w2", "model.layers.11.block_sparse_moe.experts.133.w2", "model.layers.11.block_sparse_moe.experts.134.w2", "model.layers.11.block_sparse_moe.experts.135.w2", "model.layers.11.block_sparse_moe.experts.136.w2", "model.layers.11.block_sparse_moe.experts.137.w2", "model.layers.11.block_sparse_moe.experts.138.w2", "model.layers.11.block_sparse_moe.experts.139.w2", "model.layers.11.block_sparse_moe.experts.140.w2", "model.layers.11.block_sparse_moe.experts.141.w2", "model.layers.11.block_sparse_moe.experts.142.w2", "model.layers.11.block_sparse_moe.experts.143.w2", "model.layers.11.block_sparse_moe.experts.144.w2", "model.layers.11.block_sparse_moe.experts.145.w2", "model.layers.11.block_sparse_moe.experts.146.w2", "model.layers.11.block_sparse_moe.experts.147.w2", "model.layers.11.block_sparse_moe.experts.148.w2", "model.layers.11.block_sparse_moe.experts.149.w2", "model.layers.11.block_sparse_moe.experts.150.w2", "model.layers.11.block_sparse_moe.experts.151.w2", "model.layers.11.block_sparse_moe.experts.152.w2", "model.layers.11.block_sparse_moe.experts.153.w2", "model.layers.11.block_sparse_moe.experts.154.w2", "model.layers.11.block_sparse_moe.experts.155.w2", "model.layers.11.block_sparse_moe.experts.156.w2", "model.layers.11.block_sparse_moe.experts.157.w2", "model.layers.11.block_sparse_moe.experts.158.w2", "model.layers.11.block_sparse_moe.experts.159.w2", "model.layers.11.block_sparse_moe.experts.160.w2", "model.layers.11.block_sparse_moe.experts.161.w2", "model.layers.11.block_sparse_moe.experts.162.w2", "model.layers.11.block_sparse_moe.experts.163.w2", "model.layers.11.block_sparse_moe.experts.164.w2", "model.layers.11.block_sparse_moe.experts.165.w2", "model.layers.11.block_sparse_moe.experts.166.w2", "model.layers.11.block_sparse_moe.experts.167.w2", "model.layers.11.block_sparse_moe.experts.168.w2", "model.layers.11.block_sparse_moe.experts.169.w2", "model.layers.11.block_sparse_moe.experts.170.w2", "model.layers.11.block_sparse_moe.experts.171.w2", "model.layers.11.block_sparse_moe.experts.172.w2", "model.layers.11.block_sparse_moe.experts.173.w2", "model.layers.11.block_sparse_moe.experts.174.w2", "model.layers.11.block_sparse_moe.experts.175.w2", "model.layers.11.block_sparse_moe.experts.176.w2", "model.layers.11.block_sparse_moe.experts.177.w2", "model.layers.11.block_sparse_moe.experts.178.w2", "model.layers.11.block_sparse_moe.experts.179.w2", "model.layers.11.block_sparse_moe.experts.180.w2", "model.layers.11.block_sparse_moe.experts.181.w2", "model.layers.11.block_sparse_moe.experts.182.w2", "model.layers.11.block_sparse_moe.experts.183.w2", "model.layers.11.block_sparse_moe.experts.184.w2", "model.layers.11.block_sparse_moe.experts.185.w2", "model.layers.11.block_sparse_moe.experts.186.w2", "model.layers.11.block_sparse_moe.experts.187.w2", "model.layers.11.block_sparse_moe.experts.188.w2", "model.layers.11.block_sparse_moe.experts.189.w2", "model.layers.11.block_sparse_moe.experts.190.w2", "model.layers.11.block_sparse_moe.experts.191.w2", "model.layers.11.block_sparse_moe.experts.192.w2", "model.layers.11.block_sparse_moe.experts.193.w2", "model.layers.11.block_sparse_moe.experts.194.w2", "model.layers.11.block_sparse_moe.experts.195.w2", "model.layers.11.block_sparse_moe.experts.196.w2", "model.layers.11.block_sparse_moe.experts.197.w2", "model.layers.11.block_sparse_moe.experts.198.w2", "model.layers.11.block_sparse_moe.experts.199.w2", "model.layers.11.block_sparse_moe.experts.200.w2", "model.layers.11.block_sparse_moe.experts.201.w2", "model.layers.11.block_sparse_moe.experts.202.w2", "model.layers.11.block_sparse_moe.experts.203.w2", "model.layers.11.block_sparse_moe.experts.204.w2", "model.layers.11.block_sparse_moe.experts.205.w2", "model.layers.11.block_sparse_moe.experts.206.w2", "model.layers.11.block_sparse_moe.experts.207.w2", "model.layers.11.block_sparse_moe.experts.208.w2", "model.layers.11.block_sparse_moe.experts.209.w2", "model.layers.11.block_sparse_moe.experts.210.w2", "model.layers.11.block_sparse_moe.experts.211.w2", "model.layers.11.block_sparse_moe.experts.212.w2", "model.layers.11.block_sparse_moe.experts.213.w2", "model.layers.11.block_sparse_moe.experts.214.w2", "model.layers.11.block_sparse_moe.experts.215.w2", "model.layers.11.block_sparse_moe.experts.216.w2", "model.layers.11.block_sparse_moe.experts.217.w2", "model.layers.11.block_sparse_moe.experts.218.w2", "model.layers.11.block_sparse_moe.experts.219.w2", "model.layers.11.block_sparse_moe.experts.220.w2", "model.layers.11.block_sparse_moe.experts.221.w2", "model.layers.11.block_sparse_moe.experts.222.w2", "model.layers.11.block_sparse_moe.experts.223.w2", "model.layers.11.block_sparse_moe.experts.224.w2", "model.layers.11.block_sparse_moe.experts.225.w2", "model.layers.11.block_sparse_moe.experts.226.w2", "model.layers.11.block_sparse_moe.experts.227.w2", "model.layers.11.block_sparse_moe.experts.228.w2", "model.layers.11.block_sparse_moe.experts.229.w2", "model.layers.11.block_sparse_moe.experts.230.w2", "model.layers.11.block_sparse_moe.experts.231.w2", "model.layers.11.block_sparse_moe.experts.232.w2", "model.layers.11.block_sparse_moe.experts.233.w2", "model.layers.11.block_sparse_moe.experts.234.w2", "model.layers.11.block_sparse_moe.experts.235.w2", "model.layers.11.block_sparse_moe.experts.236.w2", "model.layers.11.block_sparse_moe.experts.237.w2", "model.layers.11.block_sparse_moe.experts.238.w2", "model.layers.11.block_sparse_moe.experts.239.w2", "model.layers.11.block_sparse_moe.experts.240.w2", "model.layers.11.block_sparse_moe.experts.241.w2", "model.layers.11.block_sparse_moe.experts.242.w2", "model.layers.11.block_sparse_moe.experts.243.w2", "model.layers.11.block_sparse_moe.experts.244.w2", "model.layers.11.block_sparse_moe.experts.245.w2", "model.layers.11.block_sparse_moe.experts.246.w2", "model.layers.11.block_sparse_moe.experts.247.w2", "model.layers.11.block_sparse_moe.experts.248.w2", "model.layers.11.block_sparse_moe.experts.249.w2", "model.layers.11.block_sparse_moe.experts.250.w2", "model.layers.11.block_sparse_moe.experts.251.w2", "model.layers.11.block_sparse_moe.experts.252.w2", "model.layers.11.block_sparse_moe.experts.253.w2", "model.layers.11.block_sparse_moe.experts.254.w2", "model.layers.11.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -8.755922317504883e-05, "dbits": 1207959552 } ] }, { "idx": 60, "layers": [ "model.layers.12.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0009493384510279035, "dbits": 18874368 } ] }, { "idx": 61, "layers": [ "model.layers.12.self_attn.k_proj", "model.layers.12.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0014703214168548806, "dbits": 6291456 } ] }, { "idx": 62, "layers": [ "model.layers.12.self_attn.o_proj" ], "candidates": [ { "dkld": -0.00506064519286159, "dbits": 18874368 } ] }, { "idx": 63, "layers": [ "model.layers.12.block_sparse_moe.experts.0.w1", "model.layers.12.block_sparse_moe.experts.1.w1", "model.layers.12.block_sparse_moe.experts.2.w1", "model.layers.12.block_sparse_moe.experts.3.w1", "model.layers.12.block_sparse_moe.experts.4.w1", "model.layers.12.block_sparse_moe.experts.5.w1", "model.layers.12.block_sparse_moe.experts.6.w1", "model.layers.12.block_sparse_moe.experts.7.w1", "model.layers.12.block_sparse_moe.experts.8.w1", "model.layers.12.block_sparse_moe.experts.9.w1", "model.layers.12.block_sparse_moe.experts.10.w1", "model.layers.12.block_sparse_moe.experts.11.w1", "model.layers.12.block_sparse_moe.experts.12.w1", "model.layers.12.block_sparse_moe.experts.13.w1", "model.layers.12.block_sparse_moe.experts.14.w1", "model.layers.12.block_sparse_moe.experts.15.w1", "model.layers.12.block_sparse_moe.experts.16.w1", "model.layers.12.block_sparse_moe.experts.17.w1", "model.layers.12.block_sparse_moe.experts.18.w1", "model.layers.12.block_sparse_moe.experts.19.w1", "model.layers.12.block_sparse_moe.experts.20.w1", "model.layers.12.block_sparse_moe.experts.21.w1", "model.layers.12.block_sparse_moe.experts.22.w1", "model.layers.12.block_sparse_moe.experts.23.w1", "model.layers.12.block_sparse_moe.experts.24.w1", "model.layers.12.block_sparse_moe.experts.25.w1", "model.layers.12.block_sparse_moe.experts.26.w1", "model.layers.12.block_sparse_moe.experts.27.w1", "model.layers.12.block_sparse_moe.experts.28.w1", "model.layers.12.block_sparse_moe.experts.29.w1", "model.layers.12.block_sparse_moe.experts.30.w1", "model.layers.12.block_sparse_moe.experts.31.w1", "model.layers.12.block_sparse_moe.experts.32.w1", "model.layers.12.block_sparse_moe.experts.33.w1", "model.layers.12.block_sparse_moe.experts.34.w1", "model.layers.12.block_sparse_moe.experts.35.w1", "model.layers.12.block_sparse_moe.experts.36.w1", "model.layers.12.block_sparse_moe.experts.37.w1", "model.layers.12.block_sparse_moe.experts.38.w1", "model.layers.12.block_sparse_moe.experts.39.w1", "model.layers.12.block_sparse_moe.experts.40.w1", "model.layers.12.block_sparse_moe.experts.41.w1", "model.layers.12.block_sparse_moe.experts.42.w1", "model.layers.12.block_sparse_moe.experts.43.w1", "model.layers.12.block_sparse_moe.experts.44.w1", "model.layers.12.block_sparse_moe.experts.45.w1", "model.layers.12.block_sparse_moe.experts.46.w1", "model.layers.12.block_sparse_moe.experts.47.w1", "model.layers.12.block_sparse_moe.experts.48.w1", "model.layers.12.block_sparse_moe.experts.49.w1", "model.layers.12.block_sparse_moe.experts.50.w1", "model.layers.12.block_sparse_moe.experts.51.w1", "model.layers.12.block_sparse_moe.experts.52.w1", "model.layers.12.block_sparse_moe.experts.53.w1", "model.layers.12.block_sparse_moe.experts.54.w1", "model.layers.12.block_sparse_moe.experts.55.w1", "model.layers.12.block_sparse_moe.experts.56.w1", "model.layers.12.block_sparse_moe.experts.57.w1", "model.layers.12.block_sparse_moe.experts.58.w1", "model.layers.12.block_sparse_moe.experts.59.w1", "model.layers.12.block_sparse_moe.experts.60.w1", "model.layers.12.block_sparse_moe.experts.61.w1", "model.layers.12.block_sparse_moe.experts.62.w1", "model.layers.12.block_sparse_moe.experts.63.w1", "model.layers.12.block_sparse_moe.experts.64.w1", "model.layers.12.block_sparse_moe.experts.65.w1", "model.layers.12.block_sparse_moe.experts.66.w1", "model.layers.12.block_sparse_moe.experts.67.w1", "model.layers.12.block_sparse_moe.experts.68.w1", "model.layers.12.block_sparse_moe.experts.69.w1", "model.layers.12.block_sparse_moe.experts.70.w1", "model.layers.12.block_sparse_moe.experts.71.w1", "model.layers.12.block_sparse_moe.experts.72.w1", "model.layers.12.block_sparse_moe.experts.73.w1", "model.layers.12.block_sparse_moe.experts.74.w1", "model.layers.12.block_sparse_moe.experts.75.w1", "model.layers.12.block_sparse_moe.experts.76.w1", "model.layers.12.block_sparse_moe.experts.77.w1", "model.layers.12.block_sparse_moe.experts.78.w1", "model.layers.12.block_sparse_moe.experts.79.w1", "model.layers.12.block_sparse_moe.experts.80.w1", "model.layers.12.block_sparse_moe.experts.81.w1", "model.layers.12.block_sparse_moe.experts.82.w1", "model.layers.12.block_sparse_moe.experts.83.w1", "model.layers.12.block_sparse_moe.experts.84.w1", "model.layers.12.block_sparse_moe.experts.85.w1", "model.layers.12.block_sparse_moe.experts.86.w1", "model.layers.12.block_sparse_moe.experts.87.w1", "model.layers.12.block_sparse_moe.experts.88.w1", "model.layers.12.block_sparse_moe.experts.89.w1", "model.layers.12.block_sparse_moe.experts.90.w1", "model.layers.12.block_sparse_moe.experts.91.w1", "model.layers.12.block_sparse_moe.experts.92.w1", "model.layers.12.block_sparse_moe.experts.93.w1", "model.layers.12.block_sparse_moe.experts.94.w1", "model.layers.12.block_sparse_moe.experts.95.w1", "model.layers.12.block_sparse_moe.experts.96.w1", "model.layers.12.block_sparse_moe.experts.97.w1", "model.layers.12.block_sparse_moe.experts.98.w1", "model.layers.12.block_sparse_moe.experts.99.w1", "model.layers.12.block_sparse_moe.experts.100.w1", "model.layers.12.block_sparse_moe.experts.101.w1", "model.layers.12.block_sparse_moe.experts.102.w1", "model.layers.12.block_sparse_moe.experts.103.w1", "model.layers.12.block_sparse_moe.experts.104.w1", "model.layers.12.block_sparse_moe.experts.105.w1", "model.layers.12.block_sparse_moe.experts.106.w1", "model.layers.12.block_sparse_moe.experts.107.w1", "model.layers.12.block_sparse_moe.experts.108.w1", "model.layers.12.block_sparse_moe.experts.109.w1", "model.layers.12.block_sparse_moe.experts.110.w1", "model.layers.12.block_sparse_moe.experts.111.w1", "model.layers.12.block_sparse_moe.experts.112.w1", "model.layers.12.block_sparse_moe.experts.113.w1", "model.layers.12.block_sparse_moe.experts.114.w1", "model.layers.12.block_sparse_moe.experts.115.w1", "model.layers.12.block_sparse_moe.experts.116.w1", "model.layers.12.block_sparse_moe.experts.117.w1", "model.layers.12.block_sparse_moe.experts.118.w1", "model.layers.12.block_sparse_moe.experts.119.w1", "model.layers.12.block_sparse_moe.experts.120.w1", "model.layers.12.block_sparse_moe.experts.121.w1", "model.layers.12.block_sparse_moe.experts.122.w1", "model.layers.12.block_sparse_moe.experts.123.w1", "model.layers.12.block_sparse_moe.experts.124.w1", "model.layers.12.block_sparse_moe.experts.125.w1", "model.layers.12.block_sparse_moe.experts.126.w1", "model.layers.12.block_sparse_moe.experts.127.w1", "model.layers.12.block_sparse_moe.experts.128.w1", "model.layers.12.block_sparse_moe.experts.129.w1", "model.layers.12.block_sparse_moe.experts.130.w1", "model.layers.12.block_sparse_moe.experts.131.w1", "model.layers.12.block_sparse_moe.experts.132.w1", "model.layers.12.block_sparse_moe.experts.133.w1", "model.layers.12.block_sparse_moe.experts.134.w1", "model.layers.12.block_sparse_moe.experts.135.w1", "model.layers.12.block_sparse_moe.experts.136.w1", "model.layers.12.block_sparse_moe.experts.137.w1", "model.layers.12.block_sparse_moe.experts.138.w1", "model.layers.12.block_sparse_moe.experts.139.w1", "model.layers.12.block_sparse_moe.experts.140.w1", "model.layers.12.block_sparse_moe.experts.141.w1", "model.layers.12.block_sparse_moe.experts.142.w1", "model.layers.12.block_sparse_moe.experts.143.w1", "model.layers.12.block_sparse_moe.experts.144.w1", "model.layers.12.block_sparse_moe.experts.145.w1", "model.layers.12.block_sparse_moe.experts.146.w1", "model.layers.12.block_sparse_moe.experts.147.w1", "model.layers.12.block_sparse_moe.experts.148.w1", "model.layers.12.block_sparse_moe.experts.149.w1", "model.layers.12.block_sparse_moe.experts.150.w1", "model.layers.12.block_sparse_moe.experts.151.w1", "model.layers.12.block_sparse_moe.experts.152.w1", "model.layers.12.block_sparse_moe.experts.153.w1", "model.layers.12.block_sparse_moe.experts.154.w1", "model.layers.12.block_sparse_moe.experts.155.w1", "model.layers.12.block_sparse_moe.experts.156.w1", "model.layers.12.block_sparse_moe.experts.157.w1", "model.layers.12.block_sparse_moe.experts.158.w1", "model.layers.12.block_sparse_moe.experts.159.w1", "model.layers.12.block_sparse_moe.experts.160.w1", "model.layers.12.block_sparse_moe.experts.161.w1", "model.layers.12.block_sparse_moe.experts.162.w1", "model.layers.12.block_sparse_moe.experts.163.w1", "model.layers.12.block_sparse_moe.experts.164.w1", "model.layers.12.block_sparse_moe.experts.165.w1", "model.layers.12.block_sparse_moe.experts.166.w1", "model.layers.12.block_sparse_moe.experts.167.w1", "model.layers.12.block_sparse_moe.experts.168.w1", "model.layers.12.block_sparse_moe.experts.169.w1", "model.layers.12.block_sparse_moe.experts.170.w1", "model.layers.12.block_sparse_moe.experts.171.w1", "model.layers.12.block_sparse_moe.experts.172.w1", "model.layers.12.block_sparse_moe.experts.173.w1", "model.layers.12.block_sparse_moe.experts.174.w1", "model.layers.12.block_sparse_moe.experts.175.w1", "model.layers.12.block_sparse_moe.experts.176.w1", "model.layers.12.block_sparse_moe.experts.177.w1", "model.layers.12.block_sparse_moe.experts.178.w1", "model.layers.12.block_sparse_moe.experts.179.w1", "model.layers.12.block_sparse_moe.experts.180.w1", "model.layers.12.block_sparse_moe.experts.181.w1", "model.layers.12.block_sparse_moe.experts.182.w1", "model.layers.12.block_sparse_moe.experts.183.w1", "model.layers.12.block_sparse_moe.experts.184.w1", "model.layers.12.block_sparse_moe.experts.185.w1", "model.layers.12.block_sparse_moe.experts.186.w1", "model.layers.12.block_sparse_moe.experts.187.w1", "model.layers.12.block_sparse_moe.experts.188.w1", "model.layers.12.block_sparse_moe.experts.189.w1", "model.layers.12.block_sparse_moe.experts.190.w1", "model.layers.12.block_sparse_moe.experts.191.w1", "model.layers.12.block_sparse_moe.experts.192.w1", "model.layers.12.block_sparse_moe.experts.193.w1", "model.layers.12.block_sparse_moe.experts.194.w1", "model.layers.12.block_sparse_moe.experts.195.w1", "model.layers.12.block_sparse_moe.experts.196.w1", "model.layers.12.block_sparse_moe.experts.197.w1", "model.layers.12.block_sparse_moe.experts.198.w1", "model.layers.12.block_sparse_moe.experts.199.w1", "model.layers.12.block_sparse_moe.experts.200.w1", "model.layers.12.block_sparse_moe.experts.201.w1", "model.layers.12.block_sparse_moe.experts.202.w1", "model.layers.12.block_sparse_moe.experts.203.w1", "model.layers.12.block_sparse_moe.experts.204.w1", "model.layers.12.block_sparse_moe.experts.205.w1", "model.layers.12.block_sparse_moe.experts.206.w1", "model.layers.12.block_sparse_moe.experts.207.w1", "model.layers.12.block_sparse_moe.experts.208.w1", "model.layers.12.block_sparse_moe.experts.209.w1", "model.layers.12.block_sparse_moe.experts.210.w1", "model.layers.12.block_sparse_moe.experts.211.w1", "model.layers.12.block_sparse_moe.experts.212.w1", "model.layers.12.block_sparse_moe.experts.213.w1", "model.layers.12.block_sparse_moe.experts.214.w1", "model.layers.12.block_sparse_moe.experts.215.w1", "model.layers.12.block_sparse_moe.experts.216.w1", "model.layers.12.block_sparse_moe.experts.217.w1", "model.layers.12.block_sparse_moe.experts.218.w1", "model.layers.12.block_sparse_moe.experts.219.w1", "model.layers.12.block_sparse_moe.experts.220.w1", "model.layers.12.block_sparse_moe.experts.221.w1", "model.layers.12.block_sparse_moe.experts.222.w1", "model.layers.12.block_sparse_moe.experts.223.w1", "model.layers.12.block_sparse_moe.experts.224.w1", "model.layers.12.block_sparse_moe.experts.225.w1", "model.layers.12.block_sparse_moe.experts.226.w1", "model.layers.12.block_sparse_moe.experts.227.w1", "model.layers.12.block_sparse_moe.experts.228.w1", "model.layers.12.block_sparse_moe.experts.229.w1", "model.layers.12.block_sparse_moe.experts.230.w1", "model.layers.12.block_sparse_moe.experts.231.w1", "model.layers.12.block_sparse_moe.experts.232.w1", "model.layers.12.block_sparse_moe.experts.233.w1", "model.layers.12.block_sparse_moe.experts.234.w1", "model.layers.12.block_sparse_moe.experts.235.w1", "model.layers.12.block_sparse_moe.experts.236.w1", "model.layers.12.block_sparse_moe.experts.237.w1", "model.layers.12.block_sparse_moe.experts.238.w1", "model.layers.12.block_sparse_moe.experts.239.w1", "model.layers.12.block_sparse_moe.experts.240.w1", "model.layers.12.block_sparse_moe.experts.241.w1", "model.layers.12.block_sparse_moe.experts.242.w1", "model.layers.12.block_sparse_moe.experts.243.w1", "model.layers.12.block_sparse_moe.experts.244.w1", "model.layers.12.block_sparse_moe.experts.245.w1", "model.layers.12.block_sparse_moe.experts.246.w1", "model.layers.12.block_sparse_moe.experts.247.w1", "model.layers.12.block_sparse_moe.experts.248.w1", "model.layers.12.block_sparse_moe.experts.249.w1", "model.layers.12.block_sparse_moe.experts.250.w1", "model.layers.12.block_sparse_moe.experts.251.w1", "model.layers.12.block_sparse_moe.experts.252.w1", "model.layers.12.block_sparse_moe.experts.253.w1", "model.layers.12.block_sparse_moe.experts.254.w1", "model.layers.12.block_sparse_moe.experts.255.w1", "model.layers.12.block_sparse_moe.experts.0.w3", "model.layers.12.block_sparse_moe.experts.1.w3", "model.layers.12.block_sparse_moe.experts.2.w3", "model.layers.12.block_sparse_moe.experts.3.w3", "model.layers.12.block_sparse_moe.experts.4.w3", "model.layers.12.block_sparse_moe.experts.5.w3", "model.layers.12.block_sparse_moe.experts.6.w3", "model.layers.12.block_sparse_moe.experts.7.w3", "model.layers.12.block_sparse_moe.experts.8.w3", "model.layers.12.block_sparse_moe.experts.9.w3", "model.layers.12.block_sparse_moe.experts.10.w3", "model.layers.12.block_sparse_moe.experts.11.w3", "model.layers.12.block_sparse_moe.experts.12.w3", "model.layers.12.block_sparse_moe.experts.13.w3", "model.layers.12.block_sparse_moe.experts.14.w3", "model.layers.12.block_sparse_moe.experts.15.w3", "model.layers.12.block_sparse_moe.experts.16.w3", "model.layers.12.block_sparse_moe.experts.17.w3", "model.layers.12.block_sparse_moe.experts.18.w3", "model.layers.12.block_sparse_moe.experts.19.w3", "model.layers.12.block_sparse_moe.experts.20.w3", "model.layers.12.block_sparse_moe.experts.21.w3", "model.layers.12.block_sparse_moe.experts.22.w3", "model.layers.12.block_sparse_moe.experts.23.w3", "model.layers.12.block_sparse_moe.experts.24.w3", "model.layers.12.block_sparse_moe.experts.25.w3", "model.layers.12.block_sparse_moe.experts.26.w3", "model.layers.12.block_sparse_moe.experts.27.w3", "model.layers.12.block_sparse_moe.experts.28.w3", "model.layers.12.block_sparse_moe.experts.29.w3", "model.layers.12.block_sparse_moe.experts.30.w3", "model.layers.12.block_sparse_moe.experts.31.w3", "model.layers.12.block_sparse_moe.experts.32.w3", "model.layers.12.block_sparse_moe.experts.33.w3", "model.layers.12.block_sparse_moe.experts.34.w3", "model.layers.12.block_sparse_moe.experts.35.w3", "model.layers.12.block_sparse_moe.experts.36.w3", "model.layers.12.block_sparse_moe.experts.37.w3", "model.layers.12.block_sparse_moe.experts.38.w3", "model.layers.12.block_sparse_moe.experts.39.w3", "model.layers.12.block_sparse_moe.experts.40.w3", "model.layers.12.block_sparse_moe.experts.41.w3", "model.layers.12.block_sparse_moe.experts.42.w3", "model.layers.12.block_sparse_moe.experts.43.w3", "model.layers.12.block_sparse_moe.experts.44.w3", "model.layers.12.block_sparse_moe.experts.45.w3", "model.layers.12.block_sparse_moe.experts.46.w3", "model.layers.12.block_sparse_moe.experts.47.w3", "model.layers.12.block_sparse_moe.experts.48.w3", "model.layers.12.block_sparse_moe.experts.49.w3", "model.layers.12.block_sparse_moe.experts.50.w3", "model.layers.12.block_sparse_moe.experts.51.w3", "model.layers.12.block_sparse_moe.experts.52.w3", "model.layers.12.block_sparse_moe.experts.53.w3", "model.layers.12.block_sparse_moe.experts.54.w3", "model.layers.12.block_sparse_moe.experts.55.w3", "model.layers.12.block_sparse_moe.experts.56.w3", "model.layers.12.block_sparse_moe.experts.57.w3", "model.layers.12.block_sparse_moe.experts.58.w3", "model.layers.12.block_sparse_moe.experts.59.w3", "model.layers.12.block_sparse_moe.experts.60.w3", "model.layers.12.block_sparse_moe.experts.61.w3", "model.layers.12.block_sparse_moe.experts.62.w3", "model.layers.12.block_sparse_moe.experts.63.w3", "model.layers.12.block_sparse_moe.experts.64.w3", "model.layers.12.block_sparse_moe.experts.65.w3", "model.layers.12.block_sparse_moe.experts.66.w3", "model.layers.12.block_sparse_moe.experts.67.w3", "model.layers.12.block_sparse_moe.experts.68.w3", "model.layers.12.block_sparse_moe.experts.69.w3", "model.layers.12.block_sparse_moe.experts.70.w3", "model.layers.12.block_sparse_moe.experts.71.w3", "model.layers.12.block_sparse_moe.experts.72.w3", "model.layers.12.block_sparse_moe.experts.73.w3", "model.layers.12.block_sparse_moe.experts.74.w3", "model.layers.12.block_sparse_moe.experts.75.w3", "model.layers.12.block_sparse_moe.experts.76.w3", "model.layers.12.block_sparse_moe.experts.77.w3", "model.layers.12.block_sparse_moe.experts.78.w3", "model.layers.12.block_sparse_moe.experts.79.w3", "model.layers.12.block_sparse_moe.experts.80.w3", "model.layers.12.block_sparse_moe.experts.81.w3", "model.layers.12.block_sparse_moe.experts.82.w3", "model.layers.12.block_sparse_moe.experts.83.w3", "model.layers.12.block_sparse_moe.experts.84.w3", "model.layers.12.block_sparse_moe.experts.85.w3", "model.layers.12.block_sparse_moe.experts.86.w3", "model.layers.12.block_sparse_moe.experts.87.w3", "model.layers.12.block_sparse_moe.experts.88.w3", "model.layers.12.block_sparse_moe.experts.89.w3", "model.layers.12.block_sparse_moe.experts.90.w3", "model.layers.12.block_sparse_moe.experts.91.w3", "model.layers.12.block_sparse_moe.experts.92.w3", "model.layers.12.block_sparse_moe.experts.93.w3", "model.layers.12.block_sparse_moe.experts.94.w3", "model.layers.12.block_sparse_moe.experts.95.w3", "model.layers.12.block_sparse_moe.experts.96.w3", "model.layers.12.block_sparse_moe.experts.97.w3", "model.layers.12.block_sparse_moe.experts.98.w3", "model.layers.12.block_sparse_moe.experts.99.w3", "model.layers.12.block_sparse_moe.experts.100.w3", "model.layers.12.block_sparse_moe.experts.101.w3", "model.layers.12.block_sparse_moe.experts.102.w3", "model.layers.12.block_sparse_moe.experts.103.w3", "model.layers.12.block_sparse_moe.experts.104.w3", "model.layers.12.block_sparse_moe.experts.105.w3", "model.layers.12.block_sparse_moe.experts.106.w3", "model.layers.12.block_sparse_moe.experts.107.w3", "model.layers.12.block_sparse_moe.experts.108.w3", "model.layers.12.block_sparse_moe.experts.109.w3", "model.layers.12.block_sparse_moe.experts.110.w3", "model.layers.12.block_sparse_moe.experts.111.w3", "model.layers.12.block_sparse_moe.experts.112.w3", "model.layers.12.block_sparse_moe.experts.113.w3", "model.layers.12.block_sparse_moe.experts.114.w3", "model.layers.12.block_sparse_moe.experts.115.w3", "model.layers.12.block_sparse_moe.experts.116.w3", "model.layers.12.block_sparse_moe.experts.117.w3", "model.layers.12.block_sparse_moe.experts.118.w3", "model.layers.12.block_sparse_moe.experts.119.w3", "model.layers.12.block_sparse_moe.experts.120.w3", "model.layers.12.block_sparse_moe.experts.121.w3", "model.layers.12.block_sparse_moe.experts.122.w3", "model.layers.12.block_sparse_moe.experts.123.w3", "model.layers.12.block_sparse_moe.experts.124.w3", "model.layers.12.block_sparse_moe.experts.125.w3", "model.layers.12.block_sparse_moe.experts.126.w3", "model.layers.12.block_sparse_moe.experts.127.w3", "model.layers.12.block_sparse_moe.experts.128.w3", "model.layers.12.block_sparse_moe.experts.129.w3", "model.layers.12.block_sparse_moe.experts.130.w3", "model.layers.12.block_sparse_moe.experts.131.w3", "model.layers.12.block_sparse_moe.experts.132.w3", "model.layers.12.block_sparse_moe.experts.133.w3", "model.layers.12.block_sparse_moe.experts.134.w3", "model.layers.12.block_sparse_moe.experts.135.w3", "model.layers.12.block_sparse_moe.experts.136.w3", "model.layers.12.block_sparse_moe.experts.137.w3", "model.layers.12.block_sparse_moe.experts.138.w3", "model.layers.12.block_sparse_moe.experts.139.w3", "model.layers.12.block_sparse_moe.experts.140.w3", "model.layers.12.block_sparse_moe.experts.141.w3", "model.layers.12.block_sparse_moe.experts.142.w3", "model.layers.12.block_sparse_moe.experts.143.w3", "model.layers.12.block_sparse_moe.experts.144.w3", "model.layers.12.block_sparse_moe.experts.145.w3", "model.layers.12.block_sparse_moe.experts.146.w3", "model.layers.12.block_sparse_moe.experts.147.w3", "model.layers.12.block_sparse_moe.experts.148.w3", "model.layers.12.block_sparse_moe.experts.149.w3", "model.layers.12.block_sparse_moe.experts.150.w3", "model.layers.12.block_sparse_moe.experts.151.w3", "model.layers.12.block_sparse_moe.experts.152.w3", "model.layers.12.block_sparse_moe.experts.153.w3", "model.layers.12.block_sparse_moe.experts.154.w3", "model.layers.12.block_sparse_moe.experts.155.w3", "model.layers.12.block_sparse_moe.experts.156.w3", "model.layers.12.block_sparse_moe.experts.157.w3", "model.layers.12.block_sparse_moe.experts.158.w3", "model.layers.12.block_sparse_moe.experts.159.w3", "model.layers.12.block_sparse_moe.experts.160.w3", "model.layers.12.block_sparse_moe.experts.161.w3", "model.layers.12.block_sparse_moe.experts.162.w3", "model.layers.12.block_sparse_moe.experts.163.w3", "model.layers.12.block_sparse_moe.experts.164.w3", "model.layers.12.block_sparse_moe.experts.165.w3", "model.layers.12.block_sparse_moe.experts.166.w3", "model.layers.12.block_sparse_moe.experts.167.w3", "model.layers.12.block_sparse_moe.experts.168.w3", "model.layers.12.block_sparse_moe.experts.169.w3", "model.layers.12.block_sparse_moe.experts.170.w3", "model.layers.12.block_sparse_moe.experts.171.w3", "model.layers.12.block_sparse_moe.experts.172.w3", "model.layers.12.block_sparse_moe.experts.173.w3", "model.layers.12.block_sparse_moe.experts.174.w3", "model.layers.12.block_sparse_moe.experts.175.w3", "model.layers.12.block_sparse_moe.experts.176.w3", "model.layers.12.block_sparse_moe.experts.177.w3", "model.layers.12.block_sparse_moe.experts.178.w3", "model.layers.12.block_sparse_moe.experts.179.w3", "model.layers.12.block_sparse_moe.experts.180.w3", "model.layers.12.block_sparse_moe.experts.181.w3", "model.layers.12.block_sparse_moe.experts.182.w3", "model.layers.12.block_sparse_moe.experts.183.w3", "model.layers.12.block_sparse_moe.experts.184.w3", "model.layers.12.block_sparse_moe.experts.185.w3", "model.layers.12.block_sparse_moe.experts.186.w3", "model.layers.12.block_sparse_moe.experts.187.w3", "model.layers.12.block_sparse_moe.experts.188.w3", "model.layers.12.block_sparse_moe.experts.189.w3", "model.layers.12.block_sparse_moe.experts.190.w3", "model.layers.12.block_sparse_moe.experts.191.w3", "model.layers.12.block_sparse_moe.experts.192.w3", "model.layers.12.block_sparse_moe.experts.193.w3", "model.layers.12.block_sparse_moe.experts.194.w3", "model.layers.12.block_sparse_moe.experts.195.w3", "model.layers.12.block_sparse_moe.experts.196.w3", "model.layers.12.block_sparse_moe.experts.197.w3", "model.layers.12.block_sparse_moe.experts.198.w3", "model.layers.12.block_sparse_moe.experts.199.w3", "model.layers.12.block_sparse_moe.experts.200.w3", "model.layers.12.block_sparse_moe.experts.201.w3", "model.layers.12.block_sparse_moe.experts.202.w3", "model.layers.12.block_sparse_moe.experts.203.w3", "model.layers.12.block_sparse_moe.experts.204.w3", "model.layers.12.block_sparse_moe.experts.205.w3", "model.layers.12.block_sparse_moe.experts.206.w3", "model.layers.12.block_sparse_moe.experts.207.w3", "model.layers.12.block_sparse_moe.experts.208.w3", "model.layers.12.block_sparse_moe.experts.209.w3", "model.layers.12.block_sparse_moe.experts.210.w3", "model.layers.12.block_sparse_moe.experts.211.w3", "model.layers.12.block_sparse_moe.experts.212.w3", "model.layers.12.block_sparse_moe.experts.213.w3", "model.layers.12.block_sparse_moe.experts.214.w3", "model.layers.12.block_sparse_moe.experts.215.w3", "model.layers.12.block_sparse_moe.experts.216.w3", "model.layers.12.block_sparse_moe.experts.217.w3", "model.layers.12.block_sparse_moe.experts.218.w3", "model.layers.12.block_sparse_moe.experts.219.w3", "model.layers.12.block_sparse_moe.experts.220.w3", "model.layers.12.block_sparse_moe.experts.221.w3", "model.layers.12.block_sparse_moe.experts.222.w3", "model.layers.12.block_sparse_moe.experts.223.w3", "model.layers.12.block_sparse_moe.experts.224.w3", "model.layers.12.block_sparse_moe.experts.225.w3", "model.layers.12.block_sparse_moe.experts.226.w3", "model.layers.12.block_sparse_moe.experts.227.w3", "model.layers.12.block_sparse_moe.experts.228.w3", "model.layers.12.block_sparse_moe.experts.229.w3", "model.layers.12.block_sparse_moe.experts.230.w3", "model.layers.12.block_sparse_moe.experts.231.w3", "model.layers.12.block_sparse_moe.experts.232.w3", "model.layers.12.block_sparse_moe.experts.233.w3", "model.layers.12.block_sparse_moe.experts.234.w3", "model.layers.12.block_sparse_moe.experts.235.w3", "model.layers.12.block_sparse_moe.experts.236.w3", "model.layers.12.block_sparse_moe.experts.237.w3", "model.layers.12.block_sparse_moe.experts.238.w3", "model.layers.12.block_sparse_moe.experts.239.w3", "model.layers.12.block_sparse_moe.experts.240.w3", "model.layers.12.block_sparse_moe.experts.241.w3", "model.layers.12.block_sparse_moe.experts.242.w3", "model.layers.12.block_sparse_moe.experts.243.w3", "model.layers.12.block_sparse_moe.experts.244.w3", "model.layers.12.block_sparse_moe.experts.245.w3", "model.layers.12.block_sparse_moe.experts.246.w3", "model.layers.12.block_sparse_moe.experts.247.w3", "model.layers.12.block_sparse_moe.experts.248.w3", "model.layers.12.block_sparse_moe.experts.249.w3", "model.layers.12.block_sparse_moe.experts.250.w3", "model.layers.12.block_sparse_moe.experts.251.w3", "model.layers.12.block_sparse_moe.experts.252.w3", "model.layers.12.block_sparse_moe.experts.253.w3", "model.layers.12.block_sparse_moe.experts.254.w3", "model.layers.12.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0004236806184053643, "dbits": 2415919104 } ] }, { "idx": 64, "layers": [ "model.layers.12.block_sparse_moe.experts.0.w2", "model.layers.12.block_sparse_moe.experts.1.w2", "model.layers.12.block_sparse_moe.experts.2.w2", "model.layers.12.block_sparse_moe.experts.3.w2", "model.layers.12.block_sparse_moe.experts.4.w2", "model.layers.12.block_sparse_moe.experts.5.w2", "model.layers.12.block_sparse_moe.experts.6.w2", "model.layers.12.block_sparse_moe.experts.7.w2", "model.layers.12.block_sparse_moe.experts.8.w2", "model.layers.12.block_sparse_moe.experts.9.w2", "model.layers.12.block_sparse_moe.experts.10.w2", "model.layers.12.block_sparse_moe.experts.11.w2", "model.layers.12.block_sparse_moe.experts.12.w2", "model.layers.12.block_sparse_moe.experts.13.w2", "model.layers.12.block_sparse_moe.experts.14.w2", "model.layers.12.block_sparse_moe.experts.15.w2", "model.layers.12.block_sparse_moe.experts.16.w2", "model.layers.12.block_sparse_moe.experts.17.w2", "model.layers.12.block_sparse_moe.experts.18.w2", "model.layers.12.block_sparse_moe.experts.19.w2", "model.layers.12.block_sparse_moe.experts.20.w2", "model.layers.12.block_sparse_moe.experts.21.w2", "model.layers.12.block_sparse_moe.experts.22.w2", "model.layers.12.block_sparse_moe.experts.23.w2", "model.layers.12.block_sparse_moe.experts.24.w2", "model.layers.12.block_sparse_moe.experts.25.w2", "model.layers.12.block_sparse_moe.experts.26.w2", "model.layers.12.block_sparse_moe.experts.27.w2", "model.layers.12.block_sparse_moe.experts.28.w2", "model.layers.12.block_sparse_moe.experts.29.w2", "model.layers.12.block_sparse_moe.experts.30.w2", "model.layers.12.block_sparse_moe.experts.31.w2", "model.layers.12.block_sparse_moe.experts.32.w2", "model.layers.12.block_sparse_moe.experts.33.w2", "model.layers.12.block_sparse_moe.experts.34.w2", "model.layers.12.block_sparse_moe.experts.35.w2", "model.layers.12.block_sparse_moe.experts.36.w2", "model.layers.12.block_sparse_moe.experts.37.w2", "model.layers.12.block_sparse_moe.experts.38.w2", "model.layers.12.block_sparse_moe.experts.39.w2", "model.layers.12.block_sparse_moe.experts.40.w2", "model.layers.12.block_sparse_moe.experts.41.w2", "model.layers.12.block_sparse_moe.experts.42.w2", "model.layers.12.block_sparse_moe.experts.43.w2", "model.layers.12.block_sparse_moe.experts.44.w2", "model.layers.12.block_sparse_moe.experts.45.w2", "model.layers.12.block_sparse_moe.experts.46.w2", "model.layers.12.block_sparse_moe.experts.47.w2", "model.layers.12.block_sparse_moe.experts.48.w2", "model.layers.12.block_sparse_moe.experts.49.w2", "model.layers.12.block_sparse_moe.experts.50.w2", "model.layers.12.block_sparse_moe.experts.51.w2", "model.layers.12.block_sparse_moe.experts.52.w2", "model.layers.12.block_sparse_moe.experts.53.w2", "model.layers.12.block_sparse_moe.experts.54.w2", "model.layers.12.block_sparse_moe.experts.55.w2", "model.layers.12.block_sparse_moe.experts.56.w2", "model.layers.12.block_sparse_moe.experts.57.w2", "model.layers.12.block_sparse_moe.experts.58.w2", "model.layers.12.block_sparse_moe.experts.59.w2", "model.layers.12.block_sparse_moe.experts.60.w2", "model.layers.12.block_sparse_moe.experts.61.w2", "model.layers.12.block_sparse_moe.experts.62.w2", "model.layers.12.block_sparse_moe.experts.63.w2", "model.layers.12.block_sparse_moe.experts.64.w2", "model.layers.12.block_sparse_moe.experts.65.w2", "model.layers.12.block_sparse_moe.experts.66.w2", "model.layers.12.block_sparse_moe.experts.67.w2", "model.layers.12.block_sparse_moe.experts.68.w2", "model.layers.12.block_sparse_moe.experts.69.w2", "model.layers.12.block_sparse_moe.experts.70.w2", "model.layers.12.block_sparse_moe.experts.71.w2", "model.layers.12.block_sparse_moe.experts.72.w2", "model.layers.12.block_sparse_moe.experts.73.w2", "model.layers.12.block_sparse_moe.experts.74.w2", "model.layers.12.block_sparse_moe.experts.75.w2", "model.layers.12.block_sparse_moe.experts.76.w2", "model.layers.12.block_sparse_moe.experts.77.w2", "model.layers.12.block_sparse_moe.experts.78.w2", "model.layers.12.block_sparse_moe.experts.79.w2", "model.layers.12.block_sparse_moe.experts.80.w2", "model.layers.12.block_sparse_moe.experts.81.w2", "model.layers.12.block_sparse_moe.experts.82.w2", "model.layers.12.block_sparse_moe.experts.83.w2", "model.layers.12.block_sparse_moe.experts.84.w2", "model.layers.12.block_sparse_moe.experts.85.w2", "model.layers.12.block_sparse_moe.experts.86.w2", "model.layers.12.block_sparse_moe.experts.87.w2", "model.layers.12.block_sparse_moe.experts.88.w2", "model.layers.12.block_sparse_moe.experts.89.w2", "model.layers.12.block_sparse_moe.experts.90.w2", "model.layers.12.block_sparse_moe.experts.91.w2", "model.layers.12.block_sparse_moe.experts.92.w2", "model.layers.12.block_sparse_moe.experts.93.w2", "model.layers.12.block_sparse_moe.experts.94.w2", "model.layers.12.block_sparse_moe.experts.95.w2", "model.layers.12.block_sparse_moe.experts.96.w2", "model.layers.12.block_sparse_moe.experts.97.w2", "model.layers.12.block_sparse_moe.experts.98.w2", "model.layers.12.block_sparse_moe.experts.99.w2", "model.layers.12.block_sparse_moe.experts.100.w2", "model.layers.12.block_sparse_moe.experts.101.w2", "model.layers.12.block_sparse_moe.experts.102.w2", "model.layers.12.block_sparse_moe.experts.103.w2", "model.layers.12.block_sparse_moe.experts.104.w2", "model.layers.12.block_sparse_moe.experts.105.w2", "model.layers.12.block_sparse_moe.experts.106.w2", "model.layers.12.block_sparse_moe.experts.107.w2", "model.layers.12.block_sparse_moe.experts.108.w2", "model.layers.12.block_sparse_moe.experts.109.w2", "model.layers.12.block_sparse_moe.experts.110.w2", "model.layers.12.block_sparse_moe.experts.111.w2", "model.layers.12.block_sparse_moe.experts.112.w2", "model.layers.12.block_sparse_moe.experts.113.w2", "model.layers.12.block_sparse_moe.experts.114.w2", "model.layers.12.block_sparse_moe.experts.115.w2", "model.layers.12.block_sparse_moe.experts.116.w2", "model.layers.12.block_sparse_moe.experts.117.w2", "model.layers.12.block_sparse_moe.experts.118.w2", "model.layers.12.block_sparse_moe.experts.119.w2", "model.layers.12.block_sparse_moe.experts.120.w2", "model.layers.12.block_sparse_moe.experts.121.w2", "model.layers.12.block_sparse_moe.experts.122.w2", "model.layers.12.block_sparse_moe.experts.123.w2", "model.layers.12.block_sparse_moe.experts.124.w2", "model.layers.12.block_sparse_moe.experts.125.w2", "model.layers.12.block_sparse_moe.experts.126.w2", "model.layers.12.block_sparse_moe.experts.127.w2", "model.layers.12.block_sparse_moe.experts.128.w2", "model.layers.12.block_sparse_moe.experts.129.w2", "model.layers.12.block_sparse_moe.experts.130.w2", "model.layers.12.block_sparse_moe.experts.131.w2", "model.layers.12.block_sparse_moe.experts.132.w2", "model.layers.12.block_sparse_moe.experts.133.w2", "model.layers.12.block_sparse_moe.experts.134.w2", "model.layers.12.block_sparse_moe.experts.135.w2", "model.layers.12.block_sparse_moe.experts.136.w2", "model.layers.12.block_sparse_moe.experts.137.w2", "model.layers.12.block_sparse_moe.experts.138.w2", "model.layers.12.block_sparse_moe.experts.139.w2", "model.layers.12.block_sparse_moe.experts.140.w2", "model.layers.12.block_sparse_moe.experts.141.w2", "model.layers.12.block_sparse_moe.experts.142.w2", "model.layers.12.block_sparse_moe.experts.143.w2", "model.layers.12.block_sparse_moe.experts.144.w2", "model.layers.12.block_sparse_moe.experts.145.w2", "model.layers.12.block_sparse_moe.experts.146.w2", "model.layers.12.block_sparse_moe.experts.147.w2", "model.layers.12.block_sparse_moe.experts.148.w2", "model.layers.12.block_sparse_moe.experts.149.w2", "model.layers.12.block_sparse_moe.experts.150.w2", "model.layers.12.block_sparse_moe.experts.151.w2", "model.layers.12.block_sparse_moe.experts.152.w2", "model.layers.12.block_sparse_moe.experts.153.w2", "model.layers.12.block_sparse_moe.experts.154.w2", "model.layers.12.block_sparse_moe.experts.155.w2", "model.layers.12.block_sparse_moe.experts.156.w2", "model.layers.12.block_sparse_moe.experts.157.w2", "model.layers.12.block_sparse_moe.experts.158.w2", "model.layers.12.block_sparse_moe.experts.159.w2", "model.layers.12.block_sparse_moe.experts.160.w2", "model.layers.12.block_sparse_moe.experts.161.w2", "model.layers.12.block_sparse_moe.experts.162.w2", "model.layers.12.block_sparse_moe.experts.163.w2", "model.layers.12.block_sparse_moe.experts.164.w2", "model.layers.12.block_sparse_moe.experts.165.w2", "model.layers.12.block_sparse_moe.experts.166.w2", "model.layers.12.block_sparse_moe.experts.167.w2", "model.layers.12.block_sparse_moe.experts.168.w2", "model.layers.12.block_sparse_moe.experts.169.w2", "model.layers.12.block_sparse_moe.experts.170.w2", "model.layers.12.block_sparse_moe.experts.171.w2", "model.layers.12.block_sparse_moe.experts.172.w2", "model.layers.12.block_sparse_moe.experts.173.w2", "model.layers.12.block_sparse_moe.experts.174.w2", "model.layers.12.block_sparse_moe.experts.175.w2", "model.layers.12.block_sparse_moe.experts.176.w2", "model.layers.12.block_sparse_moe.experts.177.w2", "model.layers.12.block_sparse_moe.experts.178.w2", "model.layers.12.block_sparse_moe.experts.179.w2", "model.layers.12.block_sparse_moe.experts.180.w2", "model.layers.12.block_sparse_moe.experts.181.w2", "model.layers.12.block_sparse_moe.experts.182.w2", "model.layers.12.block_sparse_moe.experts.183.w2", "model.layers.12.block_sparse_moe.experts.184.w2", "model.layers.12.block_sparse_moe.experts.185.w2", "model.layers.12.block_sparse_moe.experts.186.w2", "model.layers.12.block_sparse_moe.experts.187.w2", "model.layers.12.block_sparse_moe.experts.188.w2", "model.layers.12.block_sparse_moe.experts.189.w2", "model.layers.12.block_sparse_moe.experts.190.w2", "model.layers.12.block_sparse_moe.experts.191.w2", "model.layers.12.block_sparse_moe.experts.192.w2", "model.layers.12.block_sparse_moe.experts.193.w2", "model.layers.12.block_sparse_moe.experts.194.w2", "model.layers.12.block_sparse_moe.experts.195.w2", "model.layers.12.block_sparse_moe.experts.196.w2", "model.layers.12.block_sparse_moe.experts.197.w2", "model.layers.12.block_sparse_moe.experts.198.w2", "model.layers.12.block_sparse_moe.experts.199.w2", "model.layers.12.block_sparse_moe.experts.200.w2", "model.layers.12.block_sparse_moe.experts.201.w2", "model.layers.12.block_sparse_moe.experts.202.w2", "model.layers.12.block_sparse_moe.experts.203.w2", "model.layers.12.block_sparse_moe.experts.204.w2", "model.layers.12.block_sparse_moe.experts.205.w2", "model.layers.12.block_sparse_moe.experts.206.w2", "model.layers.12.block_sparse_moe.experts.207.w2", "model.layers.12.block_sparse_moe.experts.208.w2", "model.layers.12.block_sparse_moe.experts.209.w2", "model.layers.12.block_sparse_moe.experts.210.w2", "model.layers.12.block_sparse_moe.experts.211.w2", "model.layers.12.block_sparse_moe.experts.212.w2", "model.layers.12.block_sparse_moe.experts.213.w2", "model.layers.12.block_sparse_moe.experts.214.w2", "model.layers.12.block_sparse_moe.experts.215.w2", "model.layers.12.block_sparse_moe.experts.216.w2", "model.layers.12.block_sparse_moe.experts.217.w2", "model.layers.12.block_sparse_moe.experts.218.w2", "model.layers.12.block_sparse_moe.experts.219.w2", "model.layers.12.block_sparse_moe.experts.220.w2", "model.layers.12.block_sparse_moe.experts.221.w2", "model.layers.12.block_sparse_moe.experts.222.w2", "model.layers.12.block_sparse_moe.experts.223.w2", "model.layers.12.block_sparse_moe.experts.224.w2", "model.layers.12.block_sparse_moe.experts.225.w2", "model.layers.12.block_sparse_moe.experts.226.w2", "model.layers.12.block_sparse_moe.experts.227.w2", "model.layers.12.block_sparse_moe.experts.228.w2", "model.layers.12.block_sparse_moe.experts.229.w2", "model.layers.12.block_sparse_moe.experts.230.w2", "model.layers.12.block_sparse_moe.experts.231.w2", "model.layers.12.block_sparse_moe.experts.232.w2", "model.layers.12.block_sparse_moe.experts.233.w2", "model.layers.12.block_sparse_moe.experts.234.w2", "model.layers.12.block_sparse_moe.experts.235.w2", "model.layers.12.block_sparse_moe.experts.236.w2", "model.layers.12.block_sparse_moe.experts.237.w2", "model.layers.12.block_sparse_moe.experts.238.w2", "model.layers.12.block_sparse_moe.experts.239.w2", "model.layers.12.block_sparse_moe.experts.240.w2", "model.layers.12.block_sparse_moe.experts.241.w2", "model.layers.12.block_sparse_moe.experts.242.w2", "model.layers.12.block_sparse_moe.experts.243.w2", "model.layers.12.block_sparse_moe.experts.244.w2", "model.layers.12.block_sparse_moe.experts.245.w2", "model.layers.12.block_sparse_moe.experts.246.w2", "model.layers.12.block_sparse_moe.experts.247.w2", "model.layers.12.block_sparse_moe.experts.248.w2", "model.layers.12.block_sparse_moe.experts.249.w2", "model.layers.12.block_sparse_moe.experts.250.w2", "model.layers.12.block_sparse_moe.experts.251.w2", "model.layers.12.block_sparse_moe.experts.252.w2", "model.layers.12.block_sparse_moe.experts.253.w2", "model.layers.12.block_sparse_moe.experts.254.w2", "model.layers.12.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.00045500509440898895, "dbits": 1207959552 } ] }, { "idx": 65, "layers": [ "model.layers.13.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0030866920948028342, "dbits": 18874368 } ] }, { "idx": 66, "layers": [ "model.layers.13.self_attn.k_proj", "model.layers.13.self_attn.v_proj" ], "candidates": [ { "dkld": 0.00235118009150026, "dbits": 6291456 } ] }, { "idx": 67, "layers": [ "model.layers.13.self_attn.o_proj" ], "candidates": [ { "dkld": 0.003997224941849675, "dbits": 18874368 } ] }, { "idx": 68, "layers": [ "model.layers.13.block_sparse_moe.experts.0.w1", "model.layers.13.block_sparse_moe.experts.1.w1", "model.layers.13.block_sparse_moe.experts.2.w1", "model.layers.13.block_sparse_moe.experts.3.w1", "model.layers.13.block_sparse_moe.experts.4.w1", "model.layers.13.block_sparse_moe.experts.5.w1", "model.layers.13.block_sparse_moe.experts.6.w1", "model.layers.13.block_sparse_moe.experts.7.w1", "model.layers.13.block_sparse_moe.experts.8.w1", "model.layers.13.block_sparse_moe.experts.9.w1", "model.layers.13.block_sparse_moe.experts.10.w1", "model.layers.13.block_sparse_moe.experts.11.w1", "model.layers.13.block_sparse_moe.experts.12.w1", "model.layers.13.block_sparse_moe.experts.13.w1", "model.layers.13.block_sparse_moe.experts.14.w1", "model.layers.13.block_sparse_moe.experts.15.w1", "model.layers.13.block_sparse_moe.experts.16.w1", "model.layers.13.block_sparse_moe.experts.17.w1", "model.layers.13.block_sparse_moe.experts.18.w1", "model.layers.13.block_sparse_moe.experts.19.w1", "model.layers.13.block_sparse_moe.experts.20.w1", "model.layers.13.block_sparse_moe.experts.21.w1", "model.layers.13.block_sparse_moe.experts.22.w1", "model.layers.13.block_sparse_moe.experts.23.w1", "model.layers.13.block_sparse_moe.experts.24.w1", "model.layers.13.block_sparse_moe.experts.25.w1", "model.layers.13.block_sparse_moe.experts.26.w1", "model.layers.13.block_sparse_moe.experts.27.w1", "model.layers.13.block_sparse_moe.experts.28.w1", "model.layers.13.block_sparse_moe.experts.29.w1", "model.layers.13.block_sparse_moe.experts.30.w1", "model.layers.13.block_sparse_moe.experts.31.w1", "model.layers.13.block_sparse_moe.experts.32.w1", "model.layers.13.block_sparse_moe.experts.33.w1", "model.layers.13.block_sparse_moe.experts.34.w1", "model.layers.13.block_sparse_moe.experts.35.w1", "model.layers.13.block_sparse_moe.experts.36.w1", "model.layers.13.block_sparse_moe.experts.37.w1", "model.layers.13.block_sparse_moe.experts.38.w1", "model.layers.13.block_sparse_moe.experts.39.w1", "model.layers.13.block_sparse_moe.experts.40.w1", "model.layers.13.block_sparse_moe.experts.41.w1", "model.layers.13.block_sparse_moe.experts.42.w1", "model.layers.13.block_sparse_moe.experts.43.w1", "model.layers.13.block_sparse_moe.experts.44.w1", "model.layers.13.block_sparse_moe.experts.45.w1", "model.layers.13.block_sparse_moe.experts.46.w1", "model.layers.13.block_sparse_moe.experts.47.w1", "model.layers.13.block_sparse_moe.experts.48.w1", "model.layers.13.block_sparse_moe.experts.49.w1", "model.layers.13.block_sparse_moe.experts.50.w1", "model.layers.13.block_sparse_moe.experts.51.w1", "model.layers.13.block_sparse_moe.experts.52.w1", "model.layers.13.block_sparse_moe.experts.53.w1", "model.layers.13.block_sparse_moe.experts.54.w1", "model.layers.13.block_sparse_moe.experts.55.w1", "model.layers.13.block_sparse_moe.experts.56.w1", "model.layers.13.block_sparse_moe.experts.57.w1", "model.layers.13.block_sparse_moe.experts.58.w1", "model.layers.13.block_sparse_moe.experts.59.w1", "model.layers.13.block_sparse_moe.experts.60.w1", "model.layers.13.block_sparse_moe.experts.61.w1", "model.layers.13.block_sparse_moe.experts.62.w1", "model.layers.13.block_sparse_moe.experts.63.w1", "model.layers.13.block_sparse_moe.experts.64.w1", "model.layers.13.block_sparse_moe.experts.65.w1", "model.layers.13.block_sparse_moe.experts.66.w1", "model.layers.13.block_sparse_moe.experts.67.w1", "model.layers.13.block_sparse_moe.experts.68.w1", "model.layers.13.block_sparse_moe.experts.69.w1", "model.layers.13.block_sparse_moe.experts.70.w1", "model.layers.13.block_sparse_moe.experts.71.w1", "model.layers.13.block_sparse_moe.experts.72.w1", "model.layers.13.block_sparse_moe.experts.73.w1", "model.layers.13.block_sparse_moe.experts.74.w1", "model.layers.13.block_sparse_moe.experts.75.w1", "model.layers.13.block_sparse_moe.experts.76.w1", "model.layers.13.block_sparse_moe.experts.77.w1", "model.layers.13.block_sparse_moe.experts.78.w1", "model.layers.13.block_sparse_moe.experts.79.w1", "model.layers.13.block_sparse_moe.experts.80.w1", "model.layers.13.block_sparse_moe.experts.81.w1", "model.layers.13.block_sparse_moe.experts.82.w1", "model.layers.13.block_sparse_moe.experts.83.w1", "model.layers.13.block_sparse_moe.experts.84.w1", "model.layers.13.block_sparse_moe.experts.85.w1", "model.layers.13.block_sparse_moe.experts.86.w1", "model.layers.13.block_sparse_moe.experts.87.w1", "model.layers.13.block_sparse_moe.experts.88.w1", "model.layers.13.block_sparse_moe.experts.89.w1", "model.layers.13.block_sparse_moe.experts.90.w1", "model.layers.13.block_sparse_moe.experts.91.w1", "model.layers.13.block_sparse_moe.experts.92.w1", "model.layers.13.block_sparse_moe.experts.93.w1", "model.layers.13.block_sparse_moe.experts.94.w1", "model.layers.13.block_sparse_moe.experts.95.w1", "model.layers.13.block_sparse_moe.experts.96.w1", "model.layers.13.block_sparse_moe.experts.97.w1", "model.layers.13.block_sparse_moe.experts.98.w1", "model.layers.13.block_sparse_moe.experts.99.w1", "model.layers.13.block_sparse_moe.experts.100.w1", "model.layers.13.block_sparse_moe.experts.101.w1", "model.layers.13.block_sparse_moe.experts.102.w1", "model.layers.13.block_sparse_moe.experts.103.w1", "model.layers.13.block_sparse_moe.experts.104.w1", "model.layers.13.block_sparse_moe.experts.105.w1", "model.layers.13.block_sparse_moe.experts.106.w1", "model.layers.13.block_sparse_moe.experts.107.w1", "model.layers.13.block_sparse_moe.experts.108.w1", "model.layers.13.block_sparse_moe.experts.109.w1", "model.layers.13.block_sparse_moe.experts.110.w1", "model.layers.13.block_sparse_moe.experts.111.w1", "model.layers.13.block_sparse_moe.experts.112.w1", "model.layers.13.block_sparse_moe.experts.113.w1", "model.layers.13.block_sparse_moe.experts.114.w1", "model.layers.13.block_sparse_moe.experts.115.w1", "model.layers.13.block_sparse_moe.experts.116.w1", "model.layers.13.block_sparse_moe.experts.117.w1", "model.layers.13.block_sparse_moe.experts.118.w1", "model.layers.13.block_sparse_moe.experts.119.w1", "model.layers.13.block_sparse_moe.experts.120.w1", "model.layers.13.block_sparse_moe.experts.121.w1", "model.layers.13.block_sparse_moe.experts.122.w1", "model.layers.13.block_sparse_moe.experts.123.w1", "model.layers.13.block_sparse_moe.experts.124.w1", "model.layers.13.block_sparse_moe.experts.125.w1", "model.layers.13.block_sparse_moe.experts.126.w1", "model.layers.13.block_sparse_moe.experts.127.w1", "model.layers.13.block_sparse_moe.experts.128.w1", "model.layers.13.block_sparse_moe.experts.129.w1", "model.layers.13.block_sparse_moe.experts.130.w1", "model.layers.13.block_sparse_moe.experts.131.w1", "model.layers.13.block_sparse_moe.experts.132.w1", "model.layers.13.block_sparse_moe.experts.133.w1", "model.layers.13.block_sparse_moe.experts.134.w1", "model.layers.13.block_sparse_moe.experts.135.w1", "model.layers.13.block_sparse_moe.experts.136.w1", "model.layers.13.block_sparse_moe.experts.137.w1", "model.layers.13.block_sparse_moe.experts.138.w1", "model.layers.13.block_sparse_moe.experts.139.w1", "model.layers.13.block_sparse_moe.experts.140.w1", "model.layers.13.block_sparse_moe.experts.141.w1", "model.layers.13.block_sparse_moe.experts.142.w1", "model.layers.13.block_sparse_moe.experts.143.w1", "model.layers.13.block_sparse_moe.experts.144.w1", "model.layers.13.block_sparse_moe.experts.145.w1", "model.layers.13.block_sparse_moe.experts.146.w1", "model.layers.13.block_sparse_moe.experts.147.w1", "model.layers.13.block_sparse_moe.experts.148.w1", "model.layers.13.block_sparse_moe.experts.149.w1", "model.layers.13.block_sparse_moe.experts.150.w1", "model.layers.13.block_sparse_moe.experts.151.w1", "model.layers.13.block_sparse_moe.experts.152.w1", "model.layers.13.block_sparse_moe.experts.153.w1", "model.layers.13.block_sparse_moe.experts.154.w1", "model.layers.13.block_sparse_moe.experts.155.w1", "model.layers.13.block_sparse_moe.experts.156.w1", "model.layers.13.block_sparse_moe.experts.157.w1", "model.layers.13.block_sparse_moe.experts.158.w1", "model.layers.13.block_sparse_moe.experts.159.w1", "model.layers.13.block_sparse_moe.experts.160.w1", "model.layers.13.block_sparse_moe.experts.161.w1", "model.layers.13.block_sparse_moe.experts.162.w1", "model.layers.13.block_sparse_moe.experts.163.w1", "model.layers.13.block_sparse_moe.experts.164.w1", "model.layers.13.block_sparse_moe.experts.165.w1", "model.layers.13.block_sparse_moe.experts.166.w1", "model.layers.13.block_sparse_moe.experts.167.w1", "model.layers.13.block_sparse_moe.experts.168.w1", "model.layers.13.block_sparse_moe.experts.169.w1", "model.layers.13.block_sparse_moe.experts.170.w1", "model.layers.13.block_sparse_moe.experts.171.w1", "model.layers.13.block_sparse_moe.experts.172.w1", "model.layers.13.block_sparse_moe.experts.173.w1", "model.layers.13.block_sparse_moe.experts.174.w1", "model.layers.13.block_sparse_moe.experts.175.w1", "model.layers.13.block_sparse_moe.experts.176.w1", "model.layers.13.block_sparse_moe.experts.177.w1", "model.layers.13.block_sparse_moe.experts.178.w1", "model.layers.13.block_sparse_moe.experts.179.w1", "model.layers.13.block_sparse_moe.experts.180.w1", "model.layers.13.block_sparse_moe.experts.181.w1", "model.layers.13.block_sparse_moe.experts.182.w1", "model.layers.13.block_sparse_moe.experts.183.w1", "model.layers.13.block_sparse_moe.experts.184.w1", "model.layers.13.block_sparse_moe.experts.185.w1", "model.layers.13.block_sparse_moe.experts.186.w1", "model.layers.13.block_sparse_moe.experts.187.w1", "model.layers.13.block_sparse_moe.experts.188.w1", "model.layers.13.block_sparse_moe.experts.189.w1", "model.layers.13.block_sparse_moe.experts.190.w1", "model.layers.13.block_sparse_moe.experts.191.w1", "model.layers.13.block_sparse_moe.experts.192.w1", "model.layers.13.block_sparse_moe.experts.193.w1", "model.layers.13.block_sparse_moe.experts.194.w1", "model.layers.13.block_sparse_moe.experts.195.w1", "model.layers.13.block_sparse_moe.experts.196.w1", "model.layers.13.block_sparse_moe.experts.197.w1", "model.layers.13.block_sparse_moe.experts.198.w1", "model.layers.13.block_sparse_moe.experts.199.w1", "model.layers.13.block_sparse_moe.experts.200.w1", "model.layers.13.block_sparse_moe.experts.201.w1", "model.layers.13.block_sparse_moe.experts.202.w1", "model.layers.13.block_sparse_moe.experts.203.w1", "model.layers.13.block_sparse_moe.experts.204.w1", "model.layers.13.block_sparse_moe.experts.205.w1", "model.layers.13.block_sparse_moe.experts.206.w1", "model.layers.13.block_sparse_moe.experts.207.w1", "model.layers.13.block_sparse_moe.experts.208.w1", "model.layers.13.block_sparse_moe.experts.209.w1", "model.layers.13.block_sparse_moe.experts.210.w1", "model.layers.13.block_sparse_moe.experts.211.w1", "model.layers.13.block_sparse_moe.experts.212.w1", "model.layers.13.block_sparse_moe.experts.213.w1", "model.layers.13.block_sparse_moe.experts.214.w1", "model.layers.13.block_sparse_moe.experts.215.w1", "model.layers.13.block_sparse_moe.experts.216.w1", "model.layers.13.block_sparse_moe.experts.217.w1", "model.layers.13.block_sparse_moe.experts.218.w1", "model.layers.13.block_sparse_moe.experts.219.w1", "model.layers.13.block_sparse_moe.experts.220.w1", "model.layers.13.block_sparse_moe.experts.221.w1", "model.layers.13.block_sparse_moe.experts.222.w1", "model.layers.13.block_sparse_moe.experts.223.w1", "model.layers.13.block_sparse_moe.experts.224.w1", "model.layers.13.block_sparse_moe.experts.225.w1", "model.layers.13.block_sparse_moe.experts.226.w1", "model.layers.13.block_sparse_moe.experts.227.w1", "model.layers.13.block_sparse_moe.experts.228.w1", "model.layers.13.block_sparse_moe.experts.229.w1", "model.layers.13.block_sparse_moe.experts.230.w1", "model.layers.13.block_sparse_moe.experts.231.w1", "model.layers.13.block_sparse_moe.experts.232.w1", "model.layers.13.block_sparse_moe.experts.233.w1", "model.layers.13.block_sparse_moe.experts.234.w1", "model.layers.13.block_sparse_moe.experts.235.w1", "model.layers.13.block_sparse_moe.experts.236.w1", "model.layers.13.block_sparse_moe.experts.237.w1", "model.layers.13.block_sparse_moe.experts.238.w1", "model.layers.13.block_sparse_moe.experts.239.w1", "model.layers.13.block_sparse_moe.experts.240.w1", "model.layers.13.block_sparse_moe.experts.241.w1", "model.layers.13.block_sparse_moe.experts.242.w1", "model.layers.13.block_sparse_moe.experts.243.w1", "model.layers.13.block_sparse_moe.experts.244.w1", "model.layers.13.block_sparse_moe.experts.245.w1", "model.layers.13.block_sparse_moe.experts.246.w1", "model.layers.13.block_sparse_moe.experts.247.w1", "model.layers.13.block_sparse_moe.experts.248.w1", "model.layers.13.block_sparse_moe.experts.249.w1", "model.layers.13.block_sparse_moe.experts.250.w1", "model.layers.13.block_sparse_moe.experts.251.w1", "model.layers.13.block_sparse_moe.experts.252.w1", "model.layers.13.block_sparse_moe.experts.253.w1", "model.layers.13.block_sparse_moe.experts.254.w1", "model.layers.13.block_sparse_moe.experts.255.w1", "model.layers.13.block_sparse_moe.experts.0.w3", "model.layers.13.block_sparse_moe.experts.1.w3", "model.layers.13.block_sparse_moe.experts.2.w3", "model.layers.13.block_sparse_moe.experts.3.w3", "model.layers.13.block_sparse_moe.experts.4.w3", "model.layers.13.block_sparse_moe.experts.5.w3", "model.layers.13.block_sparse_moe.experts.6.w3", "model.layers.13.block_sparse_moe.experts.7.w3", "model.layers.13.block_sparse_moe.experts.8.w3", "model.layers.13.block_sparse_moe.experts.9.w3", "model.layers.13.block_sparse_moe.experts.10.w3", "model.layers.13.block_sparse_moe.experts.11.w3", "model.layers.13.block_sparse_moe.experts.12.w3", "model.layers.13.block_sparse_moe.experts.13.w3", "model.layers.13.block_sparse_moe.experts.14.w3", "model.layers.13.block_sparse_moe.experts.15.w3", "model.layers.13.block_sparse_moe.experts.16.w3", "model.layers.13.block_sparse_moe.experts.17.w3", "model.layers.13.block_sparse_moe.experts.18.w3", "model.layers.13.block_sparse_moe.experts.19.w3", "model.layers.13.block_sparse_moe.experts.20.w3", "model.layers.13.block_sparse_moe.experts.21.w3", "model.layers.13.block_sparse_moe.experts.22.w3", "model.layers.13.block_sparse_moe.experts.23.w3", "model.layers.13.block_sparse_moe.experts.24.w3", "model.layers.13.block_sparse_moe.experts.25.w3", "model.layers.13.block_sparse_moe.experts.26.w3", "model.layers.13.block_sparse_moe.experts.27.w3", "model.layers.13.block_sparse_moe.experts.28.w3", "model.layers.13.block_sparse_moe.experts.29.w3", "model.layers.13.block_sparse_moe.experts.30.w3", "model.layers.13.block_sparse_moe.experts.31.w3", "model.layers.13.block_sparse_moe.experts.32.w3", "model.layers.13.block_sparse_moe.experts.33.w3", "model.layers.13.block_sparse_moe.experts.34.w3", "model.layers.13.block_sparse_moe.experts.35.w3", "model.layers.13.block_sparse_moe.experts.36.w3", "model.layers.13.block_sparse_moe.experts.37.w3", "model.layers.13.block_sparse_moe.experts.38.w3", "model.layers.13.block_sparse_moe.experts.39.w3", "model.layers.13.block_sparse_moe.experts.40.w3", "model.layers.13.block_sparse_moe.experts.41.w3", "model.layers.13.block_sparse_moe.experts.42.w3", "model.layers.13.block_sparse_moe.experts.43.w3", "model.layers.13.block_sparse_moe.experts.44.w3", "model.layers.13.block_sparse_moe.experts.45.w3", "model.layers.13.block_sparse_moe.experts.46.w3", "model.layers.13.block_sparse_moe.experts.47.w3", "model.layers.13.block_sparse_moe.experts.48.w3", "model.layers.13.block_sparse_moe.experts.49.w3", "model.layers.13.block_sparse_moe.experts.50.w3", "model.layers.13.block_sparse_moe.experts.51.w3", "model.layers.13.block_sparse_moe.experts.52.w3", "model.layers.13.block_sparse_moe.experts.53.w3", "model.layers.13.block_sparse_moe.experts.54.w3", "model.layers.13.block_sparse_moe.experts.55.w3", "model.layers.13.block_sparse_moe.experts.56.w3", "model.layers.13.block_sparse_moe.experts.57.w3", "model.layers.13.block_sparse_moe.experts.58.w3", "model.layers.13.block_sparse_moe.experts.59.w3", "model.layers.13.block_sparse_moe.experts.60.w3", "model.layers.13.block_sparse_moe.experts.61.w3", "model.layers.13.block_sparse_moe.experts.62.w3", "model.layers.13.block_sparse_moe.experts.63.w3", "model.layers.13.block_sparse_moe.experts.64.w3", "model.layers.13.block_sparse_moe.experts.65.w3", "model.layers.13.block_sparse_moe.experts.66.w3", "model.layers.13.block_sparse_moe.experts.67.w3", "model.layers.13.block_sparse_moe.experts.68.w3", "model.layers.13.block_sparse_moe.experts.69.w3", "model.layers.13.block_sparse_moe.experts.70.w3", "model.layers.13.block_sparse_moe.experts.71.w3", "model.layers.13.block_sparse_moe.experts.72.w3", "model.layers.13.block_sparse_moe.experts.73.w3", "model.layers.13.block_sparse_moe.experts.74.w3", "model.layers.13.block_sparse_moe.experts.75.w3", "model.layers.13.block_sparse_moe.experts.76.w3", "model.layers.13.block_sparse_moe.experts.77.w3", "model.layers.13.block_sparse_moe.experts.78.w3", "model.layers.13.block_sparse_moe.experts.79.w3", "model.layers.13.block_sparse_moe.experts.80.w3", "model.layers.13.block_sparse_moe.experts.81.w3", "model.layers.13.block_sparse_moe.experts.82.w3", "model.layers.13.block_sparse_moe.experts.83.w3", "model.layers.13.block_sparse_moe.experts.84.w3", "model.layers.13.block_sparse_moe.experts.85.w3", "model.layers.13.block_sparse_moe.experts.86.w3", "model.layers.13.block_sparse_moe.experts.87.w3", "model.layers.13.block_sparse_moe.experts.88.w3", "model.layers.13.block_sparse_moe.experts.89.w3", "model.layers.13.block_sparse_moe.experts.90.w3", "model.layers.13.block_sparse_moe.experts.91.w3", "model.layers.13.block_sparse_moe.experts.92.w3", "model.layers.13.block_sparse_moe.experts.93.w3", "model.layers.13.block_sparse_moe.experts.94.w3", "model.layers.13.block_sparse_moe.experts.95.w3", "model.layers.13.block_sparse_moe.experts.96.w3", "model.layers.13.block_sparse_moe.experts.97.w3", "model.layers.13.block_sparse_moe.experts.98.w3", "model.layers.13.block_sparse_moe.experts.99.w3", "model.layers.13.block_sparse_moe.experts.100.w3", "model.layers.13.block_sparse_moe.experts.101.w3", "model.layers.13.block_sparse_moe.experts.102.w3", "model.layers.13.block_sparse_moe.experts.103.w3", "model.layers.13.block_sparse_moe.experts.104.w3", "model.layers.13.block_sparse_moe.experts.105.w3", "model.layers.13.block_sparse_moe.experts.106.w3", "model.layers.13.block_sparse_moe.experts.107.w3", "model.layers.13.block_sparse_moe.experts.108.w3", "model.layers.13.block_sparse_moe.experts.109.w3", "model.layers.13.block_sparse_moe.experts.110.w3", "model.layers.13.block_sparse_moe.experts.111.w3", "model.layers.13.block_sparse_moe.experts.112.w3", "model.layers.13.block_sparse_moe.experts.113.w3", "model.layers.13.block_sparse_moe.experts.114.w3", "model.layers.13.block_sparse_moe.experts.115.w3", "model.layers.13.block_sparse_moe.experts.116.w3", "model.layers.13.block_sparse_moe.experts.117.w3", "model.layers.13.block_sparse_moe.experts.118.w3", "model.layers.13.block_sparse_moe.experts.119.w3", "model.layers.13.block_sparse_moe.experts.120.w3", "model.layers.13.block_sparse_moe.experts.121.w3", "model.layers.13.block_sparse_moe.experts.122.w3", "model.layers.13.block_sparse_moe.experts.123.w3", "model.layers.13.block_sparse_moe.experts.124.w3", "model.layers.13.block_sparse_moe.experts.125.w3", "model.layers.13.block_sparse_moe.experts.126.w3", "model.layers.13.block_sparse_moe.experts.127.w3", "model.layers.13.block_sparse_moe.experts.128.w3", "model.layers.13.block_sparse_moe.experts.129.w3", "model.layers.13.block_sparse_moe.experts.130.w3", "model.layers.13.block_sparse_moe.experts.131.w3", "model.layers.13.block_sparse_moe.experts.132.w3", "model.layers.13.block_sparse_moe.experts.133.w3", "model.layers.13.block_sparse_moe.experts.134.w3", "model.layers.13.block_sparse_moe.experts.135.w3", "model.layers.13.block_sparse_moe.experts.136.w3", "model.layers.13.block_sparse_moe.experts.137.w3", "model.layers.13.block_sparse_moe.experts.138.w3", "model.layers.13.block_sparse_moe.experts.139.w3", "model.layers.13.block_sparse_moe.experts.140.w3", "model.layers.13.block_sparse_moe.experts.141.w3", "model.layers.13.block_sparse_moe.experts.142.w3", "model.layers.13.block_sparse_moe.experts.143.w3", "model.layers.13.block_sparse_moe.experts.144.w3", "model.layers.13.block_sparse_moe.experts.145.w3", "model.layers.13.block_sparse_moe.experts.146.w3", "model.layers.13.block_sparse_moe.experts.147.w3", "model.layers.13.block_sparse_moe.experts.148.w3", "model.layers.13.block_sparse_moe.experts.149.w3", "model.layers.13.block_sparse_moe.experts.150.w3", "model.layers.13.block_sparse_moe.experts.151.w3", "model.layers.13.block_sparse_moe.experts.152.w3", "model.layers.13.block_sparse_moe.experts.153.w3", "model.layers.13.block_sparse_moe.experts.154.w3", "model.layers.13.block_sparse_moe.experts.155.w3", "model.layers.13.block_sparse_moe.experts.156.w3", "model.layers.13.block_sparse_moe.experts.157.w3", "model.layers.13.block_sparse_moe.experts.158.w3", "model.layers.13.block_sparse_moe.experts.159.w3", "model.layers.13.block_sparse_moe.experts.160.w3", "model.layers.13.block_sparse_moe.experts.161.w3", "model.layers.13.block_sparse_moe.experts.162.w3", "model.layers.13.block_sparse_moe.experts.163.w3", "model.layers.13.block_sparse_moe.experts.164.w3", "model.layers.13.block_sparse_moe.experts.165.w3", "model.layers.13.block_sparse_moe.experts.166.w3", "model.layers.13.block_sparse_moe.experts.167.w3", "model.layers.13.block_sparse_moe.experts.168.w3", "model.layers.13.block_sparse_moe.experts.169.w3", "model.layers.13.block_sparse_moe.experts.170.w3", "model.layers.13.block_sparse_moe.experts.171.w3", "model.layers.13.block_sparse_moe.experts.172.w3", "model.layers.13.block_sparse_moe.experts.173.w3", "model.layers.13.block_sparse_moe.experts.174.w3", "model.layers.13.block_sparse_moe.experts.175.w3", "model.layers.13.block_sparse_moe.experts.176.w3", "model.layers.13.block_sparse_moe.experts.177.w3", "model.layers.13.block_sparse_moe.experts.178.w3", "model.layers.13.block_sparse_moe.experts.179.w3", "model.layers.13.block_sparse_moe.experts.180.w3", "model.layers.13.block_sparse_moe.experts.181.w3", "model.layers.13.block_sparse_moe.experts.182.w3", "model.layers.13.block_sparse_moe.experts.183.w3", "model.layers.13.block_sparse_moe.experts.184.w3", "model.layers.13.block_sparse_moe.experts.185.w3", "model.layers.13.block_sparse_moe.experts.186.w3", "model.layers.13.block_sparse_moe.experts.187.w3", "model.layers.13.block_sparse_moe.experts.188.w3", "model.layers.13.block_sparse_moe.experts.189.w3", "model.layers.13.block_sparse_moe.experts.190.w3", "model.layers.13.block_sparse_moe.experts.191.w3", "model.layers.13.block_sparse_moe.experts.192.w3", "model.layers.13.block_sparse_moe.experts.193.w3", "model.layers.13.block_sparse_moe.experts.194.w3", "model.layers.13.block_sparse_moe.experts.195.w3", "model.layers.13.block_sparse_moe.experts.196.w3", "model.layers.13.block_sparse_moe.experts.197.w3", "model.layers.13.block_sparse_moe.experts.198.w3", "model.layers.13.block_sparse_moe.experts.199.w3", "model.layers.13.block_sparse_moe.experts.200.w3", "model.layers.13.block_sparse_moe.experts.201.w3", "model.layers.13.block_sparse_moe.experts.202.w3", "model.layers.13.block_sparse_moe.experts.203.w3", "model.layers.13.block_sparse_moe.experts.204.w3", "model.layers.13.block_sparse_moe.experts.205.w3", "model.layers.13.block_sparse_moe.experts.206.w3", "model.layers.13.block_sparse_moe.experts.207.w3", "model.layers.13.block_sparse_moe.experts.208.w3", "model.layers.13.block_sparse_moe.experts.209.w3", "model.layers.13.block_sparse_moe.experts.210.w3", "model.layers.13.block_sparse_moe.experts.211.w3", "model.layers.13.block_sparse_moe.experts.212.w3", "model.layers.13.block_sparse_moe.experts.213.w3", "model.layers.13.block_sparse_moe.experts.214.w3", "model.layers.13.block_sparse_moe.experts.215.w3", "model.layers.13.block_sparse_moe.experts.216.w3", "model.layers.13.block_sparse_moe.experts.217.w3", "model.layers.13.block_sparse_moe.experts.218.w3", "model.layers.13.block_sparse_moe.experts.219.w3", "model.layers.13.block_sparse_moe.experts.220.w3", "model.layers.13.block_sparse_moe.experts.221.w3", "model.layers.13.block_sparse_moe.experts.222.w3", "model.layers.13.block_sparse_moe.experts.223.w3", "model.layers.13.block_sparse_moe.experts.224.w3", "model.layers.13.block_sparse_moe.experts.225.w3", "model.layers.13.block_sparse_moe.experts.226.w3", "model.layers.13.block_sparse_moe.experts.227.w3", "model.layers.13.block_sparse_moe.experts.228.w3", "model.layers.13.block_sparse_moe.experts.229.w3", "model.layers.13.block_sparse_moe.experts.230.w3", "model.layers.13.block_sparse_moe.experts.231.w3", "model.layers.13.block_sparse_moe.experts.232.w3", "model.layers.13.block_sparse_moe.experts.233.w3", "model.layers.13.block_sparse_moe.experts.234.w3", "model.layers.13.block_sparse_moe.experts.235.w3", "model.layers.13.block_sparse_moe.experts.236.w3", "model.layers.13.block_sparse_moe.experts.237.w3", "model.layers.13.block_sparse_moe.experts.238.w3", "model.layers.13.block_sparse_moe.experts.239.w3", "model.layers.13.block_sparse_moe.experts.240.w3", "model.layers.13.block_sparse_moe.experts.241.w3", "model.layers.13.block_sparse_moe.experts.242.w3", "model.layers.13.block_sparse_moe.experts.243.w3", "model.layers.13.block_sparse_moe.experts.244.w3", "model.layers.13.block_sparse_moe.experts.245.w3", "model.layers.13.block_sparse_moe.experts.246.w3", "model.layers.13.block_sparse_moe.experts.247.w3", "model.layers.13.block_sparse_moe.experts.248.w3", "model.layers.13.block_sparse_moe.experts.249.w3", "model.layers.13.block_sparse_moe.experts.250.w3", "model.layers.13.block_sparse_moe.experts.251.w3", "model.layers.13.block_sparse_moe.experts.252.w3", "model.layers.13.block_sparse_moe.experts.253.w3", "model.layers.13.block_sparse_moe.experts.254.w3", "model.layers.13.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.001569184660911549, "dbits": 2415919104 } ] }, { "idx": 69, "layers": [ "model.layers.13.block_sparse_moe.experts.0.w2", "model.layers.13.block_sparse_moe.experts.1.w2", "model.layers.13.block_sparse_moe.experts.2.w2", "model.layers.13.block_sparse_moe.experts.3.w2", "model.layers.13.block_sparse_moe.experts.4.w2", "model.layers.13.block_sparse_moe.experts.5.w2", "model.layers.13.block_sparse_moe.experts.6.w2", "model.layers.13.block_sparse_moe.experts.7.w2", "model.layers.13.block_sparse_moe.experts.8.w2", "model.layers.13.block_sparse_moe.experts.9.w2", "model.layers.13.block_sparse_moe.experts.10.w2", "model.layers.13.block_sparse_moe.experts.11.w2", "model.layers.13.block_sparse_moe.experts.12.w2", "model.layers.13.block_sparse_moe.experts.13.w2", "model.layers.13.block_sparse_moe.experts.14.w2", "model.layers.13.block_sparse_moe.experts.15.w2", "model.layers.13.block_sparse_moe.experts.16.w2", "model.layers.13.block_sparse_moe.experts.17.w2", "model.layers.13.block_sparse_moe.experts.18.w2", "model.layers.13.block_sparse_moe.experts.19.w2", "model.layers.13.block_sparse_moe.experts.20.w2", "model.layers.13.block_sparse_moe.experts.21.w2", "model.layers.13.block_sparse_moe.experts.22.w2", "model.layers.13.block_sparse_moe.experts.23.w2", "model.layers.13.block_sparse_moe.experts.24.w2", "model.layers.13.block_sparse_moe.experts.25.w2", "model.layers.13.block_sparse_moe.experts.26.w2", "model.layers.13.block_sparse_moe.experts.27.w2", "model.layers.13.block_sparse_moe.experts.28.w2", "model.layers.13.block_sparse_moe.experts.29.w2", "model.layers.13.block_sparse_moe.experts.30.w2", "model.layers.13.block_sparse_moe.experts.31.w2", "model.layers.13.block_sparse_moe.experts.32.w2", "model.layers.13.block_sparse_moe.experts.33.w2", "model.layers.13.block_sparse_moe.experts.34.w2", "model.layers.13.block_sparse_moe.experts.35.w2", "model.layers.13.block_sparse_moe.experts.36.w2", "model.layers.13.block_sparse_moe.experts.37.w2", "model.layers.13.block_sparse_moe.experts.38.w2", "model.layers.13.block_sparse_moe.experts.39.w2", "model.layers.13.block_sparse_moe.experts.40.w2", "model.layers.13.block_sparse_moe.experts.41.w2", "model.layers.13.block_sparse_moe.experts.42.w2", "model.layers.13.block_sparse_moe.experts.43.w2", "model.layers.13.block_sparse_moe.experts.44.w2", "model.layers.13.block_sparse_moe.experts.45.w2", "model.layers.13.block_sparse_moe.experts.46.w2", "model.layers.13.block_sparse_moe.experts.47.w2", "model.layers.13.block_sparse_moe.experts.48.w2", "model.layers.13.block_sparse_moe.experts.49.w2", "model.layers.13.block_sparse_moe.experts.50.w2", "model.layers.13.block_sparse_moe.experts.51.w2", "model.layers.13.block_sparse_moe.experts.52.w2", "model.layers.13.block_sparse_moe.experts.53.w2", "model.layers.13.block_sparse_moe.experts.54.w2", "model.layers.13.block_sparse_moe.experts.55.w2", "model.layers.13.block_sparse_moe.experts.56.w2", "model.layers.13.block_sparse_moe.experts.57.w2", "model.layers.13.block_sparse_moe.experts.58.w2", "model.layers.13.block_sparse_moe.experts.59.w2", "model.layers.13.block_sparse_moe.experts.60.w2", "model.layers.13.block_sparse_moe.experts.61.w2", "model.layers.13.block_sparse_moe.experts.62.w2", "model.layers.13.block_sparse_moe.experts.63.w2", "model.layers.13.block_sparse_moe.experts.64.w2", "model.layers.13.block_sparse_moe.experts.65.w2", "model.layers.13.block_sparse_moe.experts.66.w2", "model.layers.13.block_sparse_moe.experts.67.w2", "model.layers.13.block_sparse_moe.experts.68.w2", "model.layers.13.block_sparse_moe.experts.69.w2", "model.layers.13.block_sparse_moe.experts.70.w2", "model.layers.13.block_sparse_moe.experts.71.w2", "model.layers.13.block_sparse_moe.experts.72.w2", "model.layers.13.block_sparse_moe.experts.73.w2", "model.layers.13.block_sparse_moe.experts.74.w2", "model.layers.13.block_sparse_moe.experts.75.w2", "model.layers.13.block_sparse_moe.experts.76.w2", "model.layers.13.block_sparse_moe.experts.77.w2", "model.layers.13.block_sparse_moe.experts.78.w2", "model.layers.13.block_sparse_moe.experts.79.w2", "model.layers.13.block_sparse_moe.experts.80.w2", "model.layers.13.block_sparse_moe.experts.81.w2", "model.layers.13.block_sparse_moe.experts.82.w2", "model.layers.13.block_sparse_moe.experts.83.w2", "model.layers.13.block_sparse_moe.experts.84.w2", "model.layers.13.block_sparse_moe.experts.85.w2", "model.layers.13.block_sparse_moe.experts.86.w2", "model.layers.13.block_sparse_moe.experts.87.w2", "model.layers.13.block_sparse_moe.experts.88.w2", "model.layers.13.block_sparse_moe.experts.89.w2", "model.layers.13.block_sparse_moe.experts.90.w2", "model.layers.13.block_sparse_moe.experts.91.w2", "model.layers.13.block_sparse_moe.experts.92.w2", "model.layers.13.block_sparse_moe.experts.93.w2", "model.layers.13.block_sparse_moe.experts.94.w2", "model.layers.13.block_sparse_moe.experts.95.w2", "model.layers.13.block_sparse_moe.experts.96.w2", "model.layers.13.block_sparse_moe.experts.97.w2", "model.layers.13.block_sparse_moe.experts.98.w2", "model.layers.13.block_sparse_moe.experts.99.w2", "model.layers.13.block_sparse_moe.experts.100.w2", "model.layers.13.block_sparse_moe.experts.101.w2", "model.layers.13.block_sparse_moe.experts.102.w2", "model.layers.13.block_sparse_moe.experts.103.w2", "model.layers.13.block_sparse_moe.experts.104.w2", "model.layers.13.block_sparse_moe.experts.105.w2", "model.layers.13.block_sparse_moe.experts.106.w2", "model.layers.13.block_sparse_moe.experts.107.w2", "model.layers.13.block_sparse_moe.experts.108.w2", "model.layers.13.block_sparse_moe.experts.109.w2", "model.layers.13.block_sparse_moe.experts.110.w2", "model.layers.13.block_sparse_moe.experts.111.w2", "model.layers.13.block_sparse_moe.experts.112.w2", "model.layers.13.block_sparse_moe.experts.113.w2", "model.layers.13.block_sparse_moe.experts.114.w2", "model.layers.13.block_sparse_moe.experts.115.w2", "model.layers.13.block_sparse_moe.experts.116.w2", "model.layers.13.block_sparse_moe.experts.117.w2", "model.layers.13.block_sparse_moe.experts.118.w2", "model.layers.13.block_sparse_moe.experts.119.w2", "model.layers.13.block_sparse_moe.experts.120.w2", "model.layers.13.block_sparse_moe.experts.121.w2", "model.layers.13.block_sparse_moe.experts.122.w2", "model.layers.13.block_sparse_moe.experts.123.w2", "model.layers.13.block_sparse_moe.experts.124.w2", "model.layers.13.block_sparse_moe.experts.125.w2", "model.layers.13.block_sparse_moe.experts.126.w2", "model.layers.13.block_sparse_moe.experts.127.w2", "model.layers.13.block_sparse_moe.experts.128.w2", "model.layers.13.block_sparse_moe.experts.129.w2", "model.layers.13.block_sparse_moe.experts.130.w2", "model.layers.13.block_sparse_moe.experts.131.w2", "model.layers.13.block_sparse_moe.experts.132.w2", "model.layers.13.block_sparse_moe.experts.133.w2", "model.layers.13.block_sparse_moe.experts.134.w2", "model.layers.13.block_sparse_moe.experts.135.w2", "model.layers.13.block_sparse_moe.experts.136.w2", "model.layers.13.block_sparse_moe.experts.137.w2", "model.layers.13.block_sparse_moe.experts.138.w2", "model.layers.13.block_sparse_moe.experts.139.w2", "model.layers.13.block_sparse_moe.experts.140.w2", "model.layers.13.block_sparse_moe.experts.141.w2", "model.layers.13.block_sparse_moe.experts.142.w2", "model.layers.13.block_sparse_moe.experts.143.w2", "model.layers.13.block_sparse_moe.experts.144.w2", "model.layers.13.block_sparse_moe.experts.145.w2", "model.layers.13.block_sparse_moe.experts.146.w2", "model.layers.13.block_sparse_moe.experts.147.w2", "model.layers.13.block_sparse_moe.experts.148.w2", "model.layers.13.block_sparse_moe.experts.149.w2", "model.layers.13.block_sparse_moe.experts.150.w2", "model.layers.13.block_sparse_moe.experts.151.w2", "model.layers.13.block_sparse_moe.experts.152.w2", "model.layers.13.block_sparse_moe.experts.153.w2", "model.layers.13.block_sparse_moe.experts.154.w2", "model.layers.13.block_sparse_moe.experts.155.w2", "model.layers.13.block_sparse_moe.experts.156.w2", "model.layers.13.block_sparse_moe.experts.157.w2", "model.layers.13.block_sparse_moe.experts.158.w2", "model.layers.13.block_sparse_moe.experts.159.w2", "model.layers.13.block_sparse_moe.experts.160.w2", "model.layers.13.block_sparse_moe.experts.161.w2", "model.layers.13.block_sparse_moe.experts.162.w2", "model.layers.13.block_sparse_moe.experts.163.w2", "model.layers.13.block_sparse_moe.experts.164.w2", "model.layers.13.block_sparse_moe.experts.165.w2", "model.layers.13.block_sparse_moe.experts.166.w2", "model.layers.13.block_sparse_moe.experts.167.w2", "model.layers.13.block_sparse_moe.experts.168.w2", "model.layers.13.block_sparse_moe.experts.169.w2", "model.layers.13.block_sparse_moe.experts.170.w2", "model.layers.13.block_sparse_moe.experts.171.w2", "model.layers.13.block_sparse_moe.experts.172.w2", "model.layers.13.block_sparse_moe.experts.173.w2", "model.layers.13.block_sparse_moe.experts.174.w2", "model.layers.13.block_sparse_moe.experts.175.w2", "model.layers.13.block_sparse_moe.experts.176.w2", "model.layers.13.block_sparse_moe.experts.177.w2", "model.layers.13.block_sparse_moe.experts.178.w2", "model.layers.13.block_sparse_moe.experts.179.w2", "model.layers.13.block_sparse_moe.experts.180.w2", "model.layers.13.block_sparse_moe.experts.181.w2", "model.layers.13.block_sparse_moe.experts.182.w2", "model.layers.13.block_sparse_moe.experts.183.w2", "model.layers.13.block_sparse_moe.experts.184.w2", "model.layers.13.block_sparse_moe.experts.185.w2", "model.layers.13.block_sparse_moe.experts.186.w2", "model.layers.13.block_sparse_moe.experts.187.w2", "model.layers.13.block_sparse_moe.experts.188.w2", "model.layers.13.block_sparse_moe.experts.189.w2", "model.layers.13.block_sparse_moe.experts.190.w2", "model.layers.13.block_sparse_moe.experts.191.w2", "model.layers.13.block_sparse_moe.experts.192.w2", "model.layers.13.block_sparse_moe.experts.193.w2", "model.layers.13.block_sparse_moe.experts.194.w2", "model.layers.13.block_sparse_moe.experts.195.w2", "model.layers.13.block_sparse_moe.experts.196.w2", "model.layers.13.block_sparse_moe.experts.197.w2", "model.layers.13.block_sparse_moe.experts.198.w2", "model.layers.13.block_sparse_moe.experts.199.w2", "model.layers.13.block_sparse_moe.experts.200.w2", "model.layers.13.block_sparse_moe.experts.201.w2", "model.layers.13.block_sparse_moe.experts.202.w2", "model.layers.13.block_sparse_moe.experts.203.w2", "model.layers.13.block_sparse_moe.experts.204.w2", "model.layers.13.block_sparse_moe.experts.205.w2", "model.layers.13.block_sparse_moe.experts.206.w2", "model.layers.13.block_sparse_moe.experts.207.w2", "model.layers.13.block_sparse_moe.experts.208.w2", "model.layers.13.block_sparse_moe.experts.209.w2", "model.layers.13.block_sparse_moe.experts.210.w2", "model.layers.13.block_sparse_moe.experts.211.w2", "model.layers.13.block_sparse_moe.experts.212.w2", "model.layers.13.block_sparse_moe.experts.213.w2", "model.layers.13.block_sparse_moe.experts.214.w2", "model.layers.13.block_sparse_moe.experts.215.w2", "model.layers.13.block_sparse_moe.experts.216.w2", "model.layers.13.block_sparse_moe.experts.217.w2", "model.layers.13.block_sparse_moe.experts.218.w2", "model.layers.13.block_sparse_moe.experts.219.w2", "model.layers.13.block_sparse_moe.experts.220.w2", "model.layers.13.block_sparse_moe.experts.221.w2", "model.layers.13.block_sparse_moe.experts.222.w2", "model.layers.13.block_sparse_moe.experts.223.w2", "model.layers.13.block_sparse_moe.experts.224.w2", "model.layers.13.block_sparse_moe.experts.225.w2", "model.layers.13.block_sparse_moe.experts.226.w2", "model.layers.13.block_sparse_moe.experts.227.w2", "model.layers.13.block_sparse_moe.experts.228.w2", "model.layers.13.block_sparse_moe.experts.229.w2", "model.layers.13.block_sparse_moe.experts.230.w2", "model.layers.13.block_sparse_moe.experts.231.w2", "model.layers.13.block_sparse_moe.experts.232.w2", "model.layers.13.block_sparse_moe.experts.233.w2", "model.layers.13.block_sparse_moe.experts.234.w2", "model.layers.13.block_sparse_moe.experts.235.w2", "model.layers.13.block_sparse_moe.experts.236.w2", "model.layers.13.block_sparse_moe.experts.237.w2", "model.layers.13.block_sparse_moe.experts.238.w2", "model.layers.13.block_sparse_moe.experts.239.w2", "model.layers.13.block_sparse_moe.experts.240.w2", "model.layers.13.block_sparse_moe.experts.241.w2", "model.layers.13.block_sparse_moe.experts.242.w2", "model.layers.13.block_sparse_moe.experts.243.w2", "model.layers.13.block_sparse_moe.experts.244.w2", "model.layers.13.block_sparse_moe.experts.245.w2", "model.layers.13.block_sparse_moe.experts.246.w2", "model.layers.13.block_sparse_moe.experts.247.w2", "model.layers.13.block_sparse_moe.experts.248.w2", "model.layers.13.block_sparse_moe.experts.249.w2", "model.layers.13.block_sparse_moe.experts.250.w2", "model.layers.13.block_sparse_moe.experts.251.w2", "model.layers.13.block_sparse_moe.experts.252.w2", "model.layers.13.block_sparse_moe.experts.253.w2", "model.layers.13.block_sparse_moe.experts.254.w2", "model.layers.13.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.00033683739602563545, "dbits": 1207959552 } ] }, { "idx": 70, "layers": [ "model.layers.14.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0009182941168546788, "dbits": 18874368 } ] }, { "idx": 71, "layers": [ "model.layers.14.self_attn.k_proj", "model.layers.14.self_attn.v_proj" ], "candidates": [ { "dkld": -0.003426298499107361, "dbits": 6291456 } ] }, { "idx": 72, "layers": [ "model.layers.14.self_attn.o_proj" ], "candidates": [ { "dkld": -0.005846928432583831, "dbits": 18874368 } ] }, { "idx": 73, "layers": [ "model.layers.14.block_sparse_moe.experts.0.w1", "model.layers.14.block_sparse_moe.experts.1.w1", "model.layers.14.block_sparse_moe.experts.2.w1", "model.layers.14.block_sparse_moe.experts.3.w1", "model.layers.14.block_sparse_moe.experts.4.w1", "model.layers.14.block_sparse_moe.experts.5.w1", "model.layers.14.block_sparse_moe.experts.6.w1", "model.layers.14.block_sparse_moe.experts.7.w1", "model.layers.14.block_sparse_moe.experts.8.w1", "model.layers.14.block_sparse_moe.experts.9.w1", "model.layers.14.block_sparse_moe.experts.10.w1", "model.layers.14.block_sparse_moe.experts.11.w1", "model.layers.14.block_sparse_moe.experts.12.w1", "model.layers.14.block_sparse_moe.experts.13.w1", "model.layers.14.block_sparse_moe.experts.14.w1", "model.layers.14.block_sparse_moe.experts.15.w1", "model.layers.14.block_sparse_moe.experts.16.w1", "model.layers.14.block_sparse_moe.experts.17.w1", "model.layers.14.block_sparse_moe.experts.18.w1", "model.layers.14.block_sparse_moe.experts.19.w1", "model.layers.14.block_sparse_moe.experts.20.w1", "model.layers.14.block_sparse_moe.experts.21.w1", "model.layers.14.block_sparse_moe.experts.22.w1", "model.layers.14.block_sparse_moe.experts.23.w1", "model.layers.14.block_sparse_moe.experts.24.w1", "model.layers.14.block_sparse_moe.experts.25.w1", "model.layers.14.block_sparse_moe.experts.26.w1", "model.layers.14.block_sparse_moe.experts.27.w1", "model.layers.14.block_sparse_moe.experts.28.w1", "model.layers.14.block_sparse_moe.experts.29.w1", "model.layers.14.block_sparse_moe.experts.30.w1", "model.layers.14.block_sparse_moe.experts.31.w1", "model.layers.14.block_sparse_moe.experts.32.w1", "model.layers.14.block_sparse_moe.experts.33.w1", "model.layers.14.block_sparse_moe.experts.34.w1", "model.layers.14.block_sparse_moe.experts.35.w1", "model.layers.14.block_sparse_moe.experts.36.w1", "model.layers.14.block_sparse_moe.experts.37.w1", "model.layers.14.block_sparse_moe.experts.38.w1", "model.layers.14.block_sparse_moe.experts.39.w1", "model.layers.14.block_sparse_moe.experts.40.w1", "model.layers.14.block_sparse_moe.experts.41.w1", "model.layers.14.block_sparse_moe.experts.42.w1", "model.layers.14.block_sparse_moe.experts.43.w1", "model.layers.14.block_sparse_moe.experts.44.w1", "model.layers.14.block_sparse_moe.experts.45.w1", "model.layers.14.block_sparse_moe.experts.46.w1", "model.layers.14.block_sparse_moe.experts.47.w1", "model.layers.14.block_sparse_moe.experts.48.w1", "model.layers.14.block_sparse_moe.experts.49.w1", "model.layers.14.block_sparse_moe.experts.50.w1", "model.layers.14.block_sparse_moe.experts.51.w1", "model.layers.14.block_sparse_moe.experts.52.w1", "model.layers.14.block_sparse_moe.experts.53.w1", "model.layers.14.block_sparse_moe.experts.54.w1", "model.layers.14.block_sparse_moe.experts.55.w1", "model.layers.14.block_sparse_moe.experts.56.w1", "model.layers.14.block_sparse_moe.experts.57.w1", "model.layers.14.block_sparse_moe.experts.58.w1", "model.layers.14.block_sparse_moe.experts.59.w1", "model.layers.14.block_sparse_moe.experts.60.w1", "model.layers.14.block_sparse_moe.experts.61.w1", "model.layers.14.block_sparse_moe.experts.62.w1", "model.layers.14.block_sparse_moe.experts.63.w1", "model.layers.14.block_sparse_moe.experts.64.w1", "model.layers.14.block_sparse_moe.experts.65.w1", "model.layers.14.block_sparse_moe.experts.66.w1", "model.layers.14.block_sparse_moe.experts.67.w1", "model.layers.14.block_sparse_moe.experts.68.w1", "model.layers.14.block_sparse_moe.experts.69.w1", "model.layers.14.block_sparse_moe.experts.70.w1", "model.layers.14.block_sparse_moe.experts.71.w1", "model.layers.14.block_sparse_moe.experts.72.w1", "model.layers.14.block_sparse_moe.experts.73.w1", "model.layers.14.block_sparse_moe.experts.74.w1", "model.layers.14.block_sparse_moe.experts.75.w1", "model.layers.14.block_sparse_moe.experts.76.w1", "model.layers.14.block_sparse_moe.experts.77.w1", "model.layers.14.block_sparse_moe.experts.78.w1", "model.layers.14.block_sparse_moe.experts.79.w1", "model.layers.14.block_sparse_moe.experts.80.w1", "model.layers.14.block_sparse_moe.experts.81.w1", "model.layers.14.block_sparse_moe.experts.82.w1", "model.layers.14.block_sparse_moe.experts.83.w1", "model.layers.14.block_sparse_moe.experts.84.w1", "model.layers.14.block_sparse_moe.experts.85.w1", "model.layers.14.block_sparse_moe.experts.86.w1", "model.layers.14.block_sparse_moe.experts.87.w1", "model.layers.14.block_sparse_moe.experts.88.w1", "model.layers.14.block_sparse_moe.experts.89.w1", "model.layers.14.block_sparse_moe.experts.90.w1", "model.layers.14.block_sparse_moe.experts.91.w1", "model.layers.14.block_sparse_moe.experts.92.w1", "model.layers.14.block_sparse_moe.experts.93.w1", "model.layers.14.block_sparse_moe.experts.94.w1", "model.layers.14.block_sparse_moe.experts.95.w1", "model.layers.14.block_sparse_moe.experts.96.w1", "model.layers.14.block_sparse_moe.experts.97.w1", "model.layers.14.block_sparse_moe.experts.98.w1", "model.layers.14.block_sparse_moe.experts.99.w1", "model.layers.14.block_sparse_moe.experts.100.w1", "model.layers.14.block_sparse_moe.experts.101.w1", "model.layers.14.block_sparse_moe.experts.102.w1", "model.layers.14.block_sparse_moe.experts.103.w1", "model.layers.14.block_sparse_moe.experts.104.w1", "model.layers.14.block_sparse_moe.experts.105.w1", "model.layers.14.block_sparse_moe.experts.106.w1", "model.layers.14.block_sparse_moe.experts.107.w1", "model.layers.14.block_sparse_moe.experts.108.w1", "model.layers.14.block_sparse_moe.experts.109.w1", "model.layers.14.block_sparse_moe.experts.110.w1", "model.layers.14.block_sparse_moe.experts.111.w1", "model.layers.14.block_sparse_moe.experts.112.w1", "model.layers.14.block_sparse_moe.experts.113.w1", "model.layers.14.block_sparse_moe.experts.114.w1", "model.layers.14.block_sparse_moe.experts.115.w1", "model.layers.14.block_sparse_moe.experts.116.w1", "model.layers.14.block_sparse_moe.experts.117.w1", "model.layers.14.block_sparse_moe.experts.118.w1", "model.layers.14.block_sparse_moe.experts.119.w1", "model.layers.14.block_sparse_moe.experts.120.w1", "model.layers.14.block_sparse_moe.experts.121.w1", "model.layers.14.block_sparse_moe.experts.122.w1", "model.layers.14.block_sparse_moe.experts.123.w1", "model.layers.14.block_sparse_moe.experts.124.w1", "model.layers.14.block_sparse_moe.experts.125.w1", "model.layers.14.block_sparse_moe.experts.126.w1", "model.layers.14.block_sparse_moe.experts.127.w1", "model.layers.14.block_sparse_moe.experts.128.w1", "model.layers.14.block_sparse_moe.experts.129.w1", "model.layers.14.block_sparse_moe.experts.130.w1", "model.layers.14.block_sparse_moe.experts.131.w1", "model.layers.14.block_sparse_moe.experts.132.w1", "model.layers.14.block_sparse_moe.experts.133.w1", "model.layers.14.block_sparse_moe.experts.134.w1", "model.layers.14.block_sparse_moe.experts.135.w1", "model.layers.14.block_sparse_moe.experts.136.w1", "model.layers.14.block_sparse_moe.experts.137.w1", "model.layers.14.block_sparse_moe.experts.138.w1", "model.layers.14.block_sparse_moe.experts.139.w1", "model.layers.14.block_sparse_moe.experts.140.w1", "model.layers.14.block_sparse_moe.experts.141.w1", "model.layers.14.block_sparse_moe.experts.142.w1", "model.layers.14.block_sparse_moe.experts.143.w1", "model.layers.14.block_sparse_moe.experts.144.w1", "model.layers.14.block_sparse_moe.experts.145.w1", "model.layers.14.block_sparse_moe.experts.146.w1", "model.layers.14.block_sparse_moe.experts.147.w1", "model.layers.14.block_sparse_moe.experts.148.w1", "model.layers.14.block_sparse_moe.experts.149.w1", "model.layers.14.block_sparse_moe.experts.150.w1", "model.layers.14.block_sparse_moe.experts.151.w1", "model.layers.14.block_sparse_moe.experts.152.w1", "model.layers.14.block_sparse_moe.experts.153.w1", "model.layers.14.block_sparse_moe.experts.154.w1", "model.layers.14.block_sparse_moe.experts.155.w1", "model.layers.14.block_sparse_moe.experts.156.w1", "model.layers.14.block_sparse_moe.experts.157.w1", "model.layers.14.block_sparse_moe.experts.158.w1", "model.layers.14.block_sparse_moe.experts.159.w1", "model.layers.14.block_sparse_moe.experts.160.w1", "model.layers.14.block_sparse_moe.experts.161.w1", "model.layers.14.block_sparse_moe.experts.162.w1", "model.layers.14.block_sparse_moe.experts.163.w1", "model.layers.14.block_sparse_moe.experts.164.w1", "model.layers.14.block_sparse_moe.experts.165.w1", "model.layers.14.block_sparse_moe.experts.166.w1", "model.layers.14.block_sparse_moe.experts.167.w1", "model.layers.14.block_sparse_moe.experts.168.w1", "model.layers.14.block_sparse_moe.experts.169.w1", "model.layers.14.block_sparse_moe.experts.170.w1", "model.layers.14.block_sparse_moe.experts.171.w1", "model.layers.14.block_sparse_moe.experts.172.w1", "model.layers.14.block_sparse_moe.experts.173.w1", "model.layers.14.block_sparse_moe.experts.174.w1", "model.layers.14.block_sparse_moe.experts.175.w1", "model.layers.14.block_sparse_moe.experts.176.w1", "model.layers.14.block_sparse_moe.experts.177.w1", "model.layers.14.block_sparse_moe.experts.178.w1", "model.layers.14.block_sparse_moe.experts.179.w1", "model.layers.14.block_sparse_moe.experts.180.w1", "model.layers.14.block_sparse_moe.experts.181.w1", "model.layers.14.block_sparse_moe.experts.182.w1", "model.layers.14.block_sparse_moe.experts.183.w1", "model.layers.14.block_sparse_moe.experts.184.w1", "model.layers.14.block_sparse_moe.experts.185.w1", "model.layers.14.block_sparse_moe.experts.186.w1", "model.layers.14.block_sparse_moe.experts.187.w1", "model.layers.14.block_sparse_moe.experts.188.w1", "model.layers.14.block_sparse_moe.experts.189.w1", "model.layers.14.block_sparse_moe.experts.190.w1", "model.layers.14.block_sparse_moe.experts.191.w1", "model.layers.14.block_sparse_moe.experts.192.w1", "model.layers.14.block_sparse_moe.experts.193.w1", "model.layers.14.block_sparse_moe.experts.194.w1", "model.layers.14.block_sparse_moe.experts.195.w1", "model.layers.14.block_sparse_moe.experts.196.w1", "model.layers.14.block_sparse_moe.experts.197.w1", "model.layers.14.block_sparse_moe.experts.198.w1", "model.layers.14.block_sparse_moe.experts.199.w1", "model.layers.14.block_sparse_moe.experts.200.w1", "model.layers.14.block_sparse_moe.experts.201.w1", "model.layers.14.block_sparse_moe.experts.202.w1", "model.layers.14.block_sparse_moe.experts.203.w1", "model.layers.14.block_sparse_moe.experts.204.w1", "model.layers.14.block_sparse_moe.experts.205.w1", "model.layers.14.block_sparse_moe.experts.206.w1", "model.layers.14.block_sparse_moe.experts.207.w1", "model.layers.14.block_sparse_moe.experts.208.w1", "model.layers.14.block_sparse_moe.experts.209.w1", "model.layers.14.block_sparse_moe.experts.210.w1", "model.layers.14.block_sparse_moe.experts.211.w1", "model.layers.14.block_sparse_moe.experts.212.w1", "model.layers.14.block_sparse_moe.experts.213.w1", "model.layers.14.block_sparse_moe.experts.214.w1", "model.layers.14.block_sparse_moe.experts.215.w1", "model.layers.14.block_sparse_moe.experts.216.w1", "model.layers.14.block_sparse_moe.experts.217.w1", "model.layers.14.block_sparse_moe.experts.218.w1", "model.layers.14.block_sparse_moe.experts.219.w1", "model.layers.14.block_sparse_moe.experts.220.w1", "model.layers.14.block_sparse_moe.experts.221.w1", "model.layers.14.block_sparse_moe.experts.222.w1", "model.layers.14.block_sparse_moe.experts.223.w1", "model.layers.14.block_sparse_moe.experts.224.w1", "model.layers.14.block_sparse_moe.experts.225.w1", "model.layers.14.block_sparse_moe.experts.226.w1", "model.layers.14.block_sparse_moe.experts.227.w1", "model.layers.14.block_sparse_moe.experts.228.w1", "model.layers.14.block_sparse_moe.experts.229.w1", "model.layers.14.block_sparse_moe.experts.230.w1", "model.layers.14.block_sparse_moe.experts.231.w1", "model.layers.14.block_sparse_moe.experts.232.w1", "model.layers.14.block_sparse_moe.experts.233.w1", "model.layers.14.block_sparse_moe.experts.234.w1", "model.layers.14.block_sparse_moe.experts.235.w1", "model.layers.14.block_sparse_moe.experts.236.w1", "model.layers.14.block_sparse_moe.experts.237.w1", "model.layers.14.block_sparse_moe.experts.238.w1", "model.layers.14.block_sparse_moe.experts.239.w1", "model.layers.14.block_sparse_moe.experts.240.w1", "model.layers.14.block_sparse_moe.experts.241.w1", "model.layers.14.block_sparse_moe.experts.242.w1", "model.layers.14.block_sparse_moe.experts.243.w1", "model.layers.14.block_sparse_moe.experts.244.w1", "model.layers.14.block_sparse_moe.experts.245.w1", "model.layers.14.block_sparse_moe.experts.246.w1", "model.layers.14.block_sparse_moe.experts.247.w1", "model.layers.14.block_sparse_moe.experts.248.w1", "model.layers.14.block_sparse_moe.experts.249.w1", "model.layers.14.block_sparse_moe.experts.250.w1", "model.layers.14.block_sparse_moe.experts.251.w1", "model.layers.14.block_sparse_moe.experts.252.w1", "model.layers.14.block_sparse_moe.experts.253.w1", "model.layers.14.block_sparse_moe.experts.254.w1", "model.layers.14.block_sparse_moe.experts.255.w1", "model.layers.14.block_sparse_moe.experts.0.w3", "model.layers.14.block_sparse_moe.experts.1.w3", "model.layers.14.block_sparse_moe.experts.2.w3", "model.layers.14.block_sparse_moe.experts.3.w3", "model.layers.14.block_sparse_moe.experts.4.w3", "model.layers.14.block_sparse_moe.experts.5.w3", "model.layers.14.block_sparse_moe.experts.6.w3", "model.layers.14.block_sparse_moe.experts.7.w3", "model.layers.14.block_sparse_moe.experts.8.w3", "model.layers.14.block_sparse_moe.experts.9.w3", "model.layers.14.block_sparse_moe.experts.10.w3", "model.layers.14.block_sparse_moe.experts.11.w3", "model.layers.14.block_sparse_moe.experts.12.w3", "model.layers.14.block_sparse_moe.experts.13.w3", "model.layers.14.block_sparse_moe.experts.14.w3", "model.layers.14.block_sparse_moe.experts.15.w3", "model.layers.14.block_sparse_moe.experts.16.w3", "model.layers.14.block_sparse_moe.experts.17.w3", "model.layers.14.block_sparse_moe.experts.18.w3", "model.layers.14.block_sparse_moe.experts.19.w3", "model.layers.14.block_sparse_moe.experts.20.w3", "model.layers.14.block_sparse_moe.experts.21.w3", "model.layers.14.block_sparse_moe.experts.22.w3", "model.layers.14.block_sparse_moe.experts.23.w3", "model.layers.14.block_sparse_moe.experts.24.w3", "model.layers.14.block_sparse_moe.experts.25.w3", "model.layers.14.block_sparse_moe.experts.26.w3", "model.layers.14.block_sparse_moe.experts.27.w3", "model.layers.14.block_sparse_moe.experts.28.w3", "model.layers.14.block_sparse_moe.experts.29.w3", "model.layers.14.block_sparse_moe.experts.30.w3", "model.layers.14.block_sparse_moe.experts.31.w3", "model.layers.14.block_sparse_moe.experts.32.w3", "model.layers.14.block_sparse_moe.experts.33.w3", "model.layers.14.block_sparse_moe.experts.34.w3", "model.layers.14.block_sparse_moe.experts.35.w3", "model.layers.14.block_sparse_moe.experts.36.w3", "model.layers.14.block_sparse_moe.experts.37.w3", "model.layers.14.block_sparse_moe.experts.38.w3", "model.layers.14.block_sparse_moe.experts.39.w3", "model.layers.14.block_sparse_moe.experts.40.w3", "model.layers.14.block_sparse_moe.experts.41.w3", "model.layers.14.block_sparse_moe.experts.42.w3", "model.layers.14.block_sparse_moe.experts.43.w3", "model.layers.14.block_sparse_moe.experts.44.w3", "model.layers.14.block_sparse_moe.experts.45.w3", "model.layers.14.block_sparse_moe.experts.46.w3", "model.layers.14.block_sparse_moe.experts.47.w3", "model.layers.14.block_sparse_moe.experts.48.w3", "model.layers.14.block_sparse_moe.experts.49.w3", "model.layers.14.block_sparse_moe.experts.50.w3", "model.layers.14.block_sparse_moe.experts.51.w3", "model.layers.14.block_sparse_moe.experts.52.w3", "model.layers.14.block_sparse_moe.experts.53.w3", "model.layers.14.block_sparse_moe.experts.54.w3", "model.layers.14.block_sparse_moe.experts.55.w3", "model.layers.14.block_sparse_moe.experts.56.w3", "model.layers.14.block_sparse_moe.experts.57.w3", "model.layers.14.block_sparse_moe.experts.58.w3", "model.layers.14.block_sparse_moe.experts.59.w3", "model.layers.14.block_sparse_moe.experts.60.w3", "model.layers.14.block_sparse_moe.experts.61.w3", "model.layers.14.block_sparse_moe.experts.62.w3", "model.layers.14.block_sparse_moe.experts.63.w3", "model.layers.14.block_sparse_moe.experts.64.w3", "model.layers.14.block_sparse_moe.experts.65.w3", "model.layers.14.block_sparse_moe.experts.66.w3", "model.layers.14.block_sparse_moe.experts.67.w3", "model.layers.14.block_sparse_moe.experts.68.w3", "model.layers.14.block_sparse_moe.experts.69.w3", "model.layers.14.block_sparse_moe.experts.70.w3", "model.layers.14.block_sparse_moe.experts.71.w3", "model.layers.14.block_sparse_moe.experts.72.w3", "model.layers.14.block_sparse_moe.experts.73.w3", "model.layers.14.block_sparse_moe.experts.74.w3", "model.layers.14.block_sparse_moe.experts.75.w3", "model.layers.14.block_sparse_moe.experts.76.w3", "model.layers.14.block_sparse_moe.experts.77.w3", "model.layers.14.block_sparse_moe.experts.78.w3", "model.layers.14.block_sparse_moe.experts.79.w3", "model.layers.14.block_sparse_moe.experts.80.w3", "model.layers.14.block_sparse_moe.experts.81.w3", "model.layers.14.block_sparse_moe.experts.82.w3", "model.layers.14.block_sparse_moe.experts.83.w3", "model.layers.14.block_sparse_moe.experts.84.w3", "model.layers.14.block_sparse_moe.experts.85.w3", "model.layers.14.block_sparse_moe.experts.86.w3", "model.layers.14.block_sparse_moe.experts.87.w3", "model.layers.14.block_sparse_moe.experts.88.w3", "model.layers.14.block_sparse_moe.experts.89.w3", "model.layers.14.block_sparse_moe.experts.90.w3", "model.layers.14.block_sparse_moe.experts.91.w3", "model.layers.14.block_sparse_moe.experts.92.w3", "model.layers.14.block_sparse_moe.experts.93.w3", "model.layers.14.block_sparse_moe.experts.94.w3", "model.layers.14.block_sparse_moe.experts.95.w3", "model.layers.14.block_sparse_moe.experts.96.w3", "model.layers.14.block_sparse_moe.experts.97.w3", "model.layers.14.block_sparse_moe.experts.98.w3", "model.layers.14.block_sparse_moe.experts.99.w3", "model.layers.14.block_sparse_moe.experts.100.w3", "model.layers.14.block_sparse_moe.experts.101.w3", "model.layers.14.block_sparse_moe.experts.102.w3", "model.layers.14.block_sparse_moe.experts.103.w3", "model.layers.14.block_sparse_moe.experts.104.w3", "model.layers.14.block_sparse_moe.experts.105.w3", "model.layers.14.block_sparse_moe.experts.106.w3", "model.layers.14.block_sparse_moe.experts.107.w3", "model.layers.14.block_sparse_moe.experts.108.w3", "model.layers.14.block_sparse_moe.experts.109.w3", "model.layers.14.block_sparse_moe.experts.110.w3", "model.layers.14.block_sparse_moe.experts.111.w3", "model.layers.14.block_sparse_moe.experts.112.w3", "model.layers.14.block_sparse_moe.experts.113.w3", "model.layers.14.block_sparse_moe.experts.114.w3", "model.layers.14.block_sparse_moe.experts.115.w3", "model.layers.14.block_sparse_moe.experts.116.w3", "model.layers.14.block_sparse_moe.experts.117.w3", "model.layers.14.block_sparse_moe.experts.118.w3", "model.layers.14.block_sparse_moe.experts.119.w3", "model.layers.14.block_sparse_moe.experts.120.w3", "model.layers.14.block_sparse_moe.experts.121.w3", "model.layers.14.block_sparse_moe.experts.122.w3", "model.layers.14.block_sparse_moe.experts.123.w3", "model.layers.14.block_sparse_moe.experts.124.w3", "model.layers.14.block_sparse_moe.experts.125.w3", "model.layers.14.block_sparse_moe.experts.126.w3", "model.layers.14.block_sparse_moe.experts.127.w3", "model.layers.14.block_sparse_moe.experts.128.w3", "model.layers.14.block_sparse_moe.experts.129.w3", "model.layers.14.block_sparse_moe.experts.130.w3", "model.layers.14.block_sparse_moe.experts.131.w3", "model.layers.14.block_sparse_moe.experts.132.w3", "model.layers.14.block_sparse_moe.experts.133.w3", "model.layers.14.block_sparse_moe.experts.134.w3", "model.layers.14.block_sparse_moe.experts.135.w3", "model.layers.14.block_sparse_moe.experts.136.w3", "model.layers.14.block_sparse_moe.experts.137.w3", "model.layers.14.block_sparse_moe.experts.138.w3", "model.layers.14.block_sparse_moe.experts.139.w3", "model.layers.14.block_sparse_moe.experts.140.w3", "model.layers.14.block_sparse_moe.experts.141.w3", "model.layers.14.block_sparse_moe.experts.142.w3", "model.layers.14.block_sparse_moe.experts.143.w3", "model.layers.14.block_sparse_moe.experts.144.w3", "model.layers.14.block_sparse_moe.experts.145.w3", "model.layers.14.block_sparse_moe.experts.146.w3", "model.layers.14.block_sparse_moe.experts.147.w3", "model.layers.14.block_sparse_moe.experts.148.w3", "model.layers.14.block_sparse_moe.experts.149.w3", "model.layers.14.block_sparse_moe.experts.150.w3", "model.layers.14.block_sparse_moe.experts.151.w3", "model.layers.14.block_sparse_moe.experts.152.w3", "model.layers.14.block_sparse_moe.experts.153.w3", "model.layers.14.block_sparse_moe.experts.154.w3", "model.layers.14.block_sparse_moe.experts.155.w3", "model.layers.14.block_sparse_moe.experts.156.w3", "model.layers.14.block_sparse_moe.experts.157.w3", "model.layers.14.block_sparse_moe.experts.158.w3", "model.layers.14.block_sparse_moe.experts.159.w3", "model.layers.14.block_sparse_moe.experts.160.w3", "model.layers.14.block_sparse_moe.experts.161.w3", "model.layers.14.block_sparse_moe.experts.162.w3", "model.layers.14.block_sparse_moe.experts.163.w3", "model.layers.14.block_sparse_moe.experts.164.w3", "model.layers.14.block_sparse_moe.experts.165.w3", "model.layers.14.block_sparse_moe.experts.166.w3", "model.layers.14.block_sparse_moe.experts.167.w3", "model.layers.14.block_sparse_moe.experts.168.w3", "model.layers.14.block_sparse_moe.experts.169.w3", "model.layers.14.block_sparse_moe.experts.170.w3", "model.layers.14.block_sparse_moe.experts.171.w3", "model.layers.14.block_sparse_moe.experts.172.w3", "model.layers.14.block_sparse_moe.experts.173.w3", "model.layers.14.block_sparse_moe.experts.174.w3", "model.layers.14.block_sparse_moe.experts.175.w3", "model.layers.14.block_sparse_moe.experts.176.w3", "model.layers.14.block_sparse_moe.experts.177.w3", "model.layers.14.block_sparse_moe.experts.178.w3", "model.layers.14.block_sparse_moe.experts.179.w3", "model.layers.14.block_sparse_moe.experts.180.w3", "model.layers.14.block_sparse_moe.experts.181.w3", "model.layers.14.block_sparse_moe.experts.182.w3", "model.layers.14.block_sparse_moe.experts.183.w3", "model.layers.14.block_sparse_moe.experts.184.w3", "model.layers.14.block_sparse_moe.experts.185.w3", "model.layers.14.block_sparse_moe.experts.186.w3", "model.layers.14.block_sparse_moe.experts.187.w3", "model.layers.14.block_sparse_moe.experts.188.w3", "model.layers.14.block_sparse_moe.experts.189.w3", "model.layers.14.block_sparse_moe.experts.190.w3", "model.layers.14.block_sparse_moe.experts.191.w3", "model.layers.14.block_sparse_moe.experts.192.w3", "model.layers.14.block_sparse_moe.experts.193.w3", "model.layers.14.block_sparse_moe.experts.194.w3", "model.layers.14.block_sparse_moe.experts.195.w3", "model.layers.14.block_sparse_moe.experts.196.w3", "model.layers.14.block_sparse_moe.experts.197.w3", "model.layers.14.block_sparse_moe.experts.198.w3", "model.layers.14.block_sparse_moe.experts.199.w3", "model.layers.14.block_sparse_moe.experts.200.w3", "model.layers.14.block_sparse_moe.experts.201.w3", "model.layers.14.block_sparse_moe.experts.202.w3", "model.layers.14.block_sparse_moe.experts.203.w3", "model.layers.14.block_sparse_moe.experts.204.w3", "model.layers.14.block_sparse_moe.experts.205.w3", "model.layers.14.block_sparse_moe.experts.206.w3", "model.layers.14.block_sparse_moe.experts.207.w3", "model.layers.14.block_sparse_moe.experts.208.w3", "model.layers.14.block_sparse_moe.experts.209.w3", "model.layers.14.block_sparse_moe.experts.210.w3", "model.layers.14.block_sparse_moe.experts.211.w3", "model.layers.14.block_sparse_moe.experts.212.w3", "model.layers.14.block_sparse_moe.experts.213.w3", "model.layers.14.block_sparse_moe.experts.214.w3", "model.layers.14.block_sparse_moe.experts.215.w3", "model.layers.14.block_sparse_moe.experts.216.w3", "model.layers.14.block_sparse_moe.experts.217.w3", "model.layers.14.block_sparse_moe.experts.218.w3", "model.layers.14.block_sparse_moe.experts.219.w3", "model.layers.14.block_sparse_moe.experts.220.w3", "model.layers.14.block_sparse_moe.experts.221.w3", "model.layers.14.block_sparse_moe.experts.222.w3", "model.layers.14.block_sparse_moe.experts.223.w3", "model.layers.14.block_sparse_moe.experts.224.w3", "model.layers.14.block_sparse_moe.experts.225.w3", "model.layers.14.block_sparse_moe.experts.226.w3", "model.layers.14.block_sparse_moe.experts.227.w3", "model.layers.14.block_sparse_moe.experts.228.w3", "model.layers.14.block_sparse_moe.experts.229.w3", "model.layers.14.block_sparse_moe.experts.230.w3", "model.layers.14.block_sparse_moe.experts.231.w3", "model.layers.14.block_sparse_moe.experts.232.w3", "model.layers.14.block_sparse_moe.experts.233.w3", "model.layers.14.block_sparse_moe.experts.234.w3", "model.layers.14.block_sparse_moe.experts.235.w3", "model.layers.14.block_sparse_moe.experts.236.w3", "model.layers.14.block_sparse_moe.experts.237.w3", "model.layers.14.block_sparse_moe.experts.238.w3", "model.layers.14.block_sparse_moe.experts.239.w3", "model.layers.14.block_sparse_moe.experts.240.w3", "model.layers.14.block_sparse_moe.experts.241.w3", "model.layers.14.block_sparse_moe.experts.242.w3", "model.layers.14.block_sparse_moe.experts.243.w3", "model.layers.14.block_sparse_moe.experts.244.w3", "model.layers.14.block_sparse_moe.experts.245.w3", "model.layers.14.block_sparse_moe.experts.246.w3", "model.layers.14.block_sparse_moe.experts.247.w3", "model.layers.14.block_sparse_moe.experts.248.w3", "model.layers.14.block_sparse_moe.experts.249.w3", "model.layers.14.block_sparse_moe.experts.250.w3", "model.layers.14.block_sparse_moe.experts.251.w3", "model.layers.14.block_sparse_moe.experts.252.w3", "model.layers.14.block_sparse_moe.experts.253.w3", "model.layers.14.block_sparse_moe.experts.254.w3", "model.layers.14.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0009963378310203774, "dbits": 2415919104 } ] }, { "idx": 74, "layers": [ "model.layers.14.block_sparse_moe.experts.0.w2", "model.layers.14.block_sparse_moe.experts.1.w2", "model.layers.14.block_sparse_moe.experts.2.w2", "model.layers.14.block_sparse_moe.experts.3.w2", "model.layers.14.block_sparse_moe.experts.4.w2", "model.layers.14.block_sparse_moe.experts.5.w2", "model.layers.14.block_sparse_moe.experts.6.w2", "model.layers.14.block_sparse_moe.experts.7.w2", "model.layers.14.block_sparse_moe.experts.8.w2", "model.layers.14.block_sparse_moe.experts.9.w2", "model.layers.14.block_sparse_moe.experts.10.w2", "model.layers.14.block_sparse_moe.experts.11.w2", "model.layers.14.block_sparse_moe.experts.12.w2", "model.layers.14.block_sparse_moe.experts.13.w2", "model.layers.14.block_sparse_moe.experts.14.w2", "model.layers.14.block_sparse_moe.experts.15.w2", "model.layers.14.block_sparse_moe.experts.16.w2", "model.layers.14.block_sparse_moe.experts.17.w2", "model.layers.14.block_sparse_moe.experts.18.w2", "model.layers.14.block_sparse_moe.experts.19.w2", "model.layers.14.block_sparse_moe.experts.20.w2", "model.layers.14.block_sparse_moe.experts.21.w2", "model.layers.14.block_sparse_moe.experts.22.w2", "model.layers.14.block_sparse_moe.experts.23.w2", "model.layers.14.block_sparse_moe.experts.24.w2", "model.layers.14.block_sparse_moe.experts.25.w2", "model.layers.14.block_sparse_moe.experts.26.w2", "model.layers.14.block_sparse_moe.experts.27.w2", "model.layers.14.block_sparse_moe.experts.28.w2", "model.layers.14.block_sparse_moe.experts.29.w2", "model.layers.14.block_sparse_moe.experts.30.w2", "model.layers.14.block_sparse_moe.experts.31.w2", "model.layers.14.block_sparse_moe.experts.32.w2", "model.layers.14.block_sparse_moe.experts.33.w2", "model.layers.14.block_sparse_moe.experts.34.w2", "model.layers.14.block_sparse_moe.experts.35.w2", "model.layers.14.block_sparse_moe.experts.36.w2", "model.layers.14.block_sparse_moe.experts.37.w2", "model.layers.14.block_sparse_moe.experts.38.w2", "model.layers.14.block_sparse_moe.experts.39.w2", "model.layers.14.block_sparse_moe.experts.40.w2", "model.layers.14.block_sparse_moe.experts.41.w2", "model.layers.14.block_sparse_moe.experts.42.w2", "model.layers.14.block_sparse_moe.experts.43.w2", "model.layers.14.block_sparse_moe.experts.44.w2", "model.layers.14.block_sparse_moe.experts.45.w2", "model.layers.14.block_sparse_moe.experts.46.w2", "model.layers.14.block_sparse_moe.experts.47.w2", "model.layers.14.block_sparse_moe.experts.48.w2", "model.layers.14.block_sparse_moe.experts.49.w2", "model.layers.14.block_sparse_moe.experts.50.w2", "model.layers.14.block_sparse_moe.experts.51.w2", "model.layers.14.block_sparse_moe.experts.52.w2", "model.layers.14.block_sparse_moe.experts.53.w2", "model.layers.14.block_sparse_moe.experts.54.w2", "model.layers.14.block_sparse_moe.experts.55.w2", "model.layers.14.block_sparse_moe.experts.56.w2", "model.layers.14.block_sparse_moe.experts.57.w2", "model.layers.14.block_sparse_moe.experts.58.w2", "model.layers.14.block_sparse_moe.experts.59.w2", "model.layers.14.block_sparse_moe.experts.60.w2", "model.layers.14.block_sparse_moe.experts.61.w2", "model.layers.14.block_sparse_moe.experts.62.w2", "model.layers.14.block_sparse_moe.experts.63.w2", "model.layers.14.block_sparse_moe.experts.64.w2", "model.layers.14.block_sparse_moe.experts.65.w2", "model.layers.14.block_sparse_moe.experts.66.w2", "model.layers.14.block_sparse_moe.experts.67.w2", "model.layers.14.block_sparse_moe.experts.68.w2", "model.layers.14.block_sparse_moe.experts.69.w2", "model.layers.14.block_sparse_moe.experts.70.w2", "model.layers.14.block_sparse_moe.experts.71.w2", "model.layers.14.block_sparse_moe.experts.72.w2", "model.layers.14.block_sparse_moe.experts.73.w2", "model.layers.14.block_sparse_moe.experts.74.w2", "model.layers.14.block_sparse_moe.experts.75.w2", "model.layers.14.block_sparse_moe.experts.76.w2", "model.layers.14.block_sparse_moe.experts.77.w2", "model.layers.14.block_sparse_moe.experts.78.w2", "model.layers.14.block_sparse_moe.experts.79.w2", "model.layers.14.block_sparse_moe.experts.80.w2", "model.layers.14.block_sparse_moe.experts.81.w2", "model.layers.14.block_sparse_moe.experts.82.w2", "model.layers.14.block_sparse_moe.experts.83.w2", "model.layers.14.block_sparse_moe.experts.84.w2", "model.layers.14.block_sparse_moe.experts.85.w2", "model.layers.14.block_sparse_moe.experts.86.w2", "model.layers.14.block_sparse_moe.experts.87.w2", "model.layers.14.block_sparse_moe.experts.88.w2", "model.layers.14.block_sparse_moe.experts.89.w2", "model.layers.14.block_sparse_moe.experts.90.w2", "model.layers.14.block_sparse_moe.experts.91.w2", "model.layers.14.block_sparse_moe.experts.92.w2", "model.layers.14.block_sparse_moe.experts.93.w2", "model.layers.14.block_sparse_moe.experts.94.w2", "model.layers.14.block_sparse_moe.experts.95.w2", "model.layers.14.block_sparse_moe.experts.96.w2", "model.layers.14.block_sparse_moe.experts.97.w2", "model.layers.14.block_sparse_moe.experts.98.w2", "model.layers.14.block_sparse_moe.experts.99.w2", "model.layers.14.block_sparse_moe.experts.100.w2", "model.layers.14.block_sparse_moe.experts.101.w2", "model.layers.14.block_sparse_moe.experts.102.w2", "model.layers.14.block_sparse_moe.experts.103.w2", "model.layers.14.block_sparse_moe.experts.104.w2", "model.layers.14.block_sparse_moe.experts.105.w2", "model.layers.14.block_sparse_moe.experts.106.w2", "model.layers.14.block_sparse_moe.experts.107.w2", "model.layers.14.block_sparse_moe.experts.108.w2", "model.layers.14.block_sparse_moe.experts.109.w2", "model.layers.14.block_sparse_moe.experts.110.w2", "model.layers.14.block_sparse_moe.experts.111.w2", "model.layers.14.block_sparse_moe.experts.112.w2", "model.layers.14.block_sparse_moe.experts.113.w2", "model.layers.14.block_sparse_moe.experts.114.w2", "model.layers.14.block_sparse_moe.experts.115.w2", "model.layers.14.block_sparse_moe.experts.116.w2", "model.layers.14.block_sparse_moe.experts.117.w2", "model.layers.14.block_sparse_moe.experts.118.w2", "model.layers.14.block_sparse_moe.experts.119.w2", "model.layers.14.block_sparse_moe.experts.120.w2", "model.layers.14.block_sparse_moe.experts.121.w2", "model.layers.14.block_sparse_moe.experts.122.w2", "model.layers.14.block_sparse_moe.experts.123.w2", "model.layers.14.block_sparse_moe.experts.124.w2", "model.layers.14.block_sparse_moe.experts.125.w2", "model.layers.14.block_sparse_moe.experts.126.w2", "model.layers.14.block_sparse_moe.experts.127.w2", "model.layers.14.block_sparse_moe.experts.128.w2", "model.layers.14.block_sparse_moe.experts.129.w2", "model.layers.14.block_sparse_moe.experts.130.w2", "model.layers.14.block_sparse_moe.experts.131.w2", "model.layers.14.block_sparse_moe.experts.132.w2", "model.layers.14.block_sparse_moe.experts.133.w2", "model.layers.14.block_sparse_moe.experts.134.w2", "model.layers.14.block_sparse_moe.experts.135.w2", "model.layers.14.block_sparse_moe.experts.136.w2", "model.layers.14.block_sparse_moe.experts.137.w2", "model.layers.14.block_sparse_moe.experts.138.w2", "model.layers.14.block_sparse_moe.experts.139.w2", "model.layers.14.block_sparse_moe.experts.140.w2", "model.layers.14.block_sparse_moe.experts.141.w2", "model.layers.14.block_sparse_moe.experts.142.w2", "model.layers.14.block_sparse_moe.experts.143.w2", "model.layers.14.block_sparse_moe.experts.144.w2", "model.layers.14.block_sparse_moe.experts.145.w2", "model.layers.14.block_sparse_moe.experts.146.w2", "model.layers.14.block_sparse_moe.experts.147.w2", "model.layers.14.block_sparse_moe.experts.148.w2", "model.layers.14.block_sparse_moe.experts.149.w2", "model.layers.14.block_sparse_moe.experts.150.w2", "model.layers.14.block_sparse_moe.experts.151.w2", "model.layers.14.block_sparse_moe.experts.152.w2", "model.layers.14.block_sparse_moe.experts.153.w2", "model.layers.14.block_sparse_moe.experts.154.w2", "model.layers.14.block_sparse_moe.experts.155.w2", "model.layers.14.block_sparse_moe.experts.156.w2", "model.layers.14.block_sparse_moe.experts.157.w2", "model.layers.14.block_sparse_moe.experts.158.w2", "model.layers.14.block_sparse_moe.experts.159.w2", "model.layers.14.block_sparse_moe.experts.160.w2", "model.layers.14.block_sparse_moe.experts.161.w2", "model.layers.14.block_sparse_moe.experts.162.w2", "model.layers.14.block_sparse_moe.experts.163.w2", "model.layers.14.block_sparse_moe.experts.164.w2", "model.layers.14.block_sparse_moe.experts.165.w2", "model.layers.14.block_sparse_moe.experts.166.w2", "model.layers.14.block_sparse_moe.experts.167.w2", "model.layers.14.block_sparse_moe.experts.168.w2", "model.layers.14.block_sparse_moe.experts.169.w2", "model.layers.14.block_sparse_moe.experts.170.w2", "model.layers.14.block_sparse_moe.experts.171.w2", "model.layers.14.block_sparse_moe.experts.172.w2", "model.layers.14.block_sparse_moe.experts.173.w2", "model.layers.14.block_sparse_moe.experts.174.w2", "model.layers.14.block_sparse_moe.experts.175.w2", "model.layers.14.block_sparse_moe.experts.176.w2", "model.layers.14.block_sparse_moe.experts.177.w2", "model.layers.14.block_sparse_moe.experts.178.w2", "model.layers.14.block_sparse_moe.experts.179.w2", "model.layers.14.block_sparse_moe.experts.180.w2", "model.layers.14.block_sparse_moe.experts.181.w2", "model.layers.14.block_sparse_moe.experts.182.w2", "model.layers.14.block_sparse_moe.experts.183.w2", "model.layers.14.block_sparse_moe.experts.184.w2", "model.layers.14.block_sparse_moe.experts.185.w2", "model.layers.14.block_sparse_moe.experts.186.w2", "model.layers.14.block_sparse_moe.experts.187.w2", "model.layers.14.block_sparse_moe.experts.188.w2", "model.layers.14.block_sparse_moe.experts.189.w2", "model.layers.14.block_sparse_moe.experts.190.w2", "model.layers.14.block_sparse_moe.experts.191.w2", "model.layers.14.block_sparse_moe.experts.192.w2", "model.layers.14.block_sparse_moe.experts.193.w2", "model.layers.14.block_sparse_moe.experts.194.w2", "model.layers.14.block_sparse_moe.experts.195.w2", "model.layers.14.block_sparse_moe.experts.196.w2", "model.layers.14.block_sparse_moe.experts.197.w2", "model.layers.14.block_sparse_moe.experts.198.w2", "model.layers.14.block_sparse_moe.experts.199.w2", "model.layers.14.block_sparse_moe.experts.200.w2", "model.layers.14.block_sparse_moe.experts.201.w2", "model.layers.14.block_sparse_moe.experts.202.w2", "model.layers.14.block_sparse_moe.experts.203.w2", "model.layers.14.block_sparse_moe.experts.204.w2", "model.layers.14.block_sparse_moe.experts.205.w2", "model.layers.14.block_sparse_moe.experts.206.w2", "model.layers.14.block_sparse_moe.experts.207.w2", "model.layers.14.block_sparse_moe.experts.208.w2", "model.layers.14.block_sparse_moe.experts.209.w2", "model.layers.14.block_sparse_moe.experts.210.w2", "model.layers.14.block_sparse_moe.experts.211.w2", "model.layers.14.block_sparse_moe.experts.212.w2", "model.layers.14.block_sparse_moe.experts.213.w2", "model.layers.14.block_sparse_moe.experts.214.w2", "model.layers.14.block_sparse_moe.experts.215.w2", "model.layers.14.block_sparse_moe.experts.216.w2", "model.layers.14.block_sparse_moe.experts.217.w2", "model.layers.14.block_sparse_moe.experts.218.w2", "model.layers.14.block_sparse_moe.experts.219.w2", "model.layers.14.block_sparse_moe.experts.220.w2", "model.layers.14.block_sparse_moe.experts.221.w2", "model.layers.14.block_sparse_moe.experts.222.w2", "model.layers.14.block_sparse_moe.experts.223.w2", "model.layers.14.block_sparse_moe.experts.224.w2", "model.layers.14.block_sparse_moe.experts.225.w2", "model.layers.14.block_sparse_moe.experts.226.w2", "model.layers.14.block_sparse_moe.experts.227.w2", "model.layers.14.block_sparse_moe.experts.228.w2", "model.layers.14.block_sparse_moe.experts.229.w2", "model.layers.14.block_sparse_moe.experts.230.w2", "model.layers.14.block_sparse_moe.experts.231.w2", "model.layers.14.block_sparse_moe.experts.232.w2", "model.layers.14.block_sparse_moe.experts.233.w2", "model.layers.14.block_sparse_moe.experts.234.w2", "model.layers.14.block_sparse_moe.experts.235.w2", "model.layers.14.block_sparse_moe.experts.236.w2", "model.layers.14.block_sparse_moe.experts.237.w2", "model.layers.14.block_sparse_moe.experts.238.w2", "model.layers.14.block_sparse_moe.experts.239.w2", "model.layers.14.block_sparse_moe.experts.240.w2", "model.layers.14.block_sparse_moe.experts.241.w2", "model.layers.14.block_sparse_moe.experts.242.w2", "model.layers.14.block_sparse_moe.experts.243.w2", "model.layers.14.block_sparse_moe.experts.244.w2", "model.layers.14.block_sparse_moe.experts.245.w2", "model.layers.14.block_sparse_moe.experts.246.w2", "model.layers.14.block_sparse_moe.experts.247.w2", "model.layers.14.block_sparse_moe.experts.248.w2", "model.layers.14.block_sparse_moe.experts.249.w2", "model.layers.14.block_sparse_moe.experts.250.w2", "model.layers.14.block_sparse_moe.experts.251.w2", "model.layers.14.block_sparse_moe.experts.252.w2", "model.layers.14.block_sparse_moe.experts.253.w2", "model.layers.14.block_sparse_moe.experts.254.w2", "model.layers.14.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.00014575347304340847, "dbits": 1207959552 } ] }, { "idx": 75, "layers": [ "model.layers.15.self_attn.q_proj" ], "candidates": [ { "dkld": -3.3088400960001874e-05, "dbits": 18874368 } ] }, { "idx": 76, "layers": [ "model.layers.15.self_attn.k_proj", "model.layers.15.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0032187055796384922, "dbits": 6291456 } ] }, { "idx": 77, "layers": [ "model.layers.15.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0009900458157062753, "dbits": 18874368 } ] }, { "idx": 78, "layers": [ "model.layers.15.block_sparse_moe.experts.0.w1", "model.layers.15.block_sparse_moe.experts.1.w1", "model.layers.15.block_sparse_moe.experts.2.w1", "model.layers.15.block_sparse_moe.experts.3.w1", "model.layers.15.block_sparse_moe.experts.4.w1", "model.layers.15.block_sparse_moe.experts.5.w1", "model.layers.15.block_sparse_moe.experts.6.w1", "model.layers.15.block_sparse_moe.experts.7.w1", "model.layers.15.block_sparse_moe.experts.8.w1", "model.layers.15.block_sparse_moe.experts.9.w1", "model.layers.15.block_sparse_moe.experts.10.w1", "model.layers.15.block_sparse_moe.experts.11.w1", "model.layers.15.block_sparse_moe.experts.12.w1", "model.layers.15.block_sparse_moe.experts.13.w1", "model.layers.15.block_sparse_moe.experts.14.w1", "model.layers.15.block_sparse_moe.experts.15.w1", "model.layers.15.block_sparse_moe.experts.16.w1", "model.layers.15.block_sparse_moe.experts.17.w1", "model.layers.15.block_sparse_moe.experts.18.w1", "model.layers.15.block_sparse_moe.experts.19.w1", "model.layers.15.block_sparse_moe.experts.20.w1", "model.layers.15.block_sparse_moe.experts.21.w1", "model.layers.15.block_sparse_moe.experts.22.w1", "model.layers.15.block_sparse_moe.experts.23.w1", "model.layers.15.block_sparse_moe.experts.24.w1", "model.layers.15.block_sparse_moe.experts.25.w1", "model.layers.15.block_sparse_moe.experts.26.w1", "model.layers.15.block_sparse_moe.experts.27.w1", "model.layers.15.block_sparse_moe.experts.28.w1", "model.layers.15.block_sparse_moe.experts.29.w1", "model.layers.15.block_sparse_moe.experts.30.w1", "model.layers.15.block_sparse_moe.experts.31.w1", "model.layers.15.block_sparse_moe.experts.32.w1", "model.layers.15.block_sparse_moe.experts.33.w1", "model.layers.15.block_sparse_moe.experts.34.w1", "model.layers.15.block_sparse_moe.experts.35.w1", "model.layers.15.block_sparse_moe.experts.36.w1", "model.layers.15.block_sparse_moe.experts.37.w1", "model.layers.15.block_sparse_moe.experts.38.w1", "model.layers.15.block_sparse_moe.experts.39.w1", "model.layers.15.block_sparse_moe.experts.40.w1", "model.layers.15.block_sparse_moe.experts.41.w1", "model.layers.15.block_sparse_moe.experts.42.w1", "model.layers.15.block_sparse_moe.experts.43.w1", "model.layers.15.block_sparse_moe.experts.44.w1", "model.layers.15.block_sparse_moe.experts.45.w1", "model.layers.15.block_sparse_moe.experts.46.w1", "model.layers.15.block_sparse_moe.experts.47.w1", "model.layers.15.block_sparse_moe.experts.48.w1", "model.layers.15.block_sparse_moe.experts.49.w1", "model.layers.15.block_sparse_moe.experts.50.w1", "model.layers.15.block_sparse_moe.experts.51.w1", "model.layers.15.block_sparse_moe.experts.52.w1", "model.layers.15.block_sparse_moe.experts.53.w1", "model.layers.15.block_sparse_moe.experts.54.w1", "model.layers.15.block_sparse_moe.experts.55.w1", "model.layers.15.block_sparse_moe.experts.56.w1", "model.layers.15.block_sparse_moe.experts.57.w1", "model.layers.15.block_sparse_moe.experts.58.w1", "model.layers.15.block_sparse_moe.experts.59.w1", "model.layers.15.block_sparse_moe.experts.60.w1", "model.layers.15.block_sparse_moe.experts.61.w1", "model.layers.15.block_sparse_moe.experts.62.w1", "model.layers.15.block_sparse_moe.experts.63.w1", "model.layers.15.block_sparse_moe.experts.64.w1", "model.layers.15.block_sparse_moe.experts.65.w1", "model.layers.15.block_sparse_moe.experts.66.w1", "model.layers.15.block_sparse_moe.experts.67.w1", "model.layers.15.block_sparse_moe.experts.68.w1", "model.layers.15.block_sparse_moe.experts.69.w1", "model.layers.15.block_sparse_moe.experts.70.w1", "model.layers.15.block_sparse_moe.experts.71.w1", "model.layers.15.block_sparse_moe.experts.72.w1", "model.layers.15.block_sparse_moe.experts.73.w1", "model.layers.15.block_sparse_moe.experts.74.w1", "model.layers.15.block_sparse_moe.experts.75.w1", "model.layers.15.block_sparse_moe.experts.76.w1", "model.layers.15.block_sparse_moe.experts.77.w1", "model.layers.15.block_sparse_moe.experts.78.w1", "model.layers.15.block_sparse_moe.experts.79.w1", "model.layers.15.block_sparse_moe.experts.80.w1", "model.layers.15.block_sparse_moe.experts.81.w1", "model.layers.15.block_sparse_moe.experts.82.w1", "model.layers.15.block_sparse_moe.experts.83.w1", "model.layers.15.block_sparse_moe.experts.84.w1", "model.layers.15.block_sparse_moe.experts.85.w1", "model.layers.15.block_sparse_moe.experts.86.w1", "model.layers.15.block_sparse_moe.experts.87.w1", "model.layers.15.block_sparse_moe.experts.88.w1", "model.layers.15.block_sparse_moe.experts.89.w1", "model.layers.15.block_sparse_moe.experts.90.w1", "model.layers.15.block_sparse_moe.experts.91.w1", "model.layers.15.block_sparse_moe.experts.92.w1", "model.layers.15.block_sparse_moe.experts.93.w1", "model.layers.15.block_sparse_moe.experts.94.w1", "model.layers.15.block_sparse_moe.experts.95.w1", "model.layers.15.block_sparse_moe.experts.96.w1", "model.layers.15.block_sparse_moe.experts.97.w1", "model.layers.15.block_sparse_moe.experts.98.w1", "model.layers.15.block_sparse_moe.experts.99.w1", "model.layers.15.block_sparse_moe.experts.100.w1", "model.layers.15.block_sparse_moe.experts.101.w1", "model.layers.15.block_sparse_moe.experts.102.w1", "model.layers.15.block_sparse_moe.experts.103.w1", "model.layers.15.block_sparse_moe.experts.104.w1", "model.layers.15.block_sparse_moe.experts.105.w1", "model.layers.15.block_sparse_moe.experts.106.w1", "model.layers.15.block_sparse_moe.experts.107.w1", "model.layers.15.block_sparse_moe.experts.108.w1", "model.layers.15.block_sparse_moe.experts.109.w1", "model.layers.15.block_sparse_moe.experts.110.w1", "model.layers.15.block_sparse_moe.experts.111.w1", "model.layers.15.block_sparse_moe.experts.112.w1", "model.layers.15.block_sparse_moe.experts.113.w1", "model.layers.15.block_sparse_moe.experts.114.w1", "model.layers.15.block_sparse_moe.experts.115.w1", "model.layers.15.block_sparse_moe.experts.116.w1", "model.layers.15.block_sparse_moe.experts.117.w1", "model.layers.15.block_sparse_moe.experts.118.w1", "model.layers.15.block_sparse_moe.experts.119.w1", "model.layers.15.block_sparse_moe.experts.120.w1", "model.layers.15.block_sparse_moe.experts.121.w1", "model.layers.15.block_sparse_moe.experts.122.w1", "model.layers.15.block_sparse_moe.experts.123.w1", "model.layers.15.block_sparse_moe.experts.124.w1", "model.layers.15.block_sparse_moe.experts.125.w1", "model.layers.15.block_sparse_moe.experts.126.w1", "model.layers.15.block_sparse_moe.experts.127.w1", "model.layers.15.block_sparse_moe.experts.128.w1", "model.layers.15.block_sparse_moe.experts.129.w1", "model.layers.15.block_sparse_moe.experts.130.w1", "model.layers.15.block_sparse_moe.experts.131.w1", "model.layers.15.block_sparse_moe.experts.132.w1", "model.layers.15.block_sparse_moe.experts.133.w1", "model.layers.15.block_sparse_moe.experts.134.w1", "model.layers.15.block_sparse_moe.experts.135.w1", "model.layers.15.block_sparse_moe.experts.136.w1", "model.layers.15.block_sparse_moe.experts.137.w1", "model.layers.15.block_sparse_moe.experts.138.w1", "model.layers.15.block_sparse_moe.experts.139.w1", "model.layers.15.block_sparse_moe.experts.140.w1", "model.layers.15.block_sparse_moe.experts.141.w1", "model.layers.15.block_sparse_moe.experts.142.w1", "model.layers.15.block_sparse_moe.experts.143.w1", "model.layers.15.block_sparse_moe.experts.144.w1", "model.layers.15.block_sparse_moe.experts.145.w1", "model.layers.15.block_sparse_moe.experts.146.w1", "model.layers.15.block_sparse_moe.experts.147.w1", "model.layers.15.block_sparse_moe.experts.148.w1", "model.layers.15.block_sparse_moe.experts.149.w1", "model.layers.15.block_sparse_moe.experts.150.w1", "model.layers.15.block_sparse_moe.experts.151.w1", "model.layers.15.block_sparse_moe.experts.152.w1", "model.layers.15.block_sparse_moe.experts.153.w1", "model.layers.15.block_sparse_moe.experts.154.w1", "model.layers.15.block_sparse_moe.experts.155.w1", "model.layers.15.block_sparse_moe.experts.156.w1", "model.layers.15.block_sparse_moe.experts.157.w1", "model.layers.15.block_sparse_moe.experts.158.w1", "model.layers.15.block_sparse_moe.experts.159.w1", "model.layers.15.block_sparse_moe.experts.160.w1", "model.layers.15.block_sparse_moe.experts.161.w1", "model.layers.15.block_sparse_moe.experts.162.w1", "model.layers.15.block_sparse_moe.experts.163.w1", "model.layers.15.block_sparse_moe.experts.164.w1", "model.layers.15.block_sparse_moe.experts.165.w1", "model.layers.15.block_sparse_moe.experts.166.w1", "model.layers.15.block_sparse_moe.experts.167.w1", "model.layers.15.block_sparse_moe.experts.168.w1", "model.layers.15.block_sparse_moe.experts.169.w1", "model.layers.15.block_sparse_moe.experts.170.w1", "model.layers.15.block_sparse_moe.experts.171.w1", "model.layers.15.block_sparse_moe.experts.172.w1", "model.layers.15.block_sparse_moe.experts.173.w1", "model.layers.15.block_sparse_moe.experts.174.w1", "model.layers.15.block_sparse_moe.experts.175.w1", "model.layers.15.block_sparse_moe.experts.176.w1", "model.layers.15.block_sparse_moe.experts.177.w1", "model.layers.15.block_sparse_moe.experts.178.w1", "model.layers.15.block_sparse_moe.experts.179.w1", "model.layers.15.block_sparse_moe.experts.180.w1", "model.layers.15.block_sparse_moe.experts.181.w1", "model.layers.15.block_sparse_moe.experts.182.w1", "model.layers.15.block_sparse_moe.experts.183.w1", "model.layers.15.block_sparse_moe.experts.184.w1", "model.layers.15.block_sparse_moe.experts.185.w1", "model.layers.15.block_sparse_moe.experts.186.w1", "model.layers.15.block_sparse_moe.experts.187.w1", "model.layers.15.block_sparse_moe.experts.188.w1", "model.layers.15.block_sparse_moe.experts.189.w1", "model.layers.15.block_sparse_moe.experts.190.w1", "model.layers.15.block_sparse_moe.experts.191.w1", "model.layers.15.block_sparse_moe.experts.192.w1", "model.layers.15.block_sparse_moe.experts.193.w1", "model.layers.15.block_sparse_moe.experts.194.w1", "model.layers.15.block_sparse_moe.experts.195.w1", "model.layers.15.block_sparse_moe.experts.196.w1", "model.layers.15.block_sparse_moe.experts.197.w1", "model.layers.15.block_sparse_moe.experts.198.w1", "model.layers.15.block_sparse_moe.experts.199.w1", "model.layers.15.block_sparse_moe.experts.200.w1", "model.layers.15.block_sparse_moe.experts.201.w1", "model.layers.15.block_sparse_moe.experts.202.w1", "model.layers.15.block_sparse_moe.experts.203.w1", "model.layers.15.block_sparse_moe.experts.204.w1", "model.layers.15.block_sparse_moe.experts.205.w1", "model.layers.15.block_sparse_moe.experts.206.w1", "model.layers.15.block_sparse_moe.experts.207.w1", "model.layers.15.block_sparse_moe.experts.208.w1", "model.layers.15.block_sparse_moe.experts.209.w1", "model.layers.15.block_sparse_moe.experts.210.w1", "model.layers.15.block_sparse_moe.experts.211.w1", "model.layers.15.block_sparse_moe.experts.212.w1", "model.layers.15.block_sparse_moe.experts.213.w1", "model.layers.15.block_sparse_moe.experts.214.w1", "model.layers.15.block_sparse_moe.experts.215.w1", "model.layers.15.block_sparse_moe.experts.216.w1", "model.layers.15.block_sparse_moe.experts.217.w1", "model.layers.15.block_sparse_moe.experts.218.w1", "model.layers.15.block_sparse_moe.experts.219.w1", "model.layers.15.block_sparse_moe.experts.220.w1", "model.layers.15.block_sparse_moe.experts.221.w1", "model.layers.15.block_sparse_moe.experts.222.w1", "model.layers.15.block_sparse_moe.experts.223.w1", "model.layers.15.block_sparse_moe.experts.224.w1", "model.layers.15.block_sparse_moe.experts.225.w1", "model.layers.15.block_sparse_moe.experts.226.w1", "model.layers.15.block_sparse_moe.experts.227.w1", "model.layers.15.block_sparse_moe.experts.228.w1", "model.layers.15.block_sparse_moe.experts.229.w1", "model.layers.15.block_sparse_moe.experts.230.w1", "model.layers.15.block_sparse_moe.experts.231.w1", "model.layers.15.block_sparse_moe.experts.232.w1", "model.layers.15.block_sparse_moe.experts.233.w1", "model.layers.15.block_sparse_moe.experts.234.w1", "model.layers.15.block_sparse_moe.experts.235.w1", "model.layers.15.block_sparse_moe.experts.236.w1", "model.layers.15.block_sparse_moe.experts.237.w1", "model.layers.15.block_sparse_moe.experts.238.w1", "model.layers.15.block_sparse_moe.experts.239.w1", "model.layers.15.block_sparse_moe.experts.240.w1", "model.layers.15.block_sparse_moe.experts.241.w1", "model.layers.15.block_sparse_moe.experts.242.w1", "model.layers.15.block_sparse_moe.experts.243.w1", "model.layers.15.block_sparse_moe.experts.244.w1", "model.layers.15.block_sparse_moe.experts.245.w1", "model.layers.15.block_sparse_moe.experts.246.w1", "model.layers.15.block_sparse_moe.experts.247.w1", "model.layers.15.block_sparse_moe.experts.248.w1", "model.layers.15.block_sparse_moe.experts.249.w1", "model.layers.15.block_sparse_moe.experts.250.w1", "model.layers.15.block_sparse_moe.experts.251.w1", "model.layers.15.block_sparse_moe.experts.252.w1", "model.layers.15.block_sparse_moe.experts.253.w1", "model.layers.15.block_sparse_moe.experts.254.w1", "model.layers.15.block_sparse_moe.experts.255.w1", "model.layers.15.block_sparse_moe.experts.0.w3", "model.layers.15.block_sparse_moe.experts.1.w3", "model.layers.15.block_sparse_moe.experts.2.w3", "model.layers.15.block_sparse_moe.experts.3.w3", "model.layers.15.block_sparse_moe.experts.4.w3", "model.layers.15.block_sparse_moe.experts.5.w3", "model.layers.15.block_sparse_moe.experts.6.w3", "model.layers.15.block_sparse_moe.experts.7.w3", "model.layers.15.block_sparse_moe.experts.8.w3", "model.layers.15.block_sparse_moe.experts.9.w3", "model.layers.15.block_sparse_moe.experts.10.w3", "model.layers.15.block_sparse_moe.experts.11.w3", "model.layers.15.block_sparse_moe.experts.12.w3", "model.layers.15.block_sparse_moe.experts.13.w3", "model.layers.15.block_sparse_moe.experts.14.w3", "model.layers.15.block_sparse_moe.experts.15.w3", "model.layers.15.block_sparse_moe.experts.16.w3", "model.layers.15.block_sparse_moe.experts.17.w3", "model.layers.15.block_sparse_moe.experts.18.w3", "model.layers.15.block_sparse_moe.experts.19.w3", "model.layers.15.block_sparse_moe.experts.20.w3", "model.layers.15.block_sparse_moe.experts.21.w3", "model.layers.15.block_sparse_moe.experts.22.w3", "model.layers.15.block_sparse_moe.experts.23.w3", "model.layers.15.block_sparse_moe.experts.24.w3", "model.layers.15.block_sparse_moe.experts.25.w3", "model.layers.15.block_sparse_moe.experts.26.w3", "model.layers.15.block_sparse_moe.experts.27.w3", "model.layers.15.block_sparse_moe.experts.28.w3", "model.layers.15.block_sparse_moe.experts.29.w3", "model.layers.15.block_sparse_moe.experts.30.w3", "model.layers.15.block_sparse_moe.experts.31.w3", "model.layers.15.block_sparse_moe.experts.32.w3", "model.layers.15.block_sparse_moe.experts.33.w3", "model.layers.15.block_sparse_moe.experts.34.w3", "model.layers.15.block_sparse_moe.experts.35.w3", "model.layers.15.block_sparse_moe.experts.36.w3", "model.layers.15.block_sparse_moe.experts.37.w3", "model.layers.15.block_sparse_moe.experts.38.w3", "model.layers.15.block_sparse_moe.experts.39.w3", "model.layers.15.block_sparse_moe.experts.40.w3", "model.layers.15.block_sparse_moe.experts.41.w3", "model.layers.15.block_sparse_moe.experts.42.w3", "model.layers.15.block_sparse_moe.experts.43.w3", "model.layers.15.block_sparse_moe.experts.44.w3", "model.layers.15.block_sparse_moe.experts.45.w3", "model.layers.15.block_sparse_moe.experts.46.w3", "model.layers.15.block_sparse_moe.experts.47.w3", "model.layers.15.block_sparse_moe.experts.48.w3", "model.layers.15.block_sparse_moe.experts.49.w3", "model.layers.15.block_sparse_moe.experts.50.w3", "model.layers.15.block_sparse_moe.experts.51.w3", "model.layers.15.block_sparse_moe.experts.52.w3", "model.layers.15.block_sparse_moe.experts.53.w3", "model.layers.15.block_sparse_moe.experts.54.w3", "model.layers.15.block_sparse_moe.experts.55.w3", "model.layers.15.block_sparse_moe.experts.56.w3", "model.layers.15.block_sparse_moe.experts.57.w3", "model.layers.15.block_sparse_moe.experts.58.w3", "model.layers.15.block_sparse_moe.experts.59.w3", "model.layers.15.block_sparse_moe.experts.60.w3", "model.layers.15.block_sparse_moe.experts.61.w3", "model.layers.15.block_sparse_moe.experts.62.w3", "model.layers.15.block_sparse_moe.experts.63.w3", "model.layers.15.block_sparse_moe.experts.64.w3", "model.layers.15.block_sparse_moe.experts.65.w3", "model.layers.15.block_sparse_moe.experts.66.w3", "model.layers.15.block_sparse_moe.experts.67.w3", "model.layers.15.block_sparse_moe.experts.68.w3", "model.layers.15.block_sparse_moe.experts.69.w3", "model.layers.15.block_sparse_moe.experts.70.w3", "model.layers.15.block_sparse_moe.experts.71.w3", "model.layers.15.block_sparse_moe.experts.72.w3", "model.layers.15.block_sparse_moe.experts.73.w3", "model.layers.15.block_sparse_moe.experts.74.w3", "model.layers.15.block_sparse_moe.experts.75.w3", "model.layers.15.block_sparse_moe.experts.76.w3", "model.layers.15.block_sparse_moe.experts.77.w3", "model.layers.15.block_sparse_moe.experts.78.w3", "model.layers.15.block_sparse_moe.experts.79.w3", "model.layers.15.block_sparse_moe.experts.80.w3", "model.layers.15.block_sparse_moe.experts.81.w3", "model.layers.15.block_sparse_moe.experts.82.w3", "model.layers.15.block_sparse_moe.experts.83.w3", "model.layers.15.block_sparse_moe.experts.84.w3", "model.layers.15.block_sparse_moe.experts.85.w3", "model.layers.15.block_sparse_moe.experts.86.w3", "model.layers.15.block_sparse_moe.experts.87.w3", "model.layers.15.block_sparse_moe.experts.88.w3", "model.layers.15.block_sparse_moe.experts.89.w3", "model.layers.15.block_sparse_moe.experts.90.w3", "model.layers.15.block_sparse_moe.experts.91.w3", "model.layers.15.block_sparse_moe.experts.92.w3", "model.layers.15.block_sparse_moe.experts.93.w3", "model.layers.15.block_sparse_moe.experts.94.w3", "model.layers.15.block_sparse_moe.experts.95.w3", "model.layers.15.block_sparse_moe.experts.96.w3", "model.layers.15.block_sparse_moe.experts.97.w3", "model.layers.15.block_sparse_moe.experts.98.w3", "model.layers.15.block_sparse_moe.experts.99.w3", "model.layers.15.block_sparse_moe.experts.100.w3", "model.layers.15.block_sparse_moe.experts.101.w3", "model.layers.15.block_sparse_moe.experts.102.w3", "model.layers.15.block_sparse_moe.experts.103.w3", "model.layers.15.block_sparse_moe.experts.104.w3", "model.layers.15.block_sparse_moe.experts.105.w3", "model.layers.15.block_sparse_moe.experts.106.w3", "model.layers.15.block_sparse_moe.experts.107.w3", "model.layers.15.block_sparse_moe.experts.108.w3", "model.layers.15.block_sparse_moe.experts.109.w3", "model.layers.15.block_sparse_moe.experts.110.w3", "model.layers.15.block_sparse_moe.experts.111.w3", "model.layers.15.block_sparse_moe.experts.112.w3", "model.layers.15.block_sparse_moe.experts.113.w3", "model.layers.15.block_sparse_moe.experts.114.w3", "model.layers.15.block_sparse_moe.experts.115.w3", "model.layers.15.block_sparse_moe.experts.116.w3", "model.layers.15.block_sparse_moe.experts.117.w3", "model.layers.15.block_sparse_moe.experts.118.w3", "model.layers.15.block_sparse_moe.experts.119.w3", "model.layers.15.block_sparse_moe.experts.120.w3", "model.layers.15.block_sparse_moe.experts.121.w3", "model.layers.15.block_sparse_moe.experts.122.w3", "model.layers.15.block_sparse_moe.experts.123.w3", "model.layers.15.block_sparse_moe.experts.124.w3", "model.layers.15.block_sparse_moe.experts.125.w3", "model.layers.15.block_sparse_moe.experts.126.w3", "model.layers.15.block_sparse_moe.experts.127.w3", "model.layers.15.block_sparse_moe.experts.128.w3", "model.layers.15.block_sparse_moe.experts.129.w3", "model.layers.15.block_sparse_moe.experts.130.w3", "model.layers.15.block_sparse_moe.experts.131.w3", "model.layers.15.block_sparse_moe.experts.132.w3", "model.layers.15.block_sparse_moe.experts.133.w3", "model.layers.15.block_sparse_moe.experts.134.w3", "model.layers.15.block_sparse_moe.experts.135.w3", "model.layers.15.block_sparse_moe.experts.136.w3", "model.layers.15.block_sparse_moe.experts.137.w3", "model.layers.15.block_sparse_moe.experts.138.w3", "model.layers.15.block_sparse_moe.experts.139.w3", "model.layers.15.block_sparse_moe.experts.140.w3", "model.layers.15.block_sparse_moe.experts.141.w3", "model.layers.15.block_sparse_moe.experts.142.w3", "model.layers.15.block_sparse_moe.experts.143.w3", "model.layers.15.block_sparse_moe.experts.144.w3", "model.layers.15.block_sparse_moe.experts.145.w3", "model.layers.15.block_sparse_moe.experts.146.w3", "model.layers.15.block_sparse_moe.experts.147.w3", "model.layers.15.block_sparse_moe.experts.148.w3", "model.layers.15.block_sparse_moe.experts.149.w3", "model.layers.15.block_sparse_moe.experts.150.w3", "model.layers.15.block_sparse_moe.experts.151.w3", "model.layers.15.block_sparse_moe.experts.152.w3", "model.layers.15.block_sparse_moe.experts.153.w3", "model.layers.15.block_sparse_moe.experts.154.w3", "model.layers.15.block_sparse_moe.experts.155.w3", "model.layers.15.block_sparse_moe.experts.156.w3", "model.layers.15.block_sparse_moe.experts.157.w3", "model.layers.15.block_sparse_moe.experts.158.w3", "model.layers.15.block_sparse_moe.experts.159.w3", "model.layers.15.block_sparse_moe.experts.160.w3", "model.layers.15.block_sparse_moe.experts.161.w3", "model.layers.15.block_sparse_moe.experts.162.w3", "model.layers.15.block_sparse_moe.experts.163.w3", "model.layers.15.block_sparse_moe.experts.164.w3", "model.layers.15.block_sparse_moe.experts.165.w3", "model.layers.15.block_sparse_moe.experts.166.w3", "model.layers.15.block_sparse_moe.experts.167.w3", "model.layers.15.block_sparse_moe.experts.168.w3", "model.layers.15.block_sparse_moe.experts.169.w3", "model.layers.15.block_sparse_moe.experts.170.w3", "model.layers.15.block_sparse_moe.experts.171.w3", "model.layers.15.block_sparse_moe.experts.172.w3", "model.layers.15.block_sparse_moe.experts.173.w3", "model.layers.15.block_sparse_moe.experts.174.w3", "model.layers.15.block_sparse_moe.experts.175.w3", "model.layers.15.block_sparse_moe.experts.176.w3", "model.layers.15.block_sparse_moe.experts.177.w3", "model.layers.15.block_sparse_moe.experts.178.w3", "model.layers.15.block_sparse_moe.experts.179.w3", "model.layers.15.block_sparse_moe.experts.180.w3", "model.layers.15.block_sparse_moe.experts.181.w3", "model.layers.15.block_sparse_moe.experts.182.w3", "model.layers.15.block_sparse_moe.experts.183.w3", "model.layers.15.block_sparse_moe.experts.184.w3", "model.layers.15.block_sparse_moe.experts.185.w3", "model.layers.15.block_sparse_moe.experts.186.w3", "model.layers.15.block_sparse_moe.experts.187.w3", "model.layers.15.block_sparse_moe.experts.188.w3", "model.layers.15.block_sparse_moe.experts.189.w3", "model.layers.15.block_sparse_moe.experts.190.w3", "model.layers.15.block_sparse_moe.experts.191.w3", "model.layers.15.block_sparse_moe.experts.192.w3", "model.layers.15.block_sparse_moe.experts.193.w3", "model.layers.15.block_sparse_moe.experts.194.w3", "model.layers.15.block_sparse_moe.experts.195.w3", "model.layers.15.block_sparse_moe.experts.196.w3", "model.layers.15.block_sparse_moe.experts.197.w3", "model.layers.15.block_sparse_moe.experts.198.w3", "model.layers.15.block_sparse_moe.experts.199.w3", "model.layers.15.block_sparse_moe.experts.200.w3", "model.layers.15.block_sparse_moe.experts.201.w3", "model.layers.15.block_sparse_moe.experts.202.w3", "model.layers.15.block_sparse_moe.experts.203.w3", "model.layers.15.block_sparse_moe.experts.204.w3", "model.layers.15.block_sparse_moe.experts.205.w3", "model.layers.15.block_sparse_moe.experts.206.w3", "model.layers.15.block_sparse_moe.experts.207.w3", "model.layers.15.block_sparse_moe.experts.208.w3", "model.layers.15.block_sparse_moe.experts.209.w3", "model.layers.15.block_sparse_moe.experts.210.w3", "model.layers.15.block_sparse_moe.experts.211.w3", "model.layers.15.block_sparse_moe.experts.212.w3", "model.layers.15.block_sparse_moe.experts.213.w3", "model.layers.15.block_sparse_moe.experts.214.w3", "model.layers.15.block_sparse_moe.experts.215.w3", "model.layers.15.block_sparse_moe.experts.216.w3", "model.layers.15.block_sparse_moe.experts.217.w3", "model.layers.15.block_sparse_moe.experts.218.w3", "model.layers.15.block_sparse_moe.experts.219.w3", "model.layers.15.block_sparse_moe.experts.220.w3", "model.layers.15.block_sparse_moe.experts.221.w3", "model.layers.15.block_sparse_moe.experts.222.w3", "model.layers.15.block_sparse_moe.experts.223.w3", "model.layers.15.block_sparse_moe.experts.224.w3", "model.layers.15.block_sparse_moe.experts.225.w3", "model.layers.15.block_sparse_moe.experts.226.w3", "model.layers.15.block_sparse_moe.experts.227.w3", "model.layers.15.block_sparse_moe.experts.228.w3", "model.layers.15.block_sparse_moe.experts.229.w3", "model.layers.15.block_sparse_moe.experts.230.w3", "model.layers.15.block_sparse_moe.experts.231.w3", "model.layers.15.block_sparse_moe.experts.232.w3", "model.layers.15.block_sparse_moe.experts.233.w3", "model.layers.15.block_sparse_moe.experts.234.w3", "model.layers.15.block_sparse_moe.experts.235.w3", "model.layers.15.block_sparse_moe.experts.236.w3", "model.layers.15.block_sparse_moe.experts.237.w3", "model.layers.15.block_sparse_moe.experts.238.w3", "model.layers.15.block_sparse_moe.experts.239.w3", "model.layers.15.block_sparse_moe.experts.240.w3", "model.layers.15.block_sparse_moe.experts.241.w3", "model.layers.15.block_sparse_moe.experts.242.w3", "model.layers.15.block_sparse_moe.experts.243.w3", "model.layers.15.block_sparse_moe.experts.244.w3", "model.layers.15.block_sparse_moe.experts.245.w3", "model.layers.15.block_sparse_moe.experts.246.w3", "model.layers.15.block_sparse_moe.experts.247.w3", "model.layers.15.block_sparse_moe.experts.248.w3", "model.layers.15.block_sparse_moe.experts.249.w3", "model.layers.15.block_sparse_moe.experts.250.w3", "model.layers.15.block_sparse_moe.experts.251.w3", "model.layers.15.block_sparse_moe.experts.252.w3", "model.layers.15.block_sparse_moe.experts.253.w3", "model.layers.15.block_sparse_moe.experts.254.w3", "model.layers.15.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.001356002315878846, "dbits": 2415919104 } ] }, { "idx": 79, "layers": [ "model.layers.15.block_sparse_moe.experts.0.w2", "model.layers.15.block_sparse_moe.experts.1.w2", "model.layers.15.block_sparse_moe.experts.2.w2", "model.layers.15.block_sparse_moe.experts.3.w2", "model.layers.15.block_sparse_moe.experts.4.w2", "model.layers.15.block_sparse_moe.experts.5.w2", "model.layers.15.block_sparse_moe.experts.6.w2", "model.layers.15.block_sparse_moe.experts.7.w2", "model.layers.15.block_sparse_moe.experts.8.w2", "model.layers.15.block_sparse_moe.experts.9.w2", "model.layers.15.block_sparse_moe.experts.10.w2", "model.layers.15.block_sparse_moe.experts.11.w2", "model.layers.15.block_sparse_moe.experts.12.w2", "model.layers.15.block_sparse_moe.experts.13.w2", "model.layers.15.block_sparse_moe.experts.14.w2", "model.layers.15.block_sparse_moe.experts.15.w2", "model.layers.15.block_sparse_moe.experts.16.w2", "model.layers.15.block_sparse_moe.experts.17.w2", "model.layers.15.block_sparse_moe.experts.18.w2", "model.layers.15.block_sparse_moe.experts.19.w2", "model.layers.15.block_sparse_moe.experts.20.w2", "model.layers.15.block_sparse_moe.experts.21.w2", "model.layers.15.block_sparse_moe.experts.22.w2", "model.layers.15.block_sparse_moe.experts.23.w2", "model.layers.15.block_sparse_moe.experts.24.w2", "model.layers.15.block_sparse_moe.experts.25.w2", "model.layers.15.block_sparse_moe.experts.26.w2", "model.layers.15.block_sparse_moe.experts.27.w2", "model.layers.15.block_sparse_moe.experts.28.w2", "model.layers.15.block_sparse_moe.experts.29.w2", "model.layers.15.block_sparse_moe.experts.30.w2", "model.layers.15.block_sparse_moe.experts.31.w2", "model.layers.15.block_sparse_moe.experts.32.w2", "model.layers.15.block_sparse_moe.experts.33.w2", "model.layers.15.block_sparse_moe.experts.34.w2", "model.layers.15.block_sparse_moe.experts.35.w2", "model.layers.15.block_sparse_moe.experts.36.w2", "model.layers.15.block_sparse_moe.experts.37.w2", "model.layers.15.block_sparse_moe.experts.38.w2", "model.layers.15.block_sparse_moe.experts.39.w2", "model.layers.15.block_sparse_moe.experts.40.w2", "model.layers.15.block_sparse_moe.experts.41.w2", "model.layers.15.block_sparse_moe.experts.42.w2", "model.layers.15.block_sparse_moe.experts.43.w2", "model.layers.15.block_sparse_moe.experts.44.w2", "model.layers.15.block_sparse_moe.experts.45.w2", "model.layers.15.block_sparse_moe.experts.46.w2", "model.layers.15.block_sparse_moe.experts.47.w2", "model.layers.15.block_sparse_moe.experts.48.w2", "model.layers.15.block_sparse_moe.experts.49.w2", "model.layers.15.block_sparse_moe.experts.50.w2", "model.layers.15.block_sparse_moe.experts.51.w2", "model.layers.15.block_sparse_moe.experts.52.w2", "model.layers.15.block_sparse_moe.experts.53.w2", "model.layers.15.block_sparse_moe.experts.54.w2", "model.layers.15.block_sparse_moe.experts.55.w2", "model.layers.15.block_sparse_moe.experts.56.w2", "model.layers.15.block_sparse_moe.experts.57.w2", "model.layers.15.block_sparse_moe.experts.58.w2", "model.layers.15.block_sparse_moe.experts.59.w2", "model.layers.15.block_sparse_moe.experts.60.w2", "model.layers.15.block_sparse_moe.experts.61.w2", "model.layers.15.block_sparse_moe.experts.62.w2", "model.layers.15.block_sparse_moe.experts.63.w2", "model.layers.15.block_sparse_moe.experts.64.w2", "model.layers.15.block_sparse_moe.experts.65.w2", "model.layers.15.block_sparse_moe.experts.66.w2", "model.layers.15.block_sparse_moe.experts.67.w2", "model.layers.15.block_sparse_moe.experts.68.w2", "model.layers.15.block_sparse_moe.experts.69.w2", "model.layers.15.block_sparse_moe.experts.70.w2", "model.layers.15.block_sparse_moe.experts.71.w2", "model.layers.15.block_sparse_moe.experts.72.w2", "model.layers.15.block_sparse_moe.experts.73.w2", "model.layers.15.block_sparse_moe.experts.74.w2", "model.layers.15.block_sparse_moe.experts.75.w2", "model.layers.15.block_sparse_moe.experts.76.w2", "model.layers.15.block_sparse_moe.experts.77.w2", "model.layers.15.block_sparse_moe.experts.78.w2", "model.layers.15.block_sparse_moe.experts.79.w2", "model.layers.15.block_sparse_moe.experts.80.w2", "model.layers.15.block_sparse_moe.experts.81.w2", "model.layers.15.block_sparse_moe.experts.82.w2", "model.layers.15.block_sparse_moe.experts.83.w2", "model.layers.15.block_sparse_moe.experts.84.w2", "model.layers.15.block_sparse_moe.experts.85.w2", "model.layers.15.block_sparse_moe.experts.86.w2", "model.layers.15.block_sparse_moe.experts.87.w2", "model.layers.15.block_sparse_moe.experts.88.w2", "model.layers.15.block_sparse_moe.experts.89.w2", "model.layers.15.block_sparse_moe.experts.90.w2", "model.layers.15.block_sparse_moe.experts.91.w2", "model.layers.15.block_sparse_moe.experts.92.w2", "model.layers.15.block_sparse_moe.experts.93.w2", "model.layers.15.block_sparse_moe.experts.94.w2", "model.layers.15.block_sparse_moe.experts.95.w2", "model.layers.15.block_sparse_moe.experts.96.w2", "model.layers.15.block_sparse_moe.experts.97.w2", "model.layers.15.block_sparse_moe.experts.98.w2", "model.layers.15.block_sparse_moe.experts.99.w2", "model.layers.15.block_sparse_moe.experts.100.w2", "model.layers.15.block_sparse_moe.experts.101.w2", "model.layers.15.block_sparse_moe.experts.102.w2", "model.layers.15.block_sparse_moe.experts.103.w2", "model.layers.15.block_sparse_moe.experts.104.w2", "model.layers.15.block_sparse_moe.experts.105.w2", "model.layers.15.block_sparse_moe.experts.106.w2", "model.layers.15.block_sparse_moe.experts.107.w2", "model.layers.15.block_sparse_moe.experts.108.w2", "model.layers.15.block_sparse_moe.experts.109.w2", "model.layers.15.block_sparse_moe.experts.110.w2", "model.layers.15.block_sparse_moe.experts.111.w2", "model.layers.15.block_sparse_moe.experts.112.w2", "model.layers.15.block_sparse_moe.experts.113.w2", "model.layers.15.block_sparse_moe.experts.114.w2", "model.layers.15.block_sparse_moe.experts.115.w2", "model.layers.15.block_sparse_moe.experts.116.w2", "model.layers.15.block_sparse_moe.experts.117.w2", "model.layers.15.block_sparse_moe.experts.118.w2", "model.layers.15.block_sparse_moe.experts.119.w2", "model.layers.15.block_sparse_moe.experts.120.w2", "model.layers.15.block_sparse_moe.experts.121.w2", "model.layers.15.block_sparse_moe.experts.122.w2", "model.layers.15.block_sparse_moe.experts.123.w2", "model.layers.15.block_sparse_moe.experts.124.w2", "model.layers.15.block_sparse_moe.experts.125.w2", "model.layers.15.block_sparse_moe.experts.126.w2", "model.layers.15.block_sparse_moe.experts.127.w2", "model.layers.15.block_sparse_moe.experts.128.w2", "model.layers.15.block_sparse_moe.experts.129.w2", "model.layers.15.block_sparse_moe.experts.130.w2", "model.layers.15.block_sparse_moe.experts.131.w2", "model.layers.15.block_sparse_moe.experts.132.w2", "model.layers.15.block_sparse_moe.experts.133.w2", "model.layers.15.block_sparse_moe.experts.134.w2", "model.layers.15.block_sparse_moe.experts.135.w2", "model.layers.15.block_sparse_moe.experts.136.w2", "model.layers.15.block_sparse_moe.experts.137.w2", "model.layers.15.block_sparse_moe.experts.138.w2", "model.layers.15.block_sparse_moe.experts.139.w2", "model.layers.15.block_sparse_moe.experts.140.w2", "model.layers.15.block_sparse_moe.experts.141.w2", "model.layers.15.block_sparse_moe.experts.142.w2", "model.layers.15.block_sparse_moe.experts.143.w2", "model.layers.15.block_sparse_moe.experts.144.w2", "model.layers.15.block_sparse_moe.experts.145.w2", "model.layers.15.block_sparse_moe.experts.146.w2", "model.layers.15.block_sparse_moe.experts.147.w2", "model.layers.15.block_sparse_moe.experts.148.w2", "model.layers.15.block_sparse_moe.experts.149.w2", "model.layers.15.block_sparse_moe.experts.150.w2", "model.layers.15.block_sparse_moe.experts.151.w2", "model.layers.15.block_sparse_moe.experts.152.w2", "model.layers.15.block_sparse_moe.experts.153.w2", "model.layers.15.block_sparse_moe.experts.154.w2", "model.layers.15.block_sparse_moe.experts.155.w2", "model.layers.15.block_sparse_moe.experts.156.w2", "model.layers.15.block_sparse_moe.experts.157.w2", "model.layers.15.block_sparse_moe.experts.158.w2", "model.layers.15.block_sparse_moe.experts.159.w2", "model.layers.15.block_sparse_moe.experts.160.w2", "model.layers.15.block_sparse_moe.experts.161.w2", "model.layers.15.block_sparse_moe.experts.162.w2", "model.layers.15.block_sparse_moe.experts.163.w2", "model.layers.15.block_sparse_moe.experts.164.w2", "model.layers.15.block_sparse_moe.experts.165.w2", "model.layers.15.block_sparse_moe.experts.166.w2", "model.layers.15.block_sparse_moe.experts.167.w2", "model.layers.15.block_sparse_moe.experts.168.w2", "model.layers.15.block_sparse_moe.experts.169.w2", "model.layers.15.block_sparse_moe.experts.170.w2", "model.layers.15.block_sparse_moe.experts.171.w2", "model.layers.15.block_sparse_moe.experts.172.w2", "model.layers.15.block_sparse_moe.experts.173.w2", "model.layers.15.block_sparse_moe.experts.174.w2", "model.layers.15.block_sparse_moe.experts.175.w2", "model.layers.15.block_sparse_moe.experts.176.w2", "model.layers.15.block_sparse_moe.experts.177.w2", "model.layers.15.block_sparse_moe.experts.178.w2", "model.layers.15.block_sparse_moe.experts.179.w2", "model.layers.15.block_sparse_moe.experts.180.w2", "model.layers.15.block_sparse_moe.experts.181.w2", "model.layers.15.block_sparse_moe.experts.182.w2", "model.layers.15.block_sparse_moe.experts.183.w2", "model.layers.15.block_sparse_moe.experts.184.w2", "model.layers.15.block_sparse_moe.experts.185.w2", "model.layers.15.block_sparse_moe.experts.186.w2", "model.layers.15.block_sparse_moe.experts.187.w2", "model.layers.15.block_sparse_moe.experts.188.w2", "model.layers.15.block_sparse_moe.experts.189.w2", "model.layers.15.block_sparse_moe.experts.190.w2", "model.layers.15.block_sparse_moe.experts.191.w2", "model.layers.15.block_sparse_moe.experts.192.w2", "model.layers.15.block_sparse_moe.experts.193.w2", "model.layers.15.block_sparse_moe.experts.194.w2", "model.layers.15.block_sparse_moe.experts.195.w2", "model.layers.15.block_sparse_moe.experts.196.w2", "model.layers.15.block_sparse_moe.experts.197.w2", "model.layers.15.block_sparse_moe.experts.198.w2", "model.layers.15.block_sparse_moe.experts.199.w2", "model.layers.15.block_sparse_moe.experts.200.w2", "model.layers.15.block_sparse_moe.experts.201.w2", "model.layers.15.block_sparse_moe.experts.202.w2", "model.layers.15.block_sparse_moe.experts.203.w2", "model.layers.15.block_sparse_moe.experts.204.w2", "model.layers.15.block_sparse_moe.experts.205.w2", "model.layers.15.block_sparse_moe.experts.206.w2", "model.layers.15.block_sparse_moe.experts.207.w2", "model.layers.15.block_sparse_moe.experts.208.w2", "model.layers.15.block_sparse_moe.experts.209.w2", "model.layers.15.block_sparse_moe.experts.210.w2", "model.layers.15.block_sparse_moe.experts.211.w2", "model.layers.15.block_sparse_moe.experts.212.w2", "model.layers.15.block_sparse_moe.experts.213.w2", "model.layers.15.block_sparse_moe.experts.214.w2", "model.layers.15.block_sparse_moe.experts.215.w2", "model.layers.15.block_sparse_moe.experts.216.w2", "model.layers.15.block_sparse_moe.experts.217.w2", "model.layers.15.block_sparse_moe.experts.218.w2", "model.layers.15.block_sparse_moe.experts.219.w2", "model.layers.15.block_sparse_moe.experts.220.w2", "model.layers.15.block_sparse_moe.experts.221.w2", "model.layers.15.block_sparse_moe.experts.222.w2", "model.layers.15.block_sparse_moe.experts.223.w2", "model.layers.15.block_sparse_moe.experts.224.w2", "model.layers.15.block_sparse_moe.experts.225.w2", "model.layers.15.block_sparse_moe.experts.226.w2", "model.layers.15.block_sparse_moe.experts.227.w2", "model.layers.15.block_sparse_moe.experts.228.w2", "model.layers.15.block_sparse_moe.experts.229.w2", "model.layers.15.block_sparse_moe.experts.230.w2", "model.layers.15.block_sparse_moe.experts.231.w2", "model.layers.15.block_sparse_moe.experts.232.w2", "model.layers.15.block_sparse_moe.experts.233.w2", "model.layers.15.block_sparse_moe.experts.234.w2", "model.layers.15.block_sparse_moe.experts.235.w2", "model.layers.15.block_sparse_moe.experts.236.w2", "model.layers.15.block_sparse_moe.experts.237.w2", "model.layers.15.block_sparse_moe.experts.238.w2", "model.layers.15.block_sparse_moe.experts.239.w2", "model.layers.15.block_sparse_moe.experts.240.w2", "model.layers.15.block_sparse_moe.experts.241.w2", "model.layers.15.block_sparse_moe.experts.242.w2", "model.layers.15.block_sparse_moe.experts.243.w2", "model.layers.15.block_sparse_moe.experts.244.w2", "model.layers.15.block_sparse_moe.experts.245.w2", "model.layers.15.block_sparse_moe.experts.246.w2", "model.layers.15.block_sparse_moe.experts.247.w2", "model.layers.15.block_sparse_moe.experts.248.w2", "model.layers.15.block_sparse_moe.experts.249.w2", "model.layers.15.block_sparse_moe.experts.250.w2", "model.layers.15.block_sparse_moe.experts.251.w2", "model.layers.15.block_sparse_moe.experts.252.w2", "model.layers.15.block_sparse_moe.experts.253.w2", "model.layers.15.block_sparse_moe.experts.254.w2", "model.layers.15.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.0007468711584806553, "dbits": 1207959552 } ] }, { "idx": 80, "layers": [ "model.layers.16.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0017368871718645429, "dbits": 18874368 } ] }, { "idx": 81, "layers": [ "model.layers.16.self_attn.k_proj", "model.layers.16.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0008571367710829003, "dbits": 6291456 } ] }, { "idx": 82, "layers": [ "model.layers.16.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0019229892641305923, "dbits": 18874368 } ] }, { "idx": 83, "layers": [ "model.layers.16.block_sparse_moe.experts.0.w1", "model.layers.16.block_sparse_moe.experts.1.w1", "model.layers.16.block_sparse_moe.experts.2.w1", "model.layers.16.block_sparse_moe.experts.3.w1", "model.layers.16.block_sparse_moe.experts.4.w1", "model.layers.16.block_sparse_moe.experts.5.w1", "model.layers.16.block_sparse_moe.experts.6.w1", "model.layers.16.block_sparse_moe.experts.7.w1", "model.layers.16.block_sparse_moe.experts.8.w1", "model.layers.16.block_sparse_moe.experts.9.w1", "model.layers.16.block_sparse_moe.experts.10.w1", "model.layers.16.block_sparse_moe.experts.11.w1", "model.layers.16.block_sparse_moe.experts.12.w1", "model.layers.16.block_sparse_moe.experts.13.w1", "model.layers.16.block_sparse_moe.experts.14.w1", "model.layers.16.block_sparse_moe.experts.15.w1", "model.layers.16.block_sparse_moe.experts.16.w1", "model.layers.16.block_sparse_moe.experts.17.w1", "model.layers.16.block_sparse_moe.experts.18.w1", "model.layers.16.block_sparse_moe.experts.19.w1", "model.layers.16.block_sparse_moe.experts.20.w1", "model.layers.16.block_sparse_moe.experts.21.w1", "model.layers.16.block_sparse_moe.experts.22.w1", "model.layers.16.block_sparse_moe.experts.23.w1", "model.layers.16.block_sparse_moe.experts.24.w1", "model.layers.16.block_sparse_moe.experts.25.w1", "model.layers.16.block_sparse_moe.experts.26.w1", "model.layers.16.block_sparse_moe.experts.27.w1", "model.layers.16.block_sparse_moe.experts.28.w1", "model.layers.16.block_sparse_moe.experts.29.w1", "model.layers.16.block_sparse_moe.experts.30.w1", "model.layers.16.block_sparse_moe.experts.31.w1", "model.layers.16.block_sparse_moe.experts.32.w1", "model.layers.16.block_sparse_moe.experts.33.w1", "model.layers.16.block_sparse_moe.experts.34.w1", "model.layers.16.block_sparse_moe.experts.35.w1", "model.layers.16.block_sparse_moe.experts.36.w1", "model.layers.16.block_sparse_moe.experts.37.w1", "model.layers.16.block_sparse_moe.experts.38.w1", "model.layers.16.block_sparse_moe.experts.39.w1", "model.layers.16.block_sparse_moe.experts.40.w1", "model.layers.16.block_sparse_moe.experts.41.w1", "model.layers.16.block_sparse_moe.experts.42.w1", "model.layers.16.block_sparse_moe.experts.43.w1", "model.layers.16.block_sparse_moe.experts.44.w1", "model.layers.16.block_sparse_moe.experts.45.w1", "model.layers.16.block_sparse_moe.experts.46.w1", "model.layers.16.block_sparse_moe.experts.47.w1", "model.layers.16.block_sparse_moe.experts.48.w1", "model.layers.16.block_sparse_moe.experts.49.w1", "model.layers.16.block_sparse_moe.experts.50.w1", "model.layers.16.block_sparse_moe.experts.51.w1", "model.layers.16.block_sparse_moe.experts.52.w1", "model.layers.16.block_sparse_moe.experts.53.w1", "model.layers.16.block_sparse_moe.experts.54.w1", "model.layers.16.block_sparse_moe.experts.55.w1", "model.layers.16.block_sparse_moe.experts.56.w1", "model.layers.16.block_sparse_moe.experts.57.w1", "model.layers.16.block_sparse_moe.experts.58.w1", "model.layers.16.block_sparse_moe.experts.59.w1", "model.layers.16.block_sparse_moe.experts.60.w1", "model.layers.16.block_sparse_moe.experts.61.w1", "model.layers.16.block_sparse_moe.experts.62.w1", "model.layers.16.block_sparse_moe.experts.63.w1", "model.layers.16.block_sparse_moe.experts.64.w1", "model.layers.16.block_sparse_moe.experts.65.w1", "model.layers.16.block_sparse_moe.experts.66.w1", "model.layers.16.block_sparse_moe.experts.67.w1", "model.layers.16.block_sparse_moe.experts.68.w1", "model.layers.16.block_sparse_moe.experts.69.w1", "model.layers.16.block_sparse_moe.experts.70.w1", "model.layers.16.block_sparse_moe.experts.71.w1", "model.layers.16.block_sparse_moe.experts.72.w1", "model.layers.16.block_sparse_moe.experts.73.w1", "model.layers.16.block_sparse_moe.experts.74.w1", "model.layers.16.block_sparse_moe.experts.75.w1", "model.layers.16.block_sparse_moe.experts.76.w1", "model.layers.16.block_sparse_moe.experts.77.w1", "model.layers.16.block_sparse_moe.experts.78.w1", "model.layers.16.block_sparse_moe.experts.79.w1", "model.layers.16.block_sparse_moe.experts.80.w1", "model.layers.16.block_sparse_moe.experts.81.w1", "model.layers.16.block_sparse_moe.experts.82.w1", "model.layers.16.block_sparse_moe.experts.83.w1", "model.layers.16.block_sparse_moe.experts.84.w1", "model.layers.16.block_sparse_moe.experts.85.w1", "model.layers.16.block_sparse_moe.experts.86.w1", "model.layers.16.block_sparse_moe.experts.87.w1", "model.layers.16.block_sparse_moe.experts.88.w1", "model.layers.16.block_sparse_moe.experts.89.w1", "model.layers.16.block_sparse_moe.experts.90.w1", "model.layers.16.block_sparse_moe.experts.91.w1", "model.layers.16.block_sparse_moe.experts.92.w1", "model.layers.16.block_sparse_moe.experts.93.w1", "model.layers.16.block_sparse_moe.experts.94.w1", "model.layers.16.block_sparse_moe.experts.95.w1", "model.layers.16.block_sparse_moe.experts.96.w1", "model.layers.16.block_sparse_moe.experts.97.w1", "model.layers.16.block_sparse_moe.experts.98.w1", "model.layers.16.block_sparse_moe.experts.99.w1", "model.layers.16.block_sparse_moe.experts.100.w1", "model.layers.16.block_sparse_moe.experts.101.w1", "model.layers.16.block_sparse_moe.experts.102.w1", "model.layers.16.block_sparse_moe.experts.103.w1", "model.layers.16.block_sparse_moe.experts.104.w1", "model.layers.16.block_sparse_moe.experts.105.w1", "model.layers.16.block_sparse_moe.experts.106.w1", "model.layers.16.block_sparse_moe.experts.107.w1", "model.layers.16.block_sparse_moe.experts.108.w1", "model.layers.16.block_sparse_moe.experts.109.w1", "model.layers.16.block_sparse_moe.experts.110.w1", "model.layers.16.block_sparse_moe.experts.111.w1", "model.layers.16.block_sparse_moe.experts.112.w1", "model.layers.16.block_sparse_moe.experts.113.w1", "model.layers.16.block_sparse_moe.experts.114.w1", "model.layers.16.block_sparse_moe.experts.115.w1", "model.layers.16.block_sparse_moe.experts.116.w1", "model.layers.16.block_sparse_moe.experts.117.w1", "model.layers.16.block_sparse_moe.experts.118.w1", "model.layers.16.block_sparse_moe.experts.119.w1", "model.layers.16.block_sparse_moe.experts.120.w1", "model.layers.16.block_sparse_moe.experts.121.w1", "model.layers.16.block_sparse_moe.experts.122.w1", "model.layers.16.block_sparse_moe.experts.123.w1", "model.layers.16.block_sparse_moe.experts.124.w1", "model.layers.16.block_sparse_moe.experts.125.w1", "model.layers.16.block_sparse_moe.experts.126.w1", "model.layers.16.block_sparse_moe.experts.127.w1", "model.layers.16.block_sparse_moe.experts.128.w1", "model.layers.16.block_sparse_moe.experts.129.w1", "model.layers.16.block_sparse_moe.experts.130.w1", "model.layers.16.block_sparse_moe.experts.131.w1", "model.layers.16.block_sparse_moe.experts.132.w1", "model.layers.16.block_sparse_moe.experts.133.w1", "model.layers.16.block_sparse_moe.experts.134.w1", "model.layers.16.block_sparse_moe.experts.135.w1", "model.layers.16.block_sparse_moe.experts.136.w1", "model.layers.16.block_sparse_moe.experts.137.w1", "model.layers.16.block_sparse_moe.experts.138.w1", "model.layers.16.block_sparse_moe.experts.139.w1", "model.layers.16.block_sparse_moe.experts.140.w1", "model.layers.16.block_sparse_moe.experts.141.w1", "model.layers.16.block_sparse_moe.experts.142.w1", "model.layers.16.block_sparse_moe.experts.143.w1", "model.layers.16.block_sparse_moe.experts.144.w1", "model.layers.16.block_sparse_moe.experts.145.w1", "model.layers.16.block_sparse_moe.experts.146.w1", "model.layers.16.block_sparse_moe.experts.147.w1", "model.layers.16.block_sparse_moe.experts.148.w1", "model.layers.16.block_sparse_moe.experts.149.w1", "model.layers.16.block_sparse_moe.experts.150.w1", "model.layers.16.block_sparse_moe.experts.151.w1", "model.layers.16.block_sparse_moe.experts.152.w1", "model.layers.16.block_sparse_moe.experts.153.w1", "model.layers.16.block_sparse_moe.experts.154.w1", "model.layers.16.block_sparse_moe.experts.155.w1", "model.layers.16.block_sparse_moe.experts.156.w1", "model.layers.16.block_sparse_moe.experts.157.w1", "model.layers.16.block_sparse_moe.experts.158.w1", "model.layers.16.block_sparse_moe.experts.159.w1", "model.layers.16.block_sparse_moe.experts.160.w1", "model.layers.16.block_sparse_moe.experts.161.w1", "model.layers.16.block_sparse_moe.experts.162.w1", "model.layers.16.block_sparse_moe.experts.163.w1", "model.layers.16.block_sparse_moe.experts.164.w1", "model.layers.16.block_sparse_moe.experts.165.w1", "model.layers.16.block_sparse_moe.experts.166.w1", "model.layers.16.block_sparse_moe.experts.167.w1", "model.layers.16.block_sparse_moe.experts.168.w1", "model.layers.16.block_sparse_moe.experts.169.w1", "model.layers.16.block_sparse_moe.experts.170.w1", "model.layers.16.block_sparse_moe.experts.171.w1", "model.layers.16.block_sparse_moe.experts.172.w1", "model.layers.16.block_sparse_moe.experts.173.w1", "model.layers.16.block_sparse_moe.experts.174.w1", "model.layers.16.block_sparse_moe.experts.175.w1", "model.layers.16.block_sparse_moe.experts.176.w1", "model.layers.16.block_sparse_moe.experts.177.w1", "model.layers.16.block_sparse_moe.experts.178.w1", "model.layers.16.block_sparse_moe.experts.179.w1", "model.layers.16.block_sparse_moe.experts.180.w1", "model.layers.16.block_sparse_moe.experts.181.w1", "model.layers.16.block_sparse_moe.experts.182.w1", "model.layers.16.block_sparse_moe.experts.183.w1", "model.layers.16.block_sparse_moe.experts.184.w1", "model.layers.16.block_sparse_moe.experts.185.w1", "model.layers.16.block_sparse_moe.experts.186.w1", "model.layers.16.block_sparse_moe.experts.187.w1", "model.layers.16.block_sparse_moe.experts.188.w1", "model.layers.16.block_sparse_moe.experts.189.w1", "model.layers.16.block_sparse_moe.experts.190.w1", "model.layers.16.block_sparse_moe.experts.191.w1", "model.layers.16.block_sparse_moe.experts.192.w1", "model.layers.16.block_sparse_moe.experts.193.w1", "model.layers.16.block_sparse_moe.experts.194.w1", "model.layers.16.block_sparse_moe.experts.195.w1", "model.layers.16.block_sparse_moe.experts.196.w1", "model.layers.16.block_sparse_moe.experts.197.w1", "model.layers.16.block_sparse_moe.experts.198.w1", "model.layers.16.block_sparse_moe.experts.199.w1", "model.layers.16.block_sparse_moe.experts.200.w1", "model.layers.16.block_sparse_moe.experts.201.w1", "model.layers.16.block_sparse_moe.experts.202.w1", "model.layers.16.block_sparse_moe.experts.203.w1", "model.layers.16.block_sparse_moe.experts.204.w1", "model.layers.16.block_sparse_moe.experts.205.w1", "model.layers.16.block_sparse_moe.experts.206.w1", "model.layers.16.block_sparse_moe.experts.207.w1", "model.layers.16.block_sparse_moe.experts.208.w1", "model.layers.16.block_sparse_moe.experts.209.w1", "model.layers.16.block_sparse_moe.experts.210.w1", "model.layers.16.block_sparse_moe.experts.211.w1", "model.layers.16.block_sparse_moe.experts.212.w1", "model.layers.16.block_sparse_moe.experts.213.w1", "model.layers.16.block_sparse_moe.experts.214.w1", "model.layers.16.block_sparse_moe.experts.215.w1", "model.layers.16.block_sparse_moe.experts.216.w1", "model.layers.16.block_sparse_moe.experts.217.w1", "model.layers.16.block_sparse_moe.experts.218.w1", "model.layers.16.block_sparse_moe.experts.219.w1", "model.layers.16.block_sparse_moe.experts.220.w1", "model.layers.16.block_sparse_moe.experts.221.w1", "model.layers.16.block_sparse_moe.experts.222.w1", "model.layers.16.block_sparse_moe.experts.223.w1", "model.layers.16.block_sparse_moe.experts.224.w1", "model.layers.16.block_sparse_moe.experts.225.w1", "model.layers.16.block_sparse_moe.experts.226.w1", "model.layers.16.block_sparse_moe.experts.227.w1", "model.layers.16.block_sparse_moe.experts.228.w1", "model.layers.16.block_sparse_moe.experts.229.w1", "model.layers.16.block_sparse_moe.experts.230.w1", "model.layers.16.block_sparse_moe.experts.231.w1", "model.layers.16.block_sparse_moe.experts.232.w1", "model.layers.16.block_sparse_moe.experts.233.w1", "model.layers.16.block_sparse_moe.experts.234.w1", "model.layers.16.block_sparse_moe.experts.235.w1", "model.layers.16.block_sparse_moe.experts.236.w1", "model.layers.16.block_sparse_moe.experts.237.w1", "model.layers.16.block_sparse_moe.experts.238.w1", "model.layers.16.block_sparse_moe.experts.239.w1", "model.layers.16.block_sparse_moe.experts.240.w1", "model.layers.16.block_sparse_moe.experts.241.w1", "model.layers.16.block_sparse_moe.experts.242.w1", "model.layers.16.block_sparse_moe.experts.243.w1", "model.layers.16.block_sparse_moe.experts.244.w1", "model.layers.16.block_sparse_moe.experts.245.w1", "model.layers.16.block_sparse_moe.experts.246.w1", "model.layers.16.block_sparse_moe.experts.247.w1", "model.layers.16.block_sparse_moe.experts.248.w1", "model.layers.16.block_sparse_moe.experts.249.w1", "model.layers.16.block_sparse_moe.experts.250.w1", "model.layers.16.block_sparse_moe.experts.251.w1", "model.layers.16.block_sparse_moe.experts.252.w1", "model.layers.16.block_sparse_moe.experts.253.w1", "model.layers.16.block_sparse_moe.experts.254.w1", "model.layers.16.block_sparse_moe.experts.255.w1", "model.layers.16.block_sparse_moe.experts.0.w3", "model.layers.16.block_sparse_moe.experts.1.w3", "model.layers.16.block_sparse_moe.experts.2.w3", "model.layers.16.block_sparse_moe.experts.3.w3", "model.layers.16.block_sparse_moe.experts.4.w3", "model.layers.16.block_sparse_moe.experts.5.w3", "model.layers.16.block_sparse_moe.experts.6.w3", "model.layers.16.block_sparse_moe.experts.7.w3", "model.layers.16.block_sparse_moe.experts.8.w3", "model.layers.16.block_sparse_moe.experts.9.w3", "model.layers.16.block_sparse_moe.experts.10.w3", "model.layers.16.block_sparse_moe.experts.11.w3", "model.layers.16.block_sparse_moe.experts.12.w3", "model.layers.16.block_sparse_moe.experts.13.w3", "model.layers.16.block_sparse_moe.experts.14.w3", "model.layers.16.block_sparse_moe.experts.15.w3", "model.layers.16.block_sparse_moe.experts.16.w3", "model.layers.16.block_sparse_moe.experts.17.w3", "model.layers.16.block_sparse_moe.experts.18.w3", "model.layers.16.block_sparse_moe.experts.19.w3", "model.layers.16.block_sparse_moe.experts.20.w3", "model.layers.16.block_sparse_moe.experts.21.w3", "model.layers.16.block_sparse_moe.experts.22.w3", "model.layers.16.block_sparse_moe.experts.23.w3", "model.layers.16.block_sparse_moe.experts.24.w3", "model.layers.16.block_sparse_moe.experts.25.w3", "model.layers.16.block_sparse_moe.experts.26.w3", "model.layers.16.block_sparse_moe.experts.27.w3", "model.layers.16.block_sparse_moe.experts.28.w3", "model.layers.16.block_sparse_moe.experts.29.w3", "model.layers.16.block_sparse_moe.experts.30.w3", "model.layers.16.block_sparse_moe.experts.31.w3", "model.layers.16.block_sparse_moe.experts.32.w3", "model.layers.16.block_sparse_moe.experts.33.w3", "model.layers.16.block_sparse_moe.experts.34.w3", "model.layers.16.block_sparse_moe.experts.35.w3", "model.layers.16.block_sparse_moe.experts.36.w3", "model.layers.16.block_sparse_moe.experts.37.w3", "model.layers.16.block_sparse_moe.experts.38.w3", "model.layers.16.block_sparse_moe.experts.39.w3", "model.layers.16.block_sparse_moe.experts.40.w3", "model.layers.16.block_sparse_moe.experts.41.w3", "model.layers.16.block_sparse_moe.experts.42.w3", "model.layers.16.block_sparse_moe.experts.43.w3", "model.layers.16.block_sparse_moe.experts.44.w3", "model.layers.16.block_sparse_moe.experts.45.w3", "model.layers.16.block_sparse_moe.experts.46.w3", "model.layers.16.block_sparse_moe.experts.47.w3", "model.layers.16.block_sparse_moe.experts.48.w3", "model.layers.16.block_sparse_moe.experts.49.w3", "model.layers.16.block_sparse_moe.experts.50.w3", "model.layers.16.block_sparse_moe.experts.51.w3", "model.layers.16.block_sparse_moe.experts.52.w3", "model.layers.16.block_sparse_moe.experts.53.w3", "model.layers.16.block_sparse_moe.experts.54.w3", "model.layers.16.block_sparse_moe.experts.55.w3", "model.layers.16.block_sparse_moe.experts.56.w3", "model.layers.16.block_sparse_moe.experts.57.w3", "model.layers.16.block_sparse_moe.experts.58.w3", "model.layers.16.block_sparse_moe.experts.59.w3", "model.layers.16.block_sparse_moe.experts.60.w3", "model.layers.16.block_sparse_moe.experts.61.w3", "model.layers.16.block_sparse_moe.experts.62.w3", "model.layers.16.block_sparse_moe.experts.63.w3", "model.layers.16.block_sparse_moe.experts.64.w3", "model.layers.16.block_sparse_moe.experts.65.w3", "model.layers.16.block_sparse_moe.experts.66.w3", "model.layers.16.block_sparse_moe.experts.67.w3", "model.layers.16.block_sparse_moe.experts.68.w3", "model.layers.16.block_sparse_moe.experts.69.w3", "model.layers.16.block_sparse_moe.experts.70.w3", "model.layers.16.block_sparse_moe.experts.71.w3", "model.layers.16.block_sparse_moe.experts.72.w3", "model.layers.16.block_sparse_moe.experts.73.w3", "model.layers.16.block_sparse_moe.experts.74.w3", "model.layers.16.block_sparse_moe.experts.75.w3", "model.layers.16.block_sparse_moe.experts.76.w3", "model.layers.16.block_sparse_moe.experts.77.w3", "model.layers.16.block_sparse_moe.experts.78.w3", "model.layers.16.block_sparse_moe.experts.79.w3", "model.layers.16.block_sparse_moe.experts.80.w3", "model.layers.16.block_sparse_moe.experts.81.w3", "model.layers.16.block_sparse_moe.experts.82.w3", "model.layers.16.block_sparse_moe.experts.83.w3", "model.layers.16.block_sparse_moe.experts.84.w3", "model.layers.16.block_sparse_moe.experts.85.w3", "model.layers.16.block_sparse_moe.experts.86.w3", "model.layers.16.block_sparse_moe.experts.87.w3", "model.layers.16.block_sparse_moe.experts.88.w3", "model.layers.16.block_sparse_moe.experts.89.w3", "model.layers.16.block_sparse_moe.experts.90.w3", "model.layers.16.block_sparse_moe.experts.91.w3", "model.layers.16.block_sparse_moe.experts.92.w3", "model.layers.16.block_sparse_moe.experts.93.w3", "model.layers.16.block_sparse_moe.experts.94.w3", "model.layers.16.block_sparse_moe.experts.95.w3", "model.layers.16.block_sparse_moe.experts.96.w3", "model.layers.16.block_sparse_moe.experts.97.w3", "model.layers.16.block_sparse_moe.experts.98.w3", "model.layers.16.block_sparse_moe.experts.99.w3", "model.layers.16.block_sparse_moe.experts.100.w3", "model.layers.16.block_sparse_moe.experts.101.w3", "model.layers.16.block_sparse_moe.experts.102.w3", "model.layers.16.block_sparse_moe.experts.103.w3", "model.layers.16.block_sparse_moe.experts.104.w3", "model.layers.16.block_sparse_moe.experts.105.w3", "model.layers.16.block_sparse_moe.experts.106.w3", "model.layers.16.block_sparse_moe.experts.107.w3", "model.layers.16.block_sparse_moe.experts.108.w3", "model.layers.16.block_sparse_moe.experts.109.w3", "model.layers.16.block_sparse_moe.experts.110.w3", "model.layers.16.block_sparse_moe.experts.111.w3", "model.layers.16.block_sparse_moe.experts.112.w3", "model.layers.16.block_sparse_moe.experts.113.w3", "model.layers.16.block_sparse_moe.experts.114.w3", "model.layers.16.block_sparse_moe.experts.115.w3", "model.layers.16.block_sparse_moe.experts.116.w3", "model.layers.16.block_sparse_moe.experts.117.w3", "model.layers.16.block_sparse_moe.experts.118.w3", "model.layers.16.block_sparse_moe.experts.119.w3", "model.layers.16.block_sparse_moe.experts.120.w3", "model.layers.16.block_sparse_moe.experts.121.w3", "model.layers.16.block_sparse_moe.experts.122.w3", "model.layers.16.block_sparse_moe.experts.123.w3", "model.layers.16.block_sparse_moe.experts.124.w3", "model.layers.16.block_sparse_moe.experts.125.w3", "model.layers.16.block_sparse_moe.experts.126.w3", "model.layers.16.block_sparse_moe.experts.127.w3", "model.layers.16.block_sparse_moe.experts.128.w3", "model.layers.16.block_sparse_moe.experts.129.w3", "model.layers.16.block_sparse_moe.experts.130.w3", "model.layers.16.block_sparse_moe.experts.131.w3", "model.layers.16.block_sparse_moe.experts.132.w3", "model.layers.16.block_sparse_moe.experts.133.w3", "model.layers.16.block_sparse_moe.experts.134.w3", "model.layers.16.block_sparse_moe.experts.135.w3", "model.layers.16.block_sparse_moe.experts.136.w3", "model.layers.16.block_sparse_moe.experts.137.w3", "model.layers.16.block_sparse_moe.experts.138.w3", "model.layers.16.block_sparse_moe.experts.139.w3", "model.layers.16.block_sparse_moe.experts.140.w3", "model.layers.16.block_sparse_moe.experts.141.w3", "model.layers.16.block_sparse_moe.experts.142.w3", "model.layers.16.block_sparse_moe.experts.143.w3", "model.layers.16.block_sparse_moe.experts.144.w3", "model.layers.16.block_sparse_moe.experts.145.w3", "model.layers.16.block_sparse_moe.experts.146.w3", "model.layers.16.block_sparse_moe.experts.147.w3", "model.layers.16.block_sparse_moe.experts.148.w3", "model.layers.16.block_sparse_moe.experts.149.w3", "model.layers.16.block_sparse_moe.experts.150.w3", "model.layers.16.block_sparse_moe.experts.151.w3", "model.layers.16.block_sparse_moe.experts.152.w3", "model.layers.16.block_sparse_moe.experts.153.w3", "model.layers.16.block_sparse_moe.experts.154.w3", "model.layers.16.block_sparse_moe.experts.155.w3", "model.layers.16.block_sparse_moe.experts.156.w3", "model.layers.16.block_sparse_moe.experts.157.w3", "model.layers.16.block_sparse_moe.experts.158.w3", "model.layers.16.block_sparse_moe.experts.159.w3", "model.layers.16.block_sparse_moe.experts.160.w3", "model.layers.16.block_sparse_moe.experts.161.w3", "model.layers.16.block_sparse_moe.experts.162.w3", "model.layers.16.block_sparse_moe.experts.163.w3", "model.layers.16.block_sparse_moe.experts.164.w3", "model.layers.16.block_sparse_moe.experts.165.w3", "model.layers.16.block_sparse_moe.experts.166.w3", "model.layers.16.block_sparse_moe.experts.167.w3", "model.layers.16.block_sparse_moe.experts.168.w3", "model.layers.16.block_sparse_moe.experts.169.w3", "model.layers.16.block_sparse_moe.experts.170.w3", "model.layers.16.block_sparse_moe.experts.171.w3", "model.layers.16.block_sparse_moe.experts.172.w3", "model.layers.16.block_sparse_moe.experts.173.w3", "model.layers.16.block_sparse_moe.experts.174.w3", "model.layers.16.block_sparse_moe.experts.175.w3", "model.layers.16.block_sparse_moe.experts.176.w3", "model.layers.16.block_sparse_moe.experts.177.w3", "model.layers.16.block_sparse_moe.experts.178.w3", "model.layers.16.block_sparse_moe.experts.179.w3", "model.layers.16.block_sparse_moe.experts.180.w3", "model.layers.16.block_sparse_moe.experts.181.w3", "model.layers.16.block_sparse_moe.experts.182.w3", "model.layers.16.block_sparse_moe.experts.183.w3", "model.layers.16.block_sparse_moe.experts.184.w3", "model.layers.16.block_sparse_moe.experts.185.w3", "model.layers.16.block_sparse_moe.experts.186.w3", "model.layers.16.block_sparse_moe.experts.187.w3", "model.layers.16.block_sparse_moe.experts.188.w3", "model.layers.16.block_sparse_moe.experts.189.w3", "model.layers.16.block_sparse_moe.experts.190.w3", "model.layers.16.block_sparse_moe.experts.191.w3", "model.layers.16.block_sparse_moe.experts.192.w3", "model.layers.16.block_sparse_moe.experts.193.w3", "model.layers.16.block_sparse_moe.experts.194.w3", "model.layers.16.block_sparse_moe.experts.195.w3", "model.layers.16.block_sparse_moe.experts.196.w3", "model.layers.16.block_sparse_moe.experts.197.w3", "model.layers.16.block_sparse_moe.experts.198.w3", "model.layers.16.block_sparse_moe.experts.199.w3", "model.layers.16.block_sparse_moe.experts.200.w3", "model.layers.16.block_sparse_moe.experts.201.w3", "model.layers.16.block_sparse_moe.experts.202.w3", "model.layers.16.block_sparse_moe.experts.203.w3", "model.layers.16.block_sparse_moe.experts.204.w3", "model.layers.16.block_sparse_moe.experts.205.w3", "model.layers.16.block_sparse_moe.experts.206.w3", "model.layers.16.block_sparse_moe.experts.207.w3", "model.layers.16.block_sparse_moe.experts.208.w3", "model.layers.16.block_sparse_moe.experts.209.w3", "model.layers.16.block_sparse_moe.experts.210.w3", "model.layers.16.block_sparse_moe.experts.211.w3", "model.layers.16.block_sparse_moe.experts.212.w3", "model.layers.16.block_sparse_moe.experts.213.w3", "model.layers.16.block_sparse_moe.experts.214.w3", "model.layers.16.block_sparse_moe.experts.215.w3", "model.layers.16.block_sparse_moe.experts.216.w3", "model.layers.16.block_sparse_moe.experts.217.w3", "model.layers.16.block_sparse_moe.experts.218.w3", "model.layers.16.block_sparse_moe.experts.219.w3", "model.layers.16.block_sparse_moe.experts.220.w3", "model.layers.16.block_sparse_moe.experts.221.w3", "model.layers.16.block_sparse_moe.experts.222.w3", "model.layers.16.block_sparse_moe.experts.223.w3", "model.layers.16.block_sparse_moe.experts.224.w3", "model.layers.16.block_sparse_moe.experts.225.w3", "model.layers.16.block_sparse_moe.experts.226.w3", "model.layers.16.block_sparse_moe.experts.227.w3", "model.layers.16.block_sparse_moe.experts.228.w3", "model.layers.16.block_sparse_moe.experts.229.w3", "model.layers.16.block_sparse_moe.experts.230.w3", "model.layers.16.block_sparse_moe.experts.231.w3", "model.layers.16.block_sparse_moe.experts.232.w3", "model.layers.16.block_sparse_moe.experts.233.w3", "model.layers.16.block_sparse_moe.experts.234.w3", "model.layers.16.block_sparse_moe.experts.235.w3", "model.layers.16.block_sparse_moe.experts.236.w3", "model.layers.16.block_sparse_moe.experts.237.w3", "model.layers.16.block_sparse_moe.experts.238.w3", "model.layers.16.block_sparse_moe.experts.239.w3", "model.layers.16.block_sparse_moe.experts.240.w3", "model.layers.16.block_sparse_moe.experts.241.w3", "model.layers.16.block_sparse_moe.experts.242.w3", "model.layers.16.block_sparse_moe.experts.243.w3", "model.layers.16.block_sparse_moe.experts.244.w3", "model.layers.16.block_sparse_moe.experts.245.w3", "model.layers.16.block_sparse_moe.experts.246.w3", "model.layers.16.block_sparse_moe.experts.247.w3", "model.layers.16.block_sparse_moe.experts.248.w3", "model.layers.16.block_sparse_moe.experts.249.w3", "model.layers.16.block_sparse_moe.experts.250.w3", "model.layers.16.block_sparse_moe.experts.251.w3", "model.layers.16.block_sparse_moe.experts.252.w3", "model.layers.16.block_sparse_moe.experts.253.w3", "model.layers.16.block_sparse_moe.experts.254.w3", "model.layers.16.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 6.283223628994472e-05, "dbits": 2415919104 } ] }, { "idx": 84, "layers": [ "model.layers.16.block_sparse_moe.experts.0.w2", "model.layers.16.block_sparse_moe.experts.1.w2", "model.layers.16.block_sparse_moe.experts.2.w2", "model.layers.16.block_sparse_moe.experts.3.w2", "model.layers.16.block_sparse_moe.experts.4.w2", "model.layers.16.block_sparse_moe.experts.5.w2", "model.layers.16.block_sparse_moe.experts.6.w2", "model.layers.16.block_sparse_moe.experts.7.w2", "model.layers.16.block_sparse_moe.experts.8.w2", "model.layers.16.block_sparse_moe.experts.9.w2", "model.layers.16.block_sparse_moe.experts.10.w2", "model.layers.16.block_sparse_moe.experts.11.w2", "model.layers.16.block_sparse_moe.experts.12.w2", "model.layers.16.block_sparse_moe.experts.13.w2", "model.layers.16.block_sparse_moe.experts.14.w2", "model.layers.16.block_sparse_moe.experts.15.w2", "model.layers.16.block_sparse_moe.experts.16.w2", "model.layers.16.block_sparse_moe.experts.17.w2", "model.layers.16.block_sparse_moe.experts.18.w2", "model.layers.16.block_sparse_moe.experts.19.w2", "model.layers.16.block_sparse_moe.experts.20.w2", "model.layers.16.block_sparse_moe.experts.21.w2", "model.layers.16.block_sparse_moe.experts.22.w2", "model.layers.16.block_sparse_moe.experts.23.w2", "model.layers.16.block_sparse_moe.experts.24.w2", "model.layers.16.block_sparse_moe.experts.25.w2", "model.layers.16.block_sparse_moe.experts.26.w2", "model.layers.16.block_sparse_moe.experts.27.w2", "model.layers.16.block_sparse_moe.experts.28.w2", "model.layers.16.block_sparse_moe.experts.29.w2", "model.layers.16.block_sparse_moe.experts.30.w2", "model.layers.16.block_sparse_moe.experts.31.w2", "model.layers.16.block_sparse_moe.experts.32.w2", "model.layers.16.block_sparse_moe.experts.33.w2", "model.layers.16.block_sparse_moe.experts.34.w2", "model.layers.16.block_sparse_moe.experts.35.w2", "model.layers.16.block_sparse_moe.experts.36.w2", "model.layers.16.block_sparse_moe.experts.37.w2", "model.layers.16.block_sparse_moe.experts.38.w2", "model.layers.16.block_sparse_moe.experts.39.w2", "model.layers.16.block_sparse_moe.experts.40.w2", "model.layers.16.block_sparse_moe.experts.41.w2", "model.layers.16.block_sparse_moe.experts.42.w2", "model.layers.16.block_sparse_moe.experts.43.w2", "model.layers.16.block_sparse_moe.experts.44.w2", "model.layers.16.block_sparse_moe.experts.45.w2", "model.layers.16.block_sparse_moe.experts.46.w2", "model.layers.16.block_sparse_moe.experts.47.w2", "model.layers.16.block_sparse_moe.experts.48.w2", "model.layers.16.block_sparse_moe.experts.49.w2", "model.layers.16.block_sparse_moe.experts.50.w2", "model.layers.16.block_sparse_moe.experts.51.w2", "model.layers.16.block_sparse_moe.experts.52.w2", "model.layers.16.block_sparse_moe.experts.53.w2", "model.layers.16.block_sparse_moe.experts.54.w2", "model.layers.16.block_sparse_moe.experts.55.w2", "model.layers.16.block_sparse_moe.experts.56.w2", "model.layers.16.block_sparse_moe.experts.57.w2", "model.layers.16.block_sparse_moe.experts.58.w2", "model.layers.16.block_sparse_moe.experts.59.w2", "model.layers.16.block_sparse_moe.experts.60.w2", "model.layers.16.block_sparse_moe.experts.61.w2", "model.layers.16.block_sparse_moe.experts.62.w2", "model.layers.16.block_sparse_moe.experts.63.w2", "model.layers.16.block_sparse_moe.experts.64.w2", "model.layers.16.block_sparse_moe.experts.65.w2", "model.layers.16.block_sparse_moe.experts.66.w2", "model.layers.16.block_sparse_moe.experts.67.w2", "model.layers.16.block_sparse_moe.experts.68.w2", "model.layers.16.block_sparse_moe.experts.69.w2", "model.layers.16.block_sparse_moe.experts.70.w2", "model.layers.16.block_sparse_moe.experts.71.w2", "model.layers.16.block_sparse_moe.experts.72.w2", "model.layers.16.block_sparse_moe.experts.73.w2", "model.layers.16.block_sparse_moe.experts.74.w2", "model.layers.16.block_sparse_moe.experts.75.w2", "model.layers.16.block_sparse_moe.experts.76.w2", "model.layers.16.block_sparse_moe.experts.77.w2", "model.layers.16.block_sparse_moe.experts.78.w2", "model.layers.16.block_sparse_moe.experts.79.w2", "model.layers.16.block_sparse_moe.experts.80.w2", "model.layers.16.block_sparse_moe.experts.81.w2", "model.layers.16.block_sparse_moe.experts.82.w2", "model.layers.16.block_sparse_moe.experts.83.w2", "model.layers.16.block_sparse_moe.experts.84.w2", "model.layers.16.block_sparse_moe.experts.85.w2", "model.layers.16.block_sparse_moe.experts.86.w2", "model.layers.16.block_sparse_moe.experts.87.w2", "model.layers.16.block_sparse_moe.experts.88.w2", "model.layers.16.block_sparse_moe.experts.89.w2", "model.layers.16.block_sparse_moe.experts.90.w2", "model.layers.16.block_sparse_moe.experts.91.w2", "model.layers.16.block_sparse_moe.experts.92.w2", "model.layers.16.block_sparse_moe.experts.93.w2", "model.layers.16.block_sparse_moe.experts.94.w2", "model.layers.16.block_sparse_moe.experts.95.w2", "model.layers.16.block_sparse_moe.experts.96.w2", "model.layers.16.block_sparse_moe.experts.97.w2", "model.layers.16.block_sparse_moe.experts.98.w2", "model.layers.16.block_sparse_moe.experts.99.w2", "model.layers.16.block_sparse_moe.experts.100.w2", "model.layers.16.block_sparse_moe.experts.101.w2", "model.layers.16.block_sparse_moe.experts.102.w2", "model.layers.16.block_sparse_moe.experts.103.w2", "model.layers.16.block_sparse_moe.experts.104.w2", "model.layers.16.block_sparse_moe.experts.105.w2", "model.layers.16.block_sparse_moe.experts.106.w2", "model.layers.16.block_sparse_moe.experts.107.w2", "model.layers.16.block_sparse_moe.experts.108.w2", "model.layers.16.block_sparse_moe.experts.109.w2", "model.layers.16.block_sparse_moe.experts.110.w2", "model.layers.16.block_sparse_moe.experts.111.w2", "model.layers.16.block_sparse_moe.experts.112.w2", "model.layers.16.block_sparse_moe.experts.113.w2", "model.layers.16.block_sparse_moe.experts.114.w2", "model.layers.16.block_sparse_moe.experts.115.w2", "model.layers.16.block_sparse_moe.experts.116.w2", "model.layers.16.block_sparse_moe.experts.117.w2", "model.layers.16.block_sparse_moe.experts.118.w2", "model.layers.16.block_sparse_moe.experts.119.w2", "model.layers.16.block_sparse_moe.experts.120.w2", "model.layers.16.block_sparse_moe.experts.121.w2", "model.layers.16.block_sparse_moe.experts.122.w2", "model.layers.16.block_sparse_moe.experts.123.w2", "model.layers.16.block_sparse_moe.experts.124.w2", "model.layers.16.block_sparse_moe.experts.125.w2", "model.layers.16.block_sparse_moe.experts.126.w2", "model.layers.16.block_sparse_moe.experts.127.w2", "model.layers.16.block_sparse_moe.experts.128.w2", "model.layers.16.block_sparse_moe.experts.129.w2", "model.layers.16.block_sparse_moe.experts.130.w2", "model.layers.16.block_sparse_moe.experts.131.w2", "model.layers.16.block_sparse_moe.experts.132.w2", "model.layers.16.block_sparse_moe.experts.133.w2", "model.layers.16.block_sparse_moe.experts.134.w2", "model.layers.16.block_sparse_moe.experts.135.w2", "model.layers.16.block_sparse_moe.experts.136.w2", "model.layers.16.block_sparse_moe.experts.137.w2", "model.layers.16.block_sparse_moe.experts.138.w2", "model.layers.16.block_sparse_moe.experts.139.w2", "model.layers.16.block_sparse_moe.experts.140.w2", "model.layers.16.block_sparse_moe.experts.141.w2", "model.layers.16.block_sparse_moe.experts.142.w2", "model.layers.16.block_sparse_moe.experts.143.w2", "model.layers.16.block_sparse_moe.experts.144.w2", "model.layers.16.block_sparse_moe.experts.145.w2", "model.layers.16.block_sparse_moe.experts.146.w2", "model.layers.16.block_sparse_moe.experts.147.w2", "model.layers.16.block_sparse_moe.experts.148.w2", "model.layers.16.block_sparse_moe.experts.149.w2", "model.layers.16.block_sparse_moe.experts.150.w2", "model.layers.16.block_sparse_moe.experts.151.w2", "model.layers.16.block_sparse_moe.experts.152.w2", "model.layers.16.block_sparse_moe.experts.153.w2", "model.layers.16.block_sparse_moe.experts.154.w2", "model.layers.16.block_sparse_moe.experts.155.w2", "model.layers.16.block_sparse_moe.experts.156.w2", "model.layers.16.block_sparse_moe.experts.157.w2", "model.layers.16.block_sparse_moe.experts.158.w2", "model.layers.16.block_sparse_moe.experts.159.w2", "model.layers.16.block_sparse_moe.experts.160.w2", "model.layers.16.block_sparse_moe.experts.161.w2", "model.layers.16.block_sparse_moe.experts.162.w2", "model.layers.16.block_sparse_moe.experts.163.w2", "model.layers.16.block_sparse_moe.experts.164.w2", "model.layers.16.block_sparse_moe.experts.165.w2", "model.layers.16.block_sparse_moe.experts.166.w2", "model.layers.16.block_sparse_moe.experts.167.w2", "model.layers.16.block_sparse_moe.experts.168.w2", "model.layers.16.block_sparse_moe.experts.169.w2", "model.layers.16.block_sparse_moe.experts.170.w2", "model.layers.16.block_sparse_moe.experts.171.w2", "model.layers.16.block_sparse_moe.experts.172.w2", "model.layers.16.block_sparse_moe.experts.173.w2", "model.layers.16.block_sparse_moe.experts.174.w2", "model.layers.16.block_sparse_moe.experts.175.w2", "model.layers.16.block_sparse_moe.experts.176.w2", "model.layers.16.block_sparse_moe.experts.177.w2", "model.layers.16.block_sparse_moe.experts.178.w2", "model.layers.16.block_sparse_moe.experts.179.w2", "model.layers.16.block_sparse_moe.experts.180.w2", "model.layers.16.block_sparse_moe.experts.181.w2", "model.layers.16.block_sparse_moe.experts.182.w2", "model.layers.16.block_sparse_moe.experts.183.w2", "model.layers.16.block_sparse_moe.experts.184.w2", "model.layers.16.block_sparse_moe.experts.185.w2", "model.layers.16.block_sparse_moe.experts.186.w2", "model.layers.16.block_sparse_moe.experts.187.w2", "model.layers.16.block_sparse_moe.experts.188.w2", "model.layers.16.block_sparse_moe.experts.189.w2", "model.layers.16.block_sparse_moe.experts.190.w2", "model.layers.16.block_sparse_moe.experts.191.w2", "model.layers.16.block_sparse_moe.experts.192.w2", "model.layers.16.block_sparse_moe.experts.193.w2", "model.layers.16.block_sparse_moe.experts.194.w2", "model.layers.16.block_sparse_moe.experts.195.w2", "model.layers.16.block_sparse_moe.experts.196.w2", "model.layers.16.block_sparse_moe.experts.197.w2", "model.layers.16.block_sparse_moe.experts.198.w2", "model.layers.16.block_sparse_moe.experts.199.w2", "model.layers.16.block_sparse_moe.experts.200.w2", "model.layers.16.block_sparse_moe.experts.201.w2", "model.layers.16.block_sparse_moe.experts.202.w2", "model.layers.16.block_sparse_moe.experts.203.w2", "model.layers.16.block_sparse_moe.experts.204.w2", "model.layers.16.block_sparse_moe.experts.205.w2", "model.layers.16.block_sparse_moe.experts.206.w2", "model.layers.16.block_sparse_moe.experts.207.w2", "model.layers.16.block_sparse_moe.experts.208.w2", "model.layers.16.block_sparse_moe.experts.209.w2", "model.layers.16.block_sparse_moe.experts.210.w2", "model.layers.16.block_sparse_moe.experts.211.w2", "model.layers.16.block_sparse_moe.experts.212.w2", "model.layers.16.block_sparse_moe.experts.213.w2", "model.layers.16.block_sparse_moe.experts.214.w2", "model.layers.16.block_sparse_moe.experts.215.w2", "model.layers.16.block_sparse_moe.experts.216.w2", "model.layers.16.block_sparse_moe.experts.217.w2", "model.layers.16.block_sparse_moe.experts.218.w2", "model.layers.16.block_sparse_moe.experts.219.w2", "model.layers.16.block_sparse_moe.experts.220.w2", "model.layers.16.block_sparse_moe.experts.221.w2", "model.layers.16.block_sparse_moe.experts.222.w2", "model.layers.16.block_sparse_moe.experts.223.w2", "model.layers.16.block_sparse_moe.experts.224.w2", "model.layers.16.block_sparse_moe.experts.225.w2", "model.layers.16.block_sparse_moe.experts.226.w2", "model.layers.16.block_sparse_moe.experts.227.w2", "model.layers.16.block_sparse_moe.experts.228.w2", "model.layers.16.block_sparse_moe.experts.229.w2", "model.layers.16.block_sparse_moe.experts.230.w2", "model.layers.16.block_sparse_moe.experts.231.w2", "model.layers.16.block_sparse_moe.experts.232.w2", "model.layers.16.block_sparse_moe.experts.233.w2", "model.layers.16.block_sparse_moe.experts.234.w2", "model.layers.16.block_sparse_moe.experts.235.w2", "model.layers.16.block_sparse_moe.experts.236.w2", "model.layers.16.block_sparse_moe.experts.237.w2", "model.layers.16.block_sparse_moe.experts.238.w2", "model.layers.16.block_sparse_moe.experts.239.w2", "model.layers.16.block_sparse_moe.experts.240.w2", "model.layers.16.block_sparse_moe.experts.241.w2", "model.layers.16.block_sparse_moe.experts.242.w2", "model.layers.16.block_sparse_moe.experts.243.w2", "model.layers.16.block_sparse_moe.experts.244.w2", "model.layers.16.block_sparse_moe.experts.245.w2", "model.layers.16.block_sparse_moe.experts.246.w2", "model.layers.16.block_sparse_moe.experts.247.w2", "model.layers.16.block_sparse_moe.experts.248.w2", "model.layers.16.block_sparse_moe.experts.249.w2", "model.layers.16.block_sparse_moe.experts.250.w2", "model.layers.16.block_sparse_moe.experts.251.w2", "model.layers.16.block_sparse_moe.experts.252.w2", "model.layers.16.block_sparse_moe.experts.253.w2", "model.layers.16.block_sparse_moe.experts.254.w2", "model.layers.16.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.00038206689059733234, "dbits": 1207959552 } ] }, { "idx": 85, "layers": [ "model.layers.17.self_attn.q_proj" ], "candidates": [ { "dkld": -0.001512434706091914, "dbits": 18874368 } ] }, { "idx": 86, "layers": [ "model.layers.17.self_attn.k_proj", "model.layers.17.self_attn.v_proj" ], "candidates": [ { "dkld": -0.003040917217731476, "dbits": 6291456 } ] }, { "idx": 87, "layers": [ "model.layers.17.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0018047180026769527, "dbits": 18874368 } ] }, { "idx": 88, "layers": [ "model.layers.17.block_sparse_moe.experts.0.w1", "model.layers.17.block_sparse_moe.experts.1.w1", "model.layers.17.block_sparse_moe.experts.2.w1", "model.layers.17.block_sparse_moe.experts.3.w1", "model.layers.17.block_sparse_moe.experts.4.w1", "model.layers.17.block_sparse_moe.experts.5.w1", "model.layers.17.block_sparse_moe.experts.6.w1", "model.layers.17.block_sparse_moe.experts.7.w1", "model.layers.17.block_sparse_moe.experts.8.w1", "model.layers.17.block_sparse_moe.experts.9.w1", "model.layers.17.block_sparse_moe.experts.10.w1", "model.layers.17.block_sparse_moe.experts.11.w1", "model.layers.17.block_sparse_moe.experts.12.w1", "model.layers.17.block_sparse_moe.experts.13.w1", "model.layers.17.block_sparse_moe.experts.14.w1", "model.layers.17.block_sparse_moe.experts.15.w1", "model.layers.17.block_sparse_moe.experts.16.w1", "model.layers.17.block_sparse_moe.experts.17.w1", "model.layers.17.block_sparse_moe.experts.18.w1", "model.layers.17.block_sparse_moe.experts.19.w1", "model.layers.17.block_sparse_moe.experts.20.w1", "model.layers.17.block_sparse_moe.experts.21.w1", "model.layers.17.block_sparse_moe.experts.22.w1", "model.layers.17.block_sparse_moe.experts.23.w1", "model.layers.17.block_sparse_moe.experts.24.w1", "model.layers.17.block_sparse_moe.experts.25.w1", "model.layers.17.block_sparse_moe.experts.26.w1", "model.layers.17.block_sparse_moe.experts.27.w1", "model.layers.17.block_sparse_moe.experts.28.w1", "model.layers.17.block_sparse_moe.experts.29.w1", "model.layers.17.block_sparse_moe.experts.30.w1", "model.layers.17.block_sparse_moe.experts.31.w1", "model.layers.17.block_sparse_moe.experts.32.w1", "model.layers.17.block_sparse_moe.experts.33.w1", "model.layers.17.block_sparse_moe.experts.34.w1", "model.layers.17.block_sparse_moe.experts.35.w1", "model.layers.17.block_sparse_moe.experts.36.w1", "model.layers.17.block_sparse_moe.experts.37.w1", "model.layers.17.block_sparse_moe.experts.38.w1", "model.layers.17.block_sparse_moe.experts.39.w1", "model.layers.17.block_sparse_moe.experts.40.w1", "model.layers.17.block_sparse_moe.experts.41.w1", "model.layers.17.block_sparse_moe.experts.42.w1", "model.layers.17.block_sparse_moe.experts.43.w1", "model.layers.17.block_sparse_moe.experts.44.w1", "model.layers.17.block_sparse_moe.experts.45.w1", "model.layers.17.block_sparse_moe.experts.46.w1", "model.layers.17.block_sparse_moe.experts.47.w1", "model.layers.17.block_sparse_moe.experts.48.w1", "model.layers.17.block_sparse_moe.experts.49.w1", "model.layers.17.block_sparse_moe.experts.50.w1", "model.layers.17.block_sparse_moe.experts.51.w1", "model.layers.17.block_sparse_moe.experts.52.w1", "model.layers.17.block_sparse_moe.experts.53.w1", "model.layers.17.block_sparse_moe.experts.54.w1", "model.layers.17.block_sparse_moe.experts.55.w1", "model.layers.17.block_sparse_moe.experts.56.w1", "model.layers.17.block_sparse_moe.experts.57.w1", "model.layers.17.block_sparse_moe.experts.58.w1", "model.layers.17.block_sparse_moe.experts.59.w1", "model.layers.17.block_sparse_moe.experts.60.w1", "model.layers.17.block_sparse_moe.experts.61.w1", "model.layers.17.block_sparse_moe.experts.62.w1", "model.layers.17.block_sparse_moe.experts.63.w1", "model.layers.17.block_sparse_moe.experts.64.w1", "model.layers.17.block_sparse_moe.experts.65.w1", "model.layers.17.block_sparse_moe.experts.66.w1", "model.layers.17.block_sparse_moe.experts.67.w1", "model.layers.17.block_sparse_moe.experts.68.w1", "model.layers.17.block_sparse_moe.experts.69.w1", "model.layers.17.block_sparse_moe.experts.70.w1", "model.layers.17.block_sparse_moe.experts.71.w1", "model.layers.17.block_sparse_moe.experts.72.w1", "model.layers.17.block_sparse_moe.experts.73.w1", "model.layers.17.block_sparse_moe.experts.74.w1", "model.layers.17.block_sparse_moe.experts.75.w1", "model.layers.17.block_sparse_moe.experts.76.w1", "model.layers.17.block_sparse_moe.experts.77.w1", "model.layers.17.block_sparse_moe.experts.78.w1", "model.layers.17.block_sparse_moe.experts.79.w1", "model.layers.17.block_sparse_moe.experts.80.w1", "model.layers.17.block_sparse_moe.experts.81.w1", "model.layers.17.block_sparse_moe.experts.82.w1", "model.layers.17.block_sparse_moe.experts.83.w1", "model.layers.17.block_sparse_moe.experts.84.w1", "model.layers.17.block_sparse_moe.experts.85.w1", "model.layers.17.block_sparse_moe.experts.86.w1", "model.layers.17.block_sparse_moe.experts.87.w1", "model.layers.17.block_sparse_moe.experts.88.w1", "model.layers.17.block_sparse_moe.experts.89.w1", "model.layers.17.block_sparse_moe.experts.90.w1", "model.layers.17.block_sparse_moe.experts.91.w1", "model.layers.17.block_sparse_moe.experts.92.w1", "model.layers.17.block_sparse_moe.experts.93.w1", "model.layers.17.block_sparse_moe.experts.94.w1", "model.layers.17.block_sparse_moe.experts.95.w1", "model.layers.17.block_sparse_moe.experts.96.w1", "model.layers.17.block_sparse_moe.experts.97.w1", "model.layers.17.block_sparse_moe.experts.98.w1", "model.layers.17.block_sparse_moe.experts.99.w1", "model.layers.17.block_sparse_moe.experts.100.w1", "model.layers.17.block_sparse_moe.experts.101.w1", "model.layers.17.block_sparse_moe.experts.102.w1", "model.layers.17.block_sparse_moe.experts.103.w1", "model.layers.17.block_sparse_moe.experts.104.w1", "model.layers.17.block_sparse_moe.experts.105.w1", "model.layers.17.block_sparse_moe.experts.106.w1", "model.layers.17.block_sparse_moe.experts.107.w1", "model.layers.17.block_sparse_moe.experts.108.w1", "model.layers.17.block_sparse_moe.experts.109.w1", "model.layers.17.block_sparse_moe.experts.110.w1", "model.layers.17.block_sparse_moe.experts.111.w1", "model.layers.17.block_sparse_moe.experts.112.w1", "model.layers.17.block_sparse_moe.experts.113.w1", "model.layers.17.block_sparse_moe.experts.114.w1", "model.layers.17.block_sparse_moe.experts.115.w1", "model.layers.17.block_sparse_moe.experts.116.w1", "model.layers.17.block_sparse_moe.experts.117.w1", "model.layers.17.block_sparse_moe.experts.118.w1", "model.layers.17.block_sparse_moe.experts.119.w1", "model.layers.17.block_sparse_moe.experts.120.w1", "model.layers.17.block_sparse_moe.experts.121.w1", "model.layers.17.block_sparse_moe.experts.122.w1", "model.layers.17.block_sparse_moe.experts.123.w1", "model.layers.17.block_sparse_moe.experts.124.w1", "model.layers.17.block_sparse_moe.experts.125.w1", "model.layers.17.block_sparse_moe.experts.126.w1", "model.layers.17.block_sparse_moe.experts.127.w1", "model.layers.17.block_sparse_moe.experts.128.w1", "model.layers.17.block_sparse_moe.experts.129.w1", "model.layers.17.block_sparse_moe.experts.130.w1", "model.layers.17.block_sparse_moe.experts.131.w1", "model.layers.17.block_sparse_moe.experts.132.w1", "model.layers.17.block_sparse_moe.experts.133.w1", "model.layers.17.block_sparse_moe.experts.134.w1", "model.layers.17.block_sparse_moe.experts.135.w1", "model.layers.17.block_sparse_moe.experts.136.w1", "model.layers.17.block_sparse_moe.experts.137.w1", "model.layers.17.block_sparse_moe.experts.138.w1", "model.layers.17.block_sparse_moe.experts.139.w1", "model.layers.17.block_sparse_moe.experts.140.w1", "model.layers.17.block_sparse_moe.experts.141.w1", "model.layers.17.block_sparse_moe.experts.142.w1", "model.layers.17.block_sparse_moe.experts.143.w1", "model.layers.17.block_sparse_moe.experts.144.w1", "model.layers.17.block_sparse_moe.experts.145.w1", "model.layers.17.block_sparse_moe.experts.146.w1", "model.layers.17.block_sparse_moe.experts.147.w1", "model.layers.17.block_sparse_moe.experts.148.w1", "model.layers.17.block_sparse_moe.experts.149.w1", "model.layers.17.block_sparse_moe.experts.150.w1", "model.layers.17.block_sparse_moe.experts.151.w1", "model.layers.17.block_sparse_moe.experts.152.w1", "model.layers.17.block_sparse_moe.experts.153.w1", "model.layers.17.block_sparse_moe.experts.154.w1", "model.layers.17.block_sparse_moe.experts.155.w1", "model.layers.17.block_sparse_moe.experts.156.w1", "model.layers.17.block_sparse_moe.experts.157.w1", "model.layers.17.block_sparse_moe.experts.158.w1", "model.layers.17.block_sparse_moe.experts.159.w1", "model.layers.17.block_sparse_moe.experts.160.w1", "model.layers.17.block_sparse_moe.experts.161.w1", "model.layers.17.block_sparse_moe.experts.162.w1", "model.layers.17.block_sparse_moe.experts.163.w1", "model.layers.17.block_sparse_moe.experts.164.w1", "model.layers.17.block_sparse_moe.experts.165.w1", "model.layers.17.block_sparse_moe.experts.166.w1", "model.layers.17.block_sparse_moe.experts.167.w1", "model.layers.17.block_sparse_moe.experts.168.w1", "model.layers.17.block_sparse_moe.experts.169.w1", "model.layers.17.block_sparse_moe.experts.170.w1", "model.layers.17.block_sparse_moe.experts.171.w1", "model.layers.17.block_sparse_moe.experts.172.w1", "model.layers.17.block_sparse_moe.experts.173.w1", "model.layers.17.block_sparse_moe.experts.174.w1", "model.layers.17.block_sparse_moe.experts.175.w1", "model.layers.17.block_sparse_moe.experts.176.w1", "model.layers.17.block_sparse_moe.experts.177.w1", "model.layers.17.block_sparse_moe.experts.178.w1", "model.layers.17.block_sparse_moe.experts.179.w1", "model.layers.17.block_sparse_moe.experts.180.w1", "model.layers.17.block_sparse_moe.experts.181.w1", "model.layers.17.block_sparse_moe.experts.182.w1", "model.layers.17.block_sparse_moe.experts.183.w1", "model.layers.17.block_sparse_moe.experts.184.w1", "model.layers.17.block_sparse_moe.experts.185.w1", "model.layers.17.block_sparse_moe.experts.186.w1", "model.layers.17.block_sparse_moe.experts.187.w1", "model.layers.17.block_sparse_moe.experts.188.w1", "model.layers.17.block_sparse_moe.experts.189.w1", "model.layers.17.block_sparse_moe.experts.190.w1", "model.layers.17.block_sparse_moe.experts.191.w1", "model.layers.17.block_sparse_moe.experts.192.w1", "model.layers.17.block_sparse_moe.experts.193.w1", "model.layers.17.block_sparse_moe.experts.194.w1", "model.layers.17.block_sparse_moe.experts.195.w1", "model.layers.17.block_sparse_moe.experts.196.w1", "model.layers.17.block_sparse_moe.experts.197.w1", "model.layers.17.block_sparse_moe.experts.198.w1", "model.layers.17.block_sparse_moe.experts.199.w1", "model.layers.17.block_sparse_moe.experts.200.w1", "model.layers.17.block_sparse_moe.experts.201.w1", "model.layers.17.block_sparse_moe.experts.202.w1", "model.layers.17.block_sparse_moe.experts.203.w1", "model.layers.17.block_sparse_moe.experts.204.w1", "model.layers.17.block_sparse_moe.experts.205.w1", "model.layers.17.block_sparse_moe.experts.206.w1", "model.layers.17.block_sparse_moe.experts.207.w1", "model.layers.17.block_sparse_moe.experts.208.w1", "model.layers.17.block_sparse_moe.experts.209.w1", "model.layers.17.block_sparse_moe.experts.210.w1", "model.layers.17.block_sparse_moe.experts.211.w1", "model.layers.17.block_sparse_moe.experts.212.w1", "model.layers.17.block_sparse_moe.experts.213.w1", "model.layers.17.block_sparse_moe.experts.214.w1", "model.layers.17.block_sparse_moe.experts.215.w1", "model.layers.17.block_sparse_moe.experts.216.w1", "model.layers.17.block_sparse_moe.experts.217.w1", "model.layers.17.block_sparse_moe.experts.218.w1", "model.layers.17.block_sparse_moe.experts.219.w1", "model.layers.17.block_sparse_moe.experts.220.w1", "model.layers.17.block_sparse_moe.experts.221.w1", "model.layers.17.block_sparse_moe.experts.222.w1", "model.layers.17.block_sparse_moe.experts.223.w1", "model.layers.17.block_sparse_moe.experts.224.w1", "model.layers.17.block_sparse_moe.experts.225.w1", "model.layers.17.block_sparse_moe.experts.226.w1", "model.layers.17.block_sparse_moe.experts.227.w1", "model.layers.17.block_sparse_moe.experts.228.w1", "model.layers.17.block_sparse_moe.experts.229.w1", "model.layers.17.block_sparse_moe.experts.230.w1", "model.layers.17.block_sparse_moe.experts.231.w1", "model.layers.17.block_sparse_moe.experts.232.w1", "model.layers.17.block_sparse_moe.experts.233.w1", "model.layers.17.block_sparse_moe.experts.234.w1", "model.layers.17.block_sparse_moe.experts.235.w1", "model.layers.17.block_sparse_moe.experts.236.w1", "model.layers.17.block_sparse_moe.experts.237.w1", "model.layers.17.block_sparse_moe.experts.238.w1", "model.layers.17.block_sparse_moe.experts.239.w1", "model.layers.17.block_sparse_moe.experts.240.w1", "model.layers.17.block_sparse_moe.experts.241.w1", "model.layers.17.block_sparse_moe.experts.242.w1", "model.layers.17.block_sparse_moe.experts.243.w1", "model.layers.17.block_sparse_moe.experts.244.w1", "model.layers.17.block_sparse_moe.experts.245.w1", "model.layers.17.block_sparse_moe.experts.246.w1", "model.layers.17.block_sparse_moe.experts.247.w1", "model.layers.17.block_sparse_moe.experts.248.w1", "model.layers.17.block_sparse_moe.experts.249.w1", "model.layers.17.block_sparse_moe.experts.250.w1", "model.layers.17.block_sparse_moe.experts.251.w1", "model.layers.17.block_sparse_moe.experts.252.w1", "model.layers.17.block_sparse_moe.experts.253.w1", "model.layers.17.block_sparse_moe.experts.254.w1", "model.layers.17.block_sparse_moe.experts.255.w1", "model.layers.17.block_sparse_moe.experts.0.w3", "model.layers.17.block_sparse_moe.experts.1.w3", "model.layers.17.block_sparse_moe.experts.2.w3", "model.layers.17.block_sparse_moe.experts.3.w3", "model.layers.17.block_sparse_moe.experts.4.w3", "model.layers.17.block_sparse_moe.experts.5.w3", "model.layers.17.block_sparse_moe.experts.6.w3", "model.layers.17.block_sparse_moe.experts.7.w3", "model.layers.17.block_sparse_moe.experts.8.w3", "model.layers.17.block_sparse_moe.experts.9.w3", "model.layers.17.block_sparse_moe.experts.10.w3", "model.layers.17.block_sparse_moe.experts.11.w3", "model.layers.17.block_sparse_moe.experts.12.w3", "model.layers.17.block_sparse_moe.experts.13.w3", "model.layers.17.block_sparse_moe.experts.14.w3", "model.layers.17.block_sparse_moe.experts.15.w3", "model.layers.17.block_sparse_moe.experts.16.w3", "model.layers.17.block_sparse_moe.experts.17.w3", "model.layers.17.block_sparse_moe.experts.18.w3", "model.layers.17.block_sparse_moe.experts.19.w3", "model.layers.17.block_sparse_moe.experts.20.w3", "model.layers.17.block_sparse_moe.experts.21.w3", "model.layers.17.block_sparse_moe.experts.22.w3", "model.layers.17.block_sparse_moe.experts.23.w3", "model.layers.17.block_sparse_moe.experts.24.w3", "model.layers.17.block_sparse_moe.experts.25.w3", "model.layers.17.block_sparse_moe.experts.26.w3", "model.layers.17.block_sparse_moe.experts.27.w3", "model.layers.17.block_sparse_moe.experts.28.w3", "model.layers.17.block_sparse_moe.experts.29.w3", "model.layers.17.block_sparse_moe.experts.30.w3", "model.layers.17.block_sparse_moe.experts.31.w3", "model.layers.17.block_sparse_moe.experts.32.w3", "model.layers.17.block_sparse_moe.experts.33.w3", "model.layers.17.block_sparse_moe.experts.34.w3", "model.layers.17.block_sparse_moe.experts.35.w3", "model.layers.17.block_sparse_moe.experts.36.w3", "model.layers.17.block_sparse_moe.experts.37.w3", "model.layers.17.block_sparse_moe.experts.38.w3", "model.layers.17.block_sparse_moe.experts.39.w3", "model.layers.17.block_sparse_moe.experts.40.w3", "model.layers.17.block_sparse_moe.experts.41.w3", "model.layers.17.block_sparse_moe.experts.42.w3", "model.layers.17.block_sparse_moe.experts.43.w3", "model.layers.17.block_sparse_moe.experts.44.w3", "model.layers.17.block_sparse_moe.experts.45.w3", "model.layers.17.block_sparse_moe.experts.46.w3", "model.layers.17.block_sparse_moe.experts.47.w3", "model.layers.17.block_sparse_moe.experts.48.w3", "model.layers.17.block_sparse_moe.experts.49.w3", "model.layers.17.block_sparse_moe.experts.50.w3", "model.layers.17.block_sparse_moe.experts.51.w3", "model.layers.17.block_sparse_moe.experts.52.w3", "model.layers.17.block_sparse_moe.experts.53.w3", "model.layers.17.block_sparse_moe.experts.54.w3", "model.layers.17.block_sparse_moe.experts.55.w3", "model.layers.17.block_sparse_moe.experts.56.w3", "model.layers.17.block_sparse_moe.experts.57.w3", "model.layers.17.block_sparse_moe.experts.58.w3", "model.layers.17.block_sparse_moe.experts.59.w3", "model.layers.17.block_sparse_moe.experts.60.w3", "model.layers.17.block_sparse_moe.experts.61.w3", "model.layers.17.block_sparse_moe.experts.62.w3", "model.layers.17.block_sparse_moe.experts.63.w3", "model.layers.17.block_sparse_moe.experts.64.w3", "model.layers.17.block_sparse_moe.experts.65.w3", "model.layers.17.block_sparse_moe.experts.66.w3", "model.layers.17.block_sparse_moe.experts.67.w3", "model.layers.17.block_sparse_moe.experts.68.w3", "model.layers.17.block_sparse_moe.experts.69.w3", "model.layers.17.block_sparse_moe.experts.70.w3", "model.layers.17.block_sparse_moe.experts.71.w3", "model.layers.17.block_sparse_moe.experts.72.w3", "model.layers.17.block_sparse_moe.experts.73.w3", "model.layers.17.block_sparse_moe.experts.74.w3", "model.layers.17.block_sparse_moe.experts.75.w3", "model.layers.17.block_sparse_moe.experts.76.w3", "model.layers.17.block_sparse_moe.experts.77.w3", "model.layers.17.block_sparse_moe.experts.78.w3", "model.layers.17.block_sparse_moe.experts.79.w3", "model.layers.17.block_sparse_moe.experts.80.w3", "model.layers.17.block_sparse_moe.experts.81.w3", "model.layers.17.block_sparse_moe.experts.82.w3", "model.layers.17.block_sparse_moe.experts.83.w3", "model.layers.17.block_sparse_moe.experts.84.w3", "model.layers.17.block_sparse_moe.experts.85.w3", "model.layers.17.block_sparse_moe.experts.86.w3", "model.layers.17.block_sparse_moe.experts.87.w3", "model.layers.17.block_sparse_moe.experts.88.w3", "model.layers.17.block_sparse_moe.experts.89.w3", "model.layers.17.block_sparse_moe.experts.90.w3", "model.layers.17.block_sparse_moe.experts.91.w3", "model.layers.17.block_sparse_moe.experts.92.w3", "model.layers.17.block_sparse_moe.experts.93.w3", "model.layers.17.block_sparse_moe.experts.94.w3", "model.layers.17.block_sparse_moe.experts.95.w3", "model.layers.17.block_sparse_moe.experts.96.w3", "model.layers.17.block_sparse_moe.experts.97.w3", "model.layers.17.block_sparse_moe.experts.98.w3", "model.layers.17.block_sparse_moe.experts.99.w3", "model.layers.17.block_sparse_moe.experts.100.w3", "model.layers.17.block_sparse_moe.experts.101.w3", "model.layers.17.block_sparse_moe.experts.102.w3", "model.layers.17.block_sparse_moe.experts.103.w3", "model.layers.17.block_sparse_moe.experts.104.w3", "model.layers.17.block_sparse_moe.experts.105.w3", "model.layers.17.block_sparse_moe.experts.106.w3", "model.layers.17.block_sparse_moe.experts.107.w3", "model.layers.17.block_sparse_moe.experts.108.w3", "model.layers.17.block_sparse_moe.experts.109.w3", "model.layers.17.block_sparse_moe.experts.110.w3", "model.layers.17.block_sparse_moe.experts.111.w3", "model.layers.17.block_sparse_moe.experts.112.w3", "model.layers.17.block_sparse_moe.experts.113.w3", "model.layers.17.block_sparse_moe.experts.114.w3", "model.layers.17.block_sparse_moe.experts.115.w3", "model.layers.17.block_sparse_moe.experts.116.w3", "model.layers.17.block_sparse_moe.experts.117.w3", "model.layers.17.block_sparse_moe.experts.118.w3", "model.layers.17.block_sparse_moe.experts.119.w3", "model.layers.17.block_sparse_moe.experts.120.w3", "model.layers.17.block_sparse_moe.experts.121.w3", "model.layers.17.block_sparse_moe.experts.122.w3", "model.layers.17.block_sparse_moe.experts.123.w3", "model.layers.17.block_sparse_moe.experts.124.w3", "model.layers.17.block_sparse_moe.experts.125.w3", "model.layers.17.block_sparse_moe.experts.126.w3", "model.layers.17.block_sparse_moe.experts.127.w3", "model.layers.17.block_sparse_moe.experts.128.w3", "model.layers.17.block_sparse_moe.experts.129.w3", "model.layers.17.block_sparse_moe.experts.130.w3", "model.layers.17.block_sparse_moe.experts.131.w3", "model.layers.17.block_sparse_moe.experts.132.w3", "model.layers.17.block_sparse_moe.experts.133.w3", "model.layers.17.block_sparse_moe.experts.134.w3", "model.layers.17.block_sparse_moe.experts.135.w3", "model.layers.17.block_sparse_moe.experts.136.w3", "model.layers.17.block_sparse_moe.experts.137.w3", "model.layers.17.block_sparse_moe.experts.138.w3", "model.layers.17.block_sparse_moe.experts.139.w3", "model.layers.17.block_sparse_moe.experts.140.w3", "model.layers.17.block_sparse_moe.experts.141.w3", "model.layers.17.block_sparse_moe.experts.142.w3", "model.layers.17.block_sparse_moe.experts.143.w3", "model.layers.17.block_sparse_moe.experts.144.w3", "model.layers.17.block_sparse_moe.experts.145.w3", "model.layers.17.block_sparse_moe.experts.146.w3", "model.layers.17.block_sparse_moe.experts.147.w3", "model.layers.17.block_sparse_moe.experts.148.w3", "model.layers.17.block_sparse_moe.experts.149.w3", "model.layers.17.block_sparse_moe.experts.150.w3", "model.layers.17.block_sparse_moe.experts.151.w3", "model.layers.17.block_sparse_moe.experts.152.w3", "model.layers.17.block_sparse_moe.experts.153.w3", "model.layers.17.block_sparse_moe.experts.154.w3", "model.layers.17.block_sparse_moe.experts.155.w3", "model.layers.17.block_sparse_moe.experts.156.w3", "model.layers.17.block_sparse_moe.experts.157.w3", "model.layers.17.block_sparse_moe.experts.158.w3", "model.layers.17.block_sparse_moe.experts.159.w3", "model.layers.17.block_sparse_moe.experts.160.w3", "model.layers.17.block_sparse_moe.experts.161.w3", "model.layers.17.block_sparse_moe.experts.162.w3", "model.layers.17.block_sparse_moe.experts.163.w3", "model.layers.17.block_sparse_moe.experts.164.w3", "model.layers.17.block_sparse_moe.experts.165.w3", "model.layers.17.block_sparse_moe.experts.166.w3", "model.layers.17.block_sparse_moe.experts.167.w3", "model.layers.17.block_sparse_moe.experts.168.w3", "model.layers.17.block_sparse_moe.experts.169.w3", "model.layers.17.block_sparse_moe.experts.170.w3", "model.layers.17.block_sparse_moe.experts.171.w3", "model.layers.17.block_sparse_moe.experts.172.w3", "model.layers.17.block_sparse_moe.experts.173.w3", "model.layers.17.block_sparse_moe.experts.174.w3", "model.layers.17.block_sparse_moe.experts.175.w3", "model.layers.17.block_sparse_moe.experts.176.w3", "model.layers.17.block_sparse_moe.experts.177.w3", "model.layers.17.block_sparse_moe.experts.178.w3", "model.layers.17.block_sparse_moe.experts.179.w3", "model.layers.17.block_sparse_moe.experts.180.w3", "model.layers.17.block_sparse_moe.experts.181.w3", "model.layers.17.block_sparse_moe.experts.182.w3", "model.layers.17.block_sparse_moe.experts.183.w3", "model.layers.17.block_sparse_moe.experts.184.w3", "model.layers.17.block_sparse_moe.experts.185.w3", "model.layers.17.block_sparse_moe.experts.186.w3", "model.layers.17.block_sparse_moe.experts.187.w3", "model.layers.17.block_sparse_moe.experts.188.w3", "model.layers.17.block_sparse_moe.experts.189.w3", "model.layers.17.block_sparse_moe.experts.190.w3", "model.layers.17.block_sparse_moe.experts.191.w3", "model.layers.17.block_sparse_moe.experts.192.w3", "model.layers.17.block_sparse_moe.experts.193.w3", "model.layers.17.block_sparse_moe.experts.194.w3", "model.layers.17.block_sparse_moe.experts.195.w3", "model.layers.17.block_sparse_moe.experts.196.w3", "model.layers.17.block_sparse_moe.experts.197.w3", "model.layers.17.block_sparse_moe.experts.198.w3", "model.layers.17.block_sparse_moe.experts.199.w3", "model.layers.17.block_sparse_moe.experts.200.w3", "model.layers.17.block_sparse_moe.experts.201.w3", "model.layers.17.block_sparse_moe.experts.202.w3", "model.layers.17.block_sparse_moe.experts.203.w3", "model.layers.17.block_sparse_moe.experts.204.w3", "model.layers.17.block_sparse_moe.experts.205.w3", "model.layers.17.block_sparse_moe.experts.206.w3", "model.layers.17.block_sparse_moe.experts.207.w3", "model.layers.17.block_sparse_moe.experts.208.w3", "model.layers.17.block_sparse_moe.experts.209.w3", "model.layers.17.block_sparse_moe.experts.210.w3", "model.layers.17.block_sparse_moe.experts.211.w3", "model.layers.17.block_sparse_moe.experts.212.w3", "model.layers.17.block_sparse_moe.experts.213.w3", "model.layers.17.block_sparse_moe.experts.214.w3", "model.layers.17.block_sparse_moe.experts.215.w3", "model.layers.17.block_sparse_moe.experts.216.w3", "model.layers.17.block_sparse_moe.experts.217.w3", "model.layers.17.block_sparse_moe.experts.218.w3", "model.layers.17.block_sparse_moe.experts.219.w3", "model.layers.17.block_sparse_moe.experts.220.w3", "model.layers.17.block_sparse_moe.experts.221.w3", "model.layers.17.block_sparse_moe.experts.222.w3", "model.layers.17.block_sparse_moe.experts.223.w3", "model.layers.17.block_sparse_moe.experts.224.w3", "model.layers.17.block_sparse_moe.experts.225.w3", "model.layers.17.block_sparse_moe.experts.226.w3", "model.layers.17.block_sparse_moe.experts.227.w3", "model.layers.17.block_sparse_moe.experts.228.w3", "model.layers.17.block_sparse_moe.experts.229.w3", "model.layers.17.block_sparse_moe.experts.230.w3", "model.layers.17.block_sparse_moe.experts.231.w3", "model.layers.17.block_sparse_moe.experts.232.w3", "model.layers.17.block_sparse_moe.experts.233.w3", "model.layers.17.block_sparse_moe.experts.234.w3", "model.layers.17.block_sparse_moe.experts.235.w3", "model.layers.17.block_sparse_moe.experts.236.w3", "model.layers.17.block_sparse_moe.experts.237.w3", "model.layers.17.block_sparse_moe.experts.238.w3", "model.layers.17.block_sparse_moe.experts.239.w3", "model.layers.17.block_sparse_moe.experts.240.w3", "model.layers.17.block_sparse_moe.experts.241.w3", "model.layers.17.block_sparse_moe.experts.242.w3", "model.layers.17.block_sparse_moe.experts.243.w3", "model.layers.17.block_sparse_moe.experts.244.w3", "model.layers.17.block_sparse_moe.experts.245.w3", "model.layers.17.block_sparse_moe.experts.246.w3", "model.layers.17.block_sparse_moe.experts.247.w3", "model.layers.17.block_sparse_moe.experts.248.w3", "model.layers.17.block_sparse_moe.experts.249.w3", "model.layers.17.block_sparse_moe.experts.250.w3", "model.layers.17.block_sparse_moe.experts.251.w3", "model.layers.17.block_sparse_moe.experts.252.w3", "model.layers.17.block_sparse_moe.experts.253.w3", "model.layers.17.block_sparse_moe.experts.254.w3", "model.layers.17.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.001434944197535537, "dbits": 2415919104 } ] }, { "idx": 89, "layers": [ "model.layers.17.block_sparse_moe.experts.0.w2", "model.layers.17.block_sparse_moe.experts.1.w2", "model.layers.17.block_sparse_moe.experts.2.w2", "model.layers.17.block_sparse_moe.experts.3.w2", "model.layers.17.block_sparse_moe.experts.4.w2", "model.layers.17.block_sparse_moe.experts.5.w2", "model.layers.17.block_sparse_moe.experts.6.w2", "model.layers.17.block_sparse_moe.experts.7.w2", "model.layers.17.block_sparse_moe.experts.8.w2", "model.layers.17.block_sparse_moe.experts.9.w2", "model.layers.17.block_sparse_moe.experts.10.w2", "model.layers.17.block_sparse_moe.experts.11.w2", "model.layers.17.block_sparse_moe.experts.12.w2", "model.layers.17.block_sparse_moe.experts.13.w2", "model.layers.17.block_sparse_moe.experts.14.w2", "model.layers.17.block_sparse_moe.experts.15.w2", "model.layers.17.block_sparse_moe.experts.16.w2", "model.layers.17.block_sparse_moe.experts.17.w2", "model.layers.17.block_sparse_moe.experts.18.w2", "model.layers.17.block_sparse_moe.experts.19.w2", "model.layers.17.block_sparse_moe.experts.20.w2", "model.layers.17.block_sparse_moe.experts.21.w2", "model.layers.17.block_sparse_moe.experts.22.w2", "model.layers.17.block_sparse_moe.experts.23.w2", "model.layers.17.block_sparse_moe.experts.24.w2", "model.layers.17.block_sparse_moe.experts.25.w2", "model.layers.17.block_sparse_moe.experts.26.w2", "model.layers.17.block_sparse_moe.experts.27.w2", "model.layers.17.block_sparse_moe.experts.28.w2", "model.layers.17.block_sparse_moe.experts.29.w2", "model.layers.17.block_sparse_moe.experts.30.w2", "model.layers.17.block_sparse_moe.experts.31.w2", "model.layers.17.block_sparse_moe.experts.32.w2", "model.layers.17.block_sparse_moe.experts.33.w2", "model.layers.17.block_sparse_moe.experts.34.w2", "model.layers.17.block_sparse_moe.experts.35.w2", "model.layers.17.block_sparse_moe.experts.36.w2", "model.layers.17.block_sparse_moe.experts.37.w2", "model.layers.17.block_sparse_moe.experts.38.w2", "model.layers.17.block_sparse_moe.experts.39.w2", "model.layers.17.block_sparse_moe.experts.40.w2", "model.layers.17.block_sparse_moe.experts.41.w2", "model.layers.17.block_sparse_moe.experts.42.w2", "model.layers.17.block_sparse_moe.experts.43.w2", "model.layers.17.block_sparse_moe.experts.44.w2", "model.layers.17.block_sparse_moe.experts.45.w2", "model.layers.17.block_sparse_moe.experts.46.w2", "model.layers.17.block_sparse_moe.experts.47.w2", "model.layers.17.block_sparse_moe.experts.48.w2", "model.layers.17.block_sparse_moe.experts.49.w2", "model.layers.17.block_sparse_moe.experts.50.w2", "model.layers.17.block_sparse_moe.experts.51.w2", "model.layers.17.block_sparse_moe.experts.52.w2", "model.layers.17.block_sparse_moe.experts.53.w2", "model.layers.17.block_sparse_moe.experts.54.w2", "model.layers.17.block_sparse_moe.experts.55.w2", "model.layers.17.block_sparse_moe.experts.56.w2", "model.layers.17.block_sparse_moe.experts.57.w2", "model.layers.17.block_sparse_moe.experts.58.w2", "model.layers.17.block_sparse_moe.experts.59.w2", "model.layers.17.block_sparse_moe.experts.60.w2", "model.layers.17.block_sparse_moe.experts.61.w2", "model.layers.17.block_sparse_moe.experts.62.w2", "model.layers.17.block_sparse_moe.experts.63.w2", "model.layers.17.block_sparse_moe.experts.64.w2", "model.layers.17.block_sparse_moe.experts.65.w2", "model.layers.17.block_sparse_moe.experts.66.w2", "model.layers.17.block_sparse_moe.experts.67.w2", "model.layers.17.block_sparse_moe.experts.68.w2", "model.layers.17.block_sparse_moe.experts.69.w2", "model.layers.17.block_sparse_moe.experts.70.w2", "model.layers.17.block_sparse_moe.experts.71.w2", "model.layers.17.block_sparse_moe.experts.72.w2", "model.layers.17.block_sparse_moe.experts.73.w2", "model.layers.17.block_sparse_moe.experts.74.w2", "model.layers.17.block_sparse_moe.experts.75.w2", "model.layers.17.block_sparse_moe.experts.76.w2", "model.layers.17.block_sparse_moe.experts.77.w2", "model.layers.17.block_sparse_moe.experts.78.w2", "model.layers.17.block_sparse_moe.experts.79.w2", "model.layers.17.block_sparse_moe.experts.80.w2", "model.layers.17.block_sparse_moe.experts.81.w2", "model.layers.17.block_sparse_moe.experts.82.w2", "model.layers.17.block_sparse_moe.experts.83.w2", "model.layers.17.block_sparse_moe.experts.84.w2", "model.layers.17.block_sparse_moe.experts.85.w2", "model.layers.17.block_sparse_moe.experts.86.w2", "model.layers.17.block_sparse_moe.experts.87.w2", "model.layers.17.block_sparse_moe.experts.88.w2", "model.layers.17.block_sparse_moe.experts.89.w2", "model.layers.17.block_sparse_moe.experts.90.w2", "model.layers.17.block_sparse_moe.experts.91.w2", "model.layers.17.block_sparse_moe.experts.92.w2", "model.layers.17.block_sparse_moe.experts.93.w2", "model.layers.17.block_sparse_moe.experts.94.w2", "model.layers.17.block_sparse_moe.experts.95.w2", "model.layers.17.block_sparse_moe.experts.96.w2", "model.layers.17.block_sparse_moe.experts.97.w2", "model.layers.17.block_sparse_moe.experts.98.w2", "model.layers.17.block_sparse_moe.experts.99.w2", "model.layers.17.block_sparse_moe.experts.100.w2", "model.layers.17.block_sparse_moe.experts.101.w2", "model.layers.17.block_sparse_moe.experts.102.w2", "model.layers.17.block_sparse_moe.experts.103.w2", "model.layers.17.block_sparse_moe.experts.104.w2", "model.layers.17.block_sparse_moe.experts.105.w2", "model.layers.17.block_sparse_moe.experts.106.w2", "model.layers.17.block_sparse_moe.experts.107.w2", "model.layers.17.block_sparse_moe.experts.108.w2", "model.layers.17.block_sparse_moe.experts.109.w2", "model.layers.17.block_sparse_moe.experts.110.w2", "model.layers.17.block_sparse_moe.experts.111.w2", "model.layers.17.block_sparse_moe.experts.112.w2", "model.layers.17.block_sparse_moe.experts.113.w2", "model.layers.17.block_sparse_moe.experts.114.w2", "model.layers.17.block_sparse_moe.experts.115.w2", "model.layers.17.block_sparse_moe.experts.116.w2", "model.layers.17.block_sparse_moe.experts.117.w2", "model.layers.17.block_sparse_moe.experts.118.w2", "model.layers.17.block_sparse_moe.experts.119.w2", "model.layers.17.block_sparse_moe.experts.120.w2", "model.layers.17.block_sparse_moe.experts.121.w2", "model.layers.17.block_sparse_moe.experts.122.w2", "model.layers.17.block_sparse_moe.experts.123.w2", "model.layers.17.block_sparse_moe.experts.124.w2", "model.layers.17.block_sparse_moe.experts.125.w2", "model.layers.17.block_sparse_moe.experts.126.w2", "model.layers.17.block_sparse_moe.experts.127.w2", "model.layers.17.block_sparse_moe.experts.128.w2", "model.layers.17.block_sparse_moe.experts.129.w2", "model.layers.17.block_sparse_moe.experts.130.w2", "model.layers.17.block_sparse_moe.experts.131.w2", "model.layers.17.block_sparse_moe.experts.132.w2", "model.layers.17.block_sparse_moe.experts.133.w2", "model.layers.17.block_sparse_moe.experts.134.w2", "model.layers.17.block_sparse_moe.experts.135.w2", "model.layers.17.block_sparse_moe.experts.136.w2", "model.layers.17.block_sparse_moe.experts.137.w2", "model.layers.17.block_sparse_moe.experts.138.w2", "model.layers.17.block_sparse_moe.experts.139.w2", "model.layers.17.block_sparse_moe.experts.140.w2", "model.layers.17.block_sparse_moe.experts.141.w2", "model.layers.17.block_sparse_moe.experts.142.w2", "model.layers.17.block_sparse_moe.experts.143.w2", "model.layers.17.block_sparse_moe.experts.144.w2", "model.layers.17.block_sparse_moe.experts.145.w2", "model.layers.17.block_sparse_moe.experts.146.w2", "model.layers.17.block_sparse_moe.experts.147.w2", "model.layers.17.block_sparse_moe.experts.148.w2", "model.layers.17.block_sparse_moe.experts.149.w2", "model.layers.17.block_sparse_moe.experts.150.w2", "model.layers.17.block_sparse_moe.experts.151.w2", "model.layers.17.block_sparse_moe.experts.152.w2", "model.layers.17.block_sparse_moe.experts.153.w2", "model.layers.17.block_sparse_moe.experts.154.w2", "model.layers.17.block_sparse_moe.experts.155.w2", "model.layers.17.block_sparse_moe.experts.156.w2", "model.layers.17.block_sparse_moe.experts.157.w2", "model.layers.17.block_sparse_moe.experts.158.w2", "model.layers.17.block_sparse_moe.experts.159.w2", "model.layers.17.block_sparse_moe.experts.160.w2", "model.layers.17.block_sparse_moe.experts.161.w2", "model.layers.17.block_sparse_moe.experts.162.w2", "model.layers.17.block_sparse_moe.experts.163.w2", "model.layers.17.block_sparse_moe.experts.164.w2", "model.layers.17.block_sparse_moe.experts.165.w2", "model.layers.17.block_sparse_moe.experts.166.w2", "model.layers.17.block_sparse_moe.experts.167.w2", "model.layers.17.block_sparse_moe.experts.168.w2", "model.layers.17.block_sparse_moe.experts.169.w2", "model.layers.17.block_sparse_moe.experts.170.w2", "model.layers.17.block_sparse_moe.experts.171.w2", "model.layers.17.block_sparse_moe.experts.172.w2", "model.layers.17.block_sparse_moe.experts.173.w2", "model.layers.17.block_sparse_moe.experts.174.w2", "model.layers.17.block_sparse_moe.experts.175.w2", "model.layers.17.block_sparse_moe.experts.176.w2", "model.layers.17.block_sparse_moe.experts.177.w2", "model.layers.17.block_sparse_moe.experts.178.w2", "model.layers.17.block_sparse_moe.experts.179.w2", "model.layers.17.block_sparse_moe.experts.180.w2", "model.layers.17.block_sparse_moe.experts.181.w2", "model.layers.17.block_sparse_moe.experts.182.w2", "model.layers.17.block_sparse_moe.experts.183.w2", "model.layers.17.block_sparse_moe.experts.184.w2", "model.layers.17.block_sparse_moe.experts.185.w2", "model.layers.17.block_sparse_moe.experts.186.w2", "model.layers.17.block_sparse_moe.experts.187.w2", "model.layers.17.block_sparse_moe.experts.188.w2", "model.layers.17.block_sparse_moe.experts.189.w2", "model.layers.17.block_sparse_moe.experts.190.w2", "model.layers.17.block_sparse_moe.experts.191.w2", "model.layers.17.block_sparse_moe.experts.192.w2", "model.layers.17.block_sparse_moe.experts.193.w2", "model.layers.17.block_sparse_moe.experts.194.w2", "model.layers.17.block_sparse_moe.experts.195.w2", "model.layers.17.block_sparse_moe.experts.196.w2", "model.layers.17.block_sparse_moe.experts.197.w2", "model.layers.17.block_sparse_moe.experts.198.w2", "model.layers.17.block_sparse_moe.experts.199.w2", "model.layers.17.block_sparse_moe.experts.200.w2", "model.layers.17.block_sparse_moe.experts.201.w2", "model.layers.17.block_sparse_moe.experts.202.w2", "model.layers.17.block_sparse_moe.experts.203.w2", "model.layers.17.block_sparse_moe.experts.204.w2", "model.layers.17.block_sparse_moe.experts.205.w2", "model.layers.17.block_sparse_moe.experts.206.w2", "model.layers.17.block_sparse_moe.experts.207.w2", "model.layers.17.block_sparse_moe.experts.208.w2", "model.layers.17.block_sparse_moe.experts.209.w2", "model.layers.17.block_sparse_moe.experts.210.w2", "model.layers.17.block_sparse_moe.experts.211.w2", "model.layers.17.block_sparse_moe.experts.212.w2", "model.layers.17.block_sparse_moe.experts.213.w2", "model.layers.17.block_sparse_moe.experts.214.w2", "model.layers.17.block_sparse_moe.experts.215.w2", "model.layers.17.block_sparse_moe.experts.216.w2", "model.layers.17.block_sparse_moe.experts.217.w2", "model.layers.17.block_sparse_moe.experts.218.w2", "model.layers.17.block_sparse_moe.experts.219.w2", "model.layers.17.block_sparse_moe.experts.220.w2", "model.layers.17.block_sparse_moe.experts.221.w2", "model.layers.17.block_sparse_moe.experts.222.w2", "model.layers.17.block_sparse_moe.experts.223.w2", "model.layers.17.block_sparse_moe.experts.224.w2", "model.layers.17.block_sparse_moe.experts.225.w2", "model.layers.17.block_sparse_moe.experts.226.w2", "model.layers.17.block_sparse_moe.experts.227.w2", "model.layers.17.block_sparse_moe.experts.228.w2", "model.layers.17.block_sparse_moe.experts.229.w2", "model.layers.17.block_sparse_moe.experts.230.w2", "model.layers.17.block_sparse_moe.experts.231.w2", "model.layers.17.block_sparse_moe.experts.232.w2", "model.layers.17.block_sparse_moe.experts.233.w2", "model.layers.17.block_sparse_moe.experts.234.w2", "model.layers.17.block_sparse_moe.experts.235.w2", "model.layers.17.block_sparse_moe.experts.236.w2", "model.layers.17.block_sparse_moe.experts.237.w2", "model.layers.17.block_sparse_moe.experts.238.w2", "model.layers.17.block_sparse_moe.experts.239.w2", "model.layers.17.block_sparse_moe.experts.240.w2", "model.layers.17.block_sparse_moe.experts.241.w2", "model.layers.17.block_sparse_moe.experts.242.w2", "model.layers.17.block_sparse_moe.experts.243.w2", "model.layers.17.block_sparse_moe.experts.244.w2", "model.layers.17.block_sparse_moe.experts.245.w2", "model.layers.17.block_sparse_moe.experts.246.w2", "model.layers.17.block_sparse_moe.experts.247.w2", "model.layers.17.block_sparse_moe.experts.248.w2", "model.layers.17.block_sparse_moe.experts.249.w2", "model.layers.17.block_sparse_moe.experts.250.w2", "model.layers.17.block_sparse_moe.experts.251.w2", "model.layers.17.block_sparse_moe.experts.252.w2", "model.layers.17.block_sparse_moe.experts.253.w2", "model.layers.17.block_sparse_moe.experts.254.w2", "model.layers.17.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.0006471261382102744, "dbits": 1207959552 } ] }, { "idx": 90, "layers": [ "model.layers.18.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0005644541233778, "dbits": 18874368 } ] }, { "idx": 91, "layers": [ "model.layers.18.self_attn.k_proj", "model.layers.18.self_attn.v_proj" ], "candidates": [ { "dkld": -0.004864127933979057, "dbits": 6291456 } ] }, { "idx": 92, "layers": [ "model.layers.18.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0011403109878301287, "dbits": 18874368 } ] }, { "idx": 93, "layers": [ "model.layers.18.block_sparse_moe.experts.0.w1", "model.layers.18.block_sparse_moe.experts.1.w1", "model.layers.18.block_sparse_moe.experts.2.w1", "model.layers.18.block_sparse_moe.experts.3.w1", "model.layers.18.block_sparse_moe.experts.4.w1", "model.layers.18.block_sparse_moe.experts.5.w1", "model.layers.18.block_sparse_moe.experts.6.w1", "model.layers.18.block_sparse_moe.experts.7.w1", "model.layers.18.block_sparse_moe.experts.8.w1", "model.layers.18.block_sparse_moe.experts.9.w1", "model.layers.18.block_sparse_moe.experts.10.w1", "model.layers.18.block_sparse_moe.experts.11.w1", "model.layers.18.block_sparse_moe.experts.12.w1", "model.layers.18.block_sparse_moe.experts.13.w1", "model.layers.18.block_sparse_moe.experts.14.w1", "model.layers.18.block_sparse_moe.experts.15.w1", "model.layers.18.block_sparse_moe.experts.16.w1", "model.layers.18.block_sparse_moe.experts.17.w1", "model.layers.18.block_sparse_moe.experts.18.w1", "model.layers.18.block_sparse_moe.experts.19.w1", "model.layers.18.block_sparse_moe.experts.20.w1", "model.layers.18.block_sparse_moe.experts.21.w1", "model.layers.18.block_sparse_moe.experts.22.w1", "model.layers.18.block_sparse_moe.experts.23.w1", "model.layers.18.block_sparse_moe.experts.24.w1", "model.layers.18.block_sparse_moe.experts.25.w1", "model.layers.18.block_sparse_moe.experts.26.w1", "model.layers.18.block_sparse_moe.experts.27.w1", "model.layers.18.block_sparse_moe.experts.28.w1", "model.layers.18.block_sparse_moe.experts.29.w1", "model.layers.18.block_sparse_moe.experts.30.w1", "model.layers.18.block_sparse_moe.experts.31.w1", "model.layers.18.block_sparse_moe.experts.32.w1", "model.layers.18.block_sparse_moe.experts.33.w1", "model.layers.18.block_sparse_moe.experts.34.w1", "model.layers.18.block_sparse_moe.experts.35.w1", "model.layers.18.block_sparse_moe.experts.36.w1", "model.layers.18.block_sparse_moe.experts.37.w1", "model.layers.18.block_sparse_moe.experts.38.w1", "model.layers.18.block_sparse_moe.experts.39.w1", "model.layers.18.block_sparse_moe.experts.40.w1", "model.layers.18.block_sparse_moe.experts.41.w1", "model.layers.18.block_sparse_moe.experts.42.w1", "model.layers.18.block_sparse_moe.experts.43.w1", "model.layers.18.block_sparse_moe.experts.44.w1", "model.layers.18.block_sparse_moe.experts.45.w1", "model.layers.18.block_sparse_moe.experts.46.w1", "model.layers.18.block_sparse_moe.experts.47.w1", "model.layers.18.block_sparse_moe.experts.48.w1", "model.layers.18.block_sparse_moe.experts.49.w1", "model.layers.18.block_sparse_moe.experts.50.w1", "model.layers.18.block_sparse_moe.experts.51.w1", "model.layers.18.block_sparse_moe.experts.52.w1", "model.layers.18.block_sparse_moe.experts.53.w1", "model.layers.18.block_sparse_moe.experts.54.w1", "model.layers.18.block_sparse_moe.experts.55.w1", "model.layers.18.block_sparse_moe.experts.56.w1", "model.layers.18.block_sparse_moe.experts.57.w1", "model.layers.18.block_sparse_moe.experts.58.w1", "model.layers.18.block_sparse_moe.experts.59.w1", "model.layers.18.block_sparse_moe.experts.60.w1", "model.layers.18.block_sparse_moe.experts.61.w1", "model.layers.18.block_sparse_moe.experts.62.w1", "model.layers.18.block_sparse_moe.experts.63.w1", "model.layers.18.block_sparse_moe.experts.64.w1", "model.layers.18.block_sparse_moe.experts.65.w1", "model.layers.18.block_sparse_moe.experts.66.w1", "model.layers.18.block_sparse_moe.experts.67.w1", "model.layers.18.block_sparse_moe.experts.68.w1", "model.layers.18.block_sparse_moe.experts.69.w1", "model.layers.18.block_sparse_moe.experts.70.w1", "model.layers.18.block_sparse_moe.experts.71.w1", "model.layers.18.block_sparse_moe.experts.72.w1", "model.layers.18.block_sparse_moe.experts.73.w1", "model.layers.18.block_sparse_moe.experts.74.w1", "model.layers.18.block_sparse_moe.experts.75.w1", "model.layers.18.block_sparse_moe.experts.76.w1", "model.layers.18.block_sparse_moe.experts.77.w1", "model.layers.18.block_sparse_moe.experts.78.w1", "model.layers.18.block_sparse_moe.experts.79.w1", "model.layers.18.block_sparse_moe.experts.80.w1", "model.layers.18.block_sparse_moe.experts.81.w1", "model.layers.18.block_sparse_moe.experts.82.w1", "model.layers.18.block_sparse_moe.experts.83.w1", "model.layers.18.block_sparse_moe.experts.84.w1", "model.layers.18.block_sparse_moe.experts.85.w1", "model.layers.18.block_sparse_moe.experts.86.w1", "model.layers.18.block_sparse_moe.experts.87.w1", "model.layers.18.block_sparse_moe.experts.88.w1", "model.layers.18.block_sparse_moe.experts.89.w1", "model.layers.18.block_sparse_moe.experts.90.w1", "model.layers.18.block_sparse_moe.experts.91.w1", "model.layers.18.block_sparse_moe.experts.92.w1", "model.layers.18.block_sparse_moe.experts.93.w1", "model.layers.18.block_sparse_moe.experts.94.w1", "model.layers.18.block_sparse_moe.experts.95.w1", "model.layers.18.block_sparse_moe.experts.96.w1", "model.layers.18.block_sparse_moe.experts.97.w1", "model.layers.18.block_sparse_moe.experts.98.w1", "model.layers.18.block_sparse_moe.experts.99.w1", "model.layers.18.block_sparse_moe.experts.100.w1", "model.layers.18.block_sparse_moe.experts.101.w1", "model.layers.18.block_sparse_moe.experts.102.w1", "model.layers.18.block_sparse_moe.experts.103.w1", "model.layers.18.block_sparse_moe.experts.104.w1", "model.layers.18.block_sparse_moe.experts.105.w1", "model.layers.18.block_sparse_moe.experts.106.w1", "model.layers.18.block_sparse_moe.experts.107.w1", "model.layers.18.block_sparse_moe.experts.108.w1", "model.layers.18.block_sparse_moe.experts.109.w1", "model.layers.18.block_sparse_moe.experts.110.w1", "model.layers.18.block_sparse_moe.experts.111.w1", "model.layers.18.block_sparse_moe.experts.112.w1", "model.layers.18.block_sparse_moe.experts.113.w1", "model.layers.18.block_sparse_moe.experts.114.w1", "model.layers.18.block_sparse_moe.experts.115.w1", "model.layers.18.block_sparse_moe.experts.116.w1", "model.layers.18.block_sparse_moe.experts.117.w1", "model.layers.18.block_sparse_moe.experts.118.w1", "model.layers.18.block_sparse_moe.experts.119.w1", "model.layers.18.block_sparse_moe.experts.120.w1", "model.layers.18.block_sparse_moe.experts.121.w1", "model.layers.18.block_sparse_moe.experts.122.w1", "model.layers.18.block_sparse_moe.experts.123.w1", "model.layers.18.block_sparse_moe.experts.124.w1", "model.layers.18.block_sparse_moe.experts.125.w1", "model.layers.18.block_sparse_moe.experts.126.w1", "model.layers.18.block_sparse_moe.experts.127.w1", "model.layers.18.block_sparse_moe.experts.128.w1", "model.layers.18.block_sparse_moe.experts.129.w1", "model.layers.18.block_sparse_moe.experts.130.w1", "model.layers.18.block_sparse_moe.experts.131.w1", "model.layers.18.block_sparse_moe.experts.132.w1", "model.layers.18.block_sparse_moe.experts.133.w1", "model.layers.18.block_sparse_moe.experts.134.w1", "model.layers.18.block_sparse_moe.experts.135.w1", "model.layers.18.block_sparse_moe.experts.136.w1", "model.layers.18.block_sparse_moe.experts.137.w1", "model.layers.18.block_sparse_moe.experts.138.w1", "model.layers.18.block_sparse_moe.experts.139.w1", "model.layers.18.block_sparse_moe.experts.140.w1", "model.layers.18.block_sparse_moe.experts.141.w1", "model.layers.18.block_sparse_moe.experts.142.w1", "model.layers.18.block_sparse_moe.experts.143.w1", "model.layers.18.block_sparse_moe.experts.144.w1", "model.layers.18.block_sparse_moe.experts.145.w1", "model.layers.18.block_sparse_moe.experts.146.w1", "model.layers.18.block_sparse_moe.experts.147.w1", "model.layers.18.block_sparse_moe.experts.148.w1", "model.layers.18.block_sparse_moe.experts.149.w1", "model.layers.18.block_sparse_moe.experts.150.w1", "model.layers.18.block_sparse_moe.experts.151.w1", "model.layers.18.block_sparse_moe.experts.152.w1", "model.layers.18.block_sparse_moe.experts.153.w1", "model.layers.18.block_sparse_moe.experts.154.w1", "model.layers.18.block_sparse_moe.experts.155.w1", "model.layers.18.block_sparse_moe.experts.156.w1", "model.layers.18.block_sparse_moe.experts.157.w1", "model.layers.18.block_sparse_moe.experts.158.w1", "model.layers.18.block_sparse_moe.experts.159.w1", "model.layers.18.block_sparse_moe.experts.160.w1", "model.layers.18.block_sparse_moe.experts.161.w1", "model.layers.18.block_sparse_moe.experts.162.w1", "model.layers.18.block_sparse_moe.experts.163.w1", "model.layers.18.block_sparse_moe.experts.164.w1", "model.layers.18.block_sparse_moe.experts.165.w1", "model.layers.18.block_sparse_moe.experts.166.w1", "model.layers.18.block_sparse_moe.experts.167.w1", "model.layers.18.block_sparse_moe.experts.168.w1", "model.layers.18.block_sparse_moe.experts.169.w1", "model.layers.18.block_sparse_moe.experts.170.w1", "model.layers.18.block_sparse_moe.experts.171.w1", "model.layers.18.block_sparse_moe.experts.172.w1", "model.layers.18.block_sparse_moe.experts.173.w1", "model.layers.18.block_sparse_moe.experts.174.w1", "model.layers.18.block_sparse_moe.experts.175.w1", "model.layers.18.block_sparse_moe.experts.176.w1", "model.layers.18.block_sparse_moe.experts.177.w1", "model.layers.18.block_sparse_moe.experts.178.w1", "model.layers.18.block_sparse_moe.experts.179.w1", "model.layers.18.block_sparse_moe.experts.180.w1", "model.layers.18.block_sparse_moe.experts.181.w1", "model.layers.18.block_sparse_moe.experts.182.w1", "model.layers.18.block_sparse_moe.experts.183.w1", "model.layers.18.block_sparse_moe.experts.184.w1", "model.layers.18.block_sparse_moe.experts.185.w1", "model.layers.18.block_sparse_moe.experts.186.w1", "model.layers.18.block_sparse_moe.experts.187.w1", "model.layers.18.block_sparse_moe.experts.188.w1", "model.layers.18.block_sparse_moe.experts.189.w1", "model.layers.18.block_sparse_moe.experts.190.w1", "model.layers.18.block_sparse_moe.experts.191.w1", "model.layers.18.block_sparse_moe.experts.192.w1", "model.layers.18.block_sparse_moe.experts.193.w1", "model.layers.18.block_sparse_moe.experts.194.w1", "model.layers.18.block_sparse_moe.experts.195.w1", "model.layers.18.block_sparse_moe.experts.196.w1", "model.layers.18.block_sparse_moe.experts.197.w1", "model.layers.18.block_sparse_moe.experts.198.w1", "model.layers.18.block_sparse_moe.experts.199.w1", "model.layers.18.block_sparse_moe.experts.200.w1", "model.layers.18.block_sparse_moe.experts.201.w1", "model.layers.18.block_sparse_moe.experts.202.w1", "model.layers.18.block_sparse_moe.experts.203.w1", "model.layers.18.block_sparse_moe.experts.204.w1", "model.layers.18.block_sparse_moe.experts.205.w1", "model.layers.18.block_sparse_moe.experts.206.w1", "model.layers.18.block_sparse_moe.experts.207.w1", "model.layers.18.block_sparse_moe.experts.208.w1", "model.layers.18.block_sparse_moe.experts.209.w1", "model.layers.18.block_sparse_moe.experts.210.w1", "model.layers.18.block_sparse_moe.experts.211.w1", "model.layers.18.block_sparse_moe.experts.212.w1", "model.layers.18.block_sparse_moe.experts.213.w1", "model.layers.18.block_sparse_moe.experts.214.w1", "model.layers.18.block_sparse_moe.experts.215.w1", "model.layers.18.block_sparse_moe.experts.216.w1", "model.layers.18.block_sparse_moe.experts.217.w1", "model.layers.18.block_sparse_moe.experts.218.w1", "model.layers.18.block_sparse_moe.experts.219.w1", "model.layers.18.block_sparse_moe.experts.220.w1", "model.layers.18.block_sparse_moe.experts.221.w1", "model.layers.18.block_sparse_moe.experts.222.w1", "model.layers.18.block_sparse_moe.experts.223.w1", "model.layers.18.block_sparse_moe.experts.224.w1", "model.layers.18.block_sparse_moe.experts.225.w1", "model.layers.18.block_sparse_moe.experts.226.w1", "model.layers.18.block_sparse_moe.experts.227.w1", "model.layers.18.block_sparse_moe.experts.228.w1", "model.layers.18.block_sparse_moe.experts.229.w1", "model.layers.18.block_sparse_moe.experts.230.w1", "model.layers.18.block_sparse_moe.experts.231.w1", "model.layers.18.block_sparse_moe.experts.232.w1", "model.layers.18.block_sparse_moe.experts.233.w1", "model.layers.18.block_sparse_moe.experts.234.w1", "model.layers.18.block_sparse_moe.experts.235.w1", "model.layers.18.block_sparse_moe.experts.236.w1", "model.layers.18.block_sparse_moe.experts.237.w1", "model.layers.18.block_sparse_moe.experts.238.w1", "model.layers.18.block_sparse_moe.experts.239.w1", "model.layers.18.block_sparse_moe.experts.240.w1", "model.layers.18.block_sparse_moe.experts.241.w1", "model.layers.18.block_sparse_moe.experts.242.w1", "model.layers.18.block_sparse_moe.experts.243.w1", "model.layers.18.block_sparse_moe.experts.244.w1", "model.layers.18.block_sparse_moe.experts.245.w1", "model.layers.18.block_sparse_moe.experts.246.w1", "model.layers.18.block_sparse_moe.experts.247.w1", "model.layers.18.block_sparse_moe.experts.248.w1", "model.layers.18.block_sparse_moe.experts.249.w1", "model.layers.18.block_sparse_moe.experts.250.w1", "model.layers.18.block_sparse_moe.experts.251.w1", "model.layers.18.block_sparse_moe.experts.252.w1", "model.layers.18.block_sparse_moe.experts.253.w1", "model.layers.18.block_sparse_moe.experts.254.w1", "model.layers.18.block_sparse_moe.experts.255.w1", "model.layers.18.block_sparse_moe.experts.0.w3", "model.layers.18.block_sparse_moe.experts.1.w3", "model.layers.18.block_sparse_moe.experts.2.w3", "model.layers.18.block_sparse_moe.experts.3.w3", "model.layers.18.block_sparse_moe.experts.4.w3", "model.layers.18.block_sparse_moe.experts.5.w3", "model.layers.18.block_sparse_moe.experts.6.w3", "model.layers.18.block_sparse_moe.experts.7.w3", "model.layers.18.block_sparse_moe.experts.8.w3", "model.layers.18.block_sparse_moe.experts.9.w3", "model.layers.18.block_sparse_moe.experts.10.w3", "model.layers.18.block_sparse_moe.experts.11.w3", "model.layers.18.block_sparse_moe.experts.12.w3", "model.layers.18.block_sparse_moe.experts.13.w3", "model.layers.18.block_sparse_moe.experts.14.w3", "model.layers.18.block_sparse_moe.experts.15.w3", "model.layers.18.block_sparse_moe.experts.16.w3", "model.layers.18.block_sparse_moe.experts.17.w3", "model.layers.18.block_sparse_moe.experts.18.w3", "model.layers.18.block_sparse_moe.experts.19.w3", "model.layers.18.block_sparse_moe.experts.20.w3", "model.layers.18.block_sparse_moe.experts.21.w3", "model.layers.18.block_sparse_moe.experts.22.w3", "model.layers.18.block_sparse_moe.experts.23.w3", "model.layers.18.block_sparse_moe.experts.24.w3", "model.layers.18.block_sparse_moe.experts.25.w3", "model.layers.18.block_sparse_moe.experts.26.w3", "model.layers.18.block_sparse_moe.experts.27.w3", "model.layers.18.block_sparse_moe.experts.28.w3", "model.layers.18.block_sparse_moe.experts.29.w3", "model.layers.18.block_sparse_moe.experts.30.w3", "model.layers.18.block_sparse_moe.experts.31.w3", "model.layers.18.block_sparse_moe.experts.32.w3", "model.layers.18.block_sparse_moe.experts.33.w3", "model.layers.18.block_sparse_moe.experts.34.w3", "model.layers.18.block_sparse_moe.experts.35.w3", "model.layers.18.block_sparse_moe.experts.36.w3", "model.layers.18.block_sparse_moe.experts.37.w3", "model.layers.18.block_sparse_moe.experts.38.w3", "model.layers.18.block_sparse_moe.experts.39.w3", "model.layers.18.block_sparse_moe.experts.40.w3", "model.layers.18.block_sparse_moe.experts.41.w3", "model.layers.18.block_sparse_moe.experts.42.w3", "model.layers.18.block_sparse_moe.experts.43.w3", "model.layers.18.block_sparse_moe.experts.44.w3", "model.layers.18.block_sparse_moe.experts.45.w3", "model.layers.18.block_sparse_moe.experts.46.w3", "model.layers.18.block_sparse_moe.experts.47.w3", "model.layers.18.block_sparse_moe.experts.48.w3", "model.layers.18.block_sparse_moe.experts.49.w3", "model.layers.18.block_sparse_moe.experts.50.w3", "model.layers.18.block_sparse_moe.experts.51.w3", "model.layers.18.block_sparse_moe.experts.52.w3", "model.layers.18.block_sparse_moe.experts.53.w3", "model.layers.18.block_sparse_moe.experts.54.w3", "model.layers.18.block_sparse_moe.experts.55.w3", "model.layers.18.block_sparse_moe.experts.56.w3", "model.layers.18.block_sparse_moe.experts.57.w3", "model.layers.18.block_sparse_moe.experts.58.w3", "model.layers.18.block_sparse_moe.experts.59.w3", "model.layers.18.block_sparse_moe.experts.60.w3", "model.layers.18.block_sparse_moe.experts.61.w3", "model.layers.18.block_sparse_moe.experts.62.w3", "model.layers.18.block_sparse_moe.experts.63.w3", "model.layers.18.block_sparse_moe.experts.64.w3", "model.layers.18.block_sparse_moe.experts.65.w3", "model.layers.18.block_sparse_moe.experts.66.w3", "model.layers.18.block_sparse_moe.experts.67.w3", "model.layers.18.block_sparse_moe.experts.68.w3", "model.layers.18.block_sparse_moe.experts.69.w3", "model.layers.18.block_sparse_moe.experts.70.w3", "model.layers.18.block_sparse_moe.experts.71.w3", "model.layers.18.block_sparse_moe.experts.72.w3", "model.layers.18.block_sparse_moe.experts.73.w3", "model.layers.18.block_sparse_moe.experts.74.w3", "model.layers.18.block_sparse_moe.experts.75.w3", "model.layers.18.block_sparse_moe.experts.76.w3", "model.layers.18.block_sparse_moe.experts.77.w3", "model.layers.18.block_sparse_moe.experts.78.w3", "model.layers.18.block_sparse_moe.experts.79.w3", "model.layers.18.block_sparse_moe.experts.80.w3", "model.layers.18.block_sparse_moe.experts.81.w3", "model.layers.18.block_sparse_moe.experts.82.w3", "model.layers.18.block_sparse_moe.experts.83.w3", "model.layers.18.block_sparse_moe.experts.84.w3", "model.layers.18.block_sparse_moe.experts.85.w3", "model.layers.18.block_sparse_moe.experts.86.w3", "model.layers.18.block_sparse_moe.experts.87.w3", "model.layers.18.block_sparse_moe.experts.88.w3", "model.layers.18.block_sparse_moe.experts.89.w3", "model.layers.18.block_sparse_moe.experts.90.w3", "model.layers.18.block_sparse_moe.experts.91.w3", "model.layers.18.block_sparse_moe.experts.92.w3", "model.layers.18.block_sparse_moe.experts.93.w3", "model.layers.18.block_sparse_moe.experts.94.w3", "model.layers.18.block_sparse_moe.experts.95.w3", "model.layers.18.block_sparse_moe.experts.96.w3", "model.layers.18.block_sparse_moe.experts.97.w3", "model.layers.18.block_sparse_moe.experts.98.w3", "model.layers.18.block_sparse_moe.experts.99.w3", "model.layers.18.block_sparse_moe.experts.100.w3", "model.layers.18.block_sparse_moe.experts.101.w3", "model.layers.18.block_sparse_moe.experts.102.w3", "model.layers.18.block_sparse_moe.experts.103.w3", "model.layers.18.block_sparse_moe.experts.104.w3", "model.layers.18.block_sparse_moe.experts.105.w3", "model.layers.18.block_sparse_moe.experts.106.w3", "model.layers.18.block_sparse_moe.experts.107.w3", "model.layers.18.block_sparse_moe.experts.108.w3", "model.layers.18.block_sparse_moe.experts.109.w3", "model.layers.18.block_sparse_moe.experts.110.w3", "model.layers.18.block_sparse_moe.experts.111.w3", "model.layers.18.block_sparse_moe.experts.112.w3", "model.layers.18.block_sparse_moe.experts.113.w3", "model.layers.18.block_sparse_moe.experts.114.w3", "model.layers.18.block_sparse_moe.experts.115.w3", "model.layers.18.block_sparse_moe.experts.116.w3", "model.layers.18.block_sparse_moe.experts.117.w3", "model.layers.18.block_sparse_moe.experts.118.w3", "model.layers.18.block_sparse_moe.experts.119.w3", "model.layers.18.block_sparse_moe.experts.120.w3", "model.layers.18.block_sparse_moe.experts.121.w3", "model.layers.18.block_sparse_moe.experts.122.w3", "model.layers.18.block_sparse_moe.experts.123.w3", "model.layers.18.block_sparse_moe.experts.124.w3", "model.layers.18.block_sparse_moe.experts.125.w3", "model.layers.18.block_sparse_moe.experts.126.w3", "model.layers.18.block_sparse_moe.experts.127.w3", "model.layers.18.block_sparse_moe.experts.128.w3", "model.layers.18.block_sparse_moe.experts.129.w3", "model.layers.18.block_sparse_moe.experts.130.w3", "model.layers.18.block_sparse_moe.experts.131.w3", "model.layers.18.block_sparse_moe.experts.132.w3", "model.layers.18.block_sparse_moe.experts.133.w3", "model.layers.18.block_sparse_moe.experts.134.w3", "model.layers.18.block_sparse_moe.experts.135.w3", "model.layers.18.block_sparse_moe.experts.136.w3", "model.layers.18.block_sparse_moe.experts.137.w3", "model.layers.18.block_sparse_moe.experts.138.w3", "model.layers.18.block_sparse_moe.experts.139.w3", "model.layers.18.block_sparse_moe.experts.140.w3", "model.layers.18.block_sparse_moe.experts.141.w3", "model.layers.18.block_sparse_moe.experts.142.w3", "model.layers.18.block_sparse_moe.experts.143.w3", "model.layers.18.block_sparse_moe.experts.144.w3", "model.layers.18.block_sparse_moe.experts.145.w3", "model.layers.18.block_sparse_moe.experts.146.w3", "model.layers.18.block_sparse_moe.experts.147.w3", "model.layers.18.block_sparse_moe.experts.148.w3", "model.layers.18.block_sparse_moe.experts.149.w3", "model.layers.18.block_sparse_moe.experts.150.w3", "model.layers.18.block_sparse_moe.experts.151.w3", "model.layers.18.block_sparse_moe.experts.152.w3", "model.layers.18.block_sparse_moe.experts.153.w3", "model.layers.18.block_sparse_moe.experts.154.w3", "model.layers.18.block_sparse_moe.experts.155.w3", "model.layers.18.block_sparse_moe.experts.156.w3", "model.layers.18.block_sparse_moe.experts.157.w3", "model.layers.18.block_sparse_moe.experts.158.w3", "model.layers.18.block_sparse_moe.experts.159.w3", "model.layers.18.block_sparse_moe.experts.160.w3", "model.layers.18.block_sparse_moe.experts.161.w3", "model.layers.18.block_sparse_moe.experts.162.w3", "model.layers.18.block_sparse_moe.experts.163.w3", "model.layers.18.block_sparse_moe.experts.164.w3", "model.layers.18.block_sparse_moe.experts.165.w3", "model.layers.18.block_sparse_moe.experts.166.w3", "model.layers.18.block_sparse_moe.experts.167.w3", "model.layers.18.block_sparse_moe.experts.168.w3", "model.layers.18.block_sparse_moe.experts.169.w3", "model.layers.18.block_sparse_moe.experts.170.w3", "model.layers.18.block_sparse_moe.experts.171.w3", "model.layers.18.block_sparse_moe.experts.172.w3", "model.layers.18.block_sparse_moe.experts.173.w3", "model.layers.18.block_sparse_moe.experts.174.w3", "model.layers.18.block_sparse_moe.experts.175.w3", "model.layers.18.block_sparse_moe.experts.176.w3", "model.layers.18.block_sparse_moe.experts.177.w3", "model.layers.18.block_sparse_moe.experts.178.w3", "model.layers.18.block_sparse_moe.experts.179.w3", "model.layers.18.block_sparse_moe.experts.180.w3", "model.layers.18.block_sparse_moe.experts.181.w3", "model.layers.18.block_sparse_moe.experts.182.w3", "model.layers.18.block_sparse_moe.experts.183.w3", "model.layers.18.block_sparse_moe.experts.184.w3", "model.layers.18.block_sparse_moe.experts.185.w3", "model.layers.18.block_sparse_moe.experts.186.w3", "model.layers.18.block_sparse_moe.experts.187.w3", "model.layers.18.block_sparse_moe.experts.188.w3", "model.layers.18.block_sparse_moe.experts.189.w3", "model.layers.18.block_sparse_moe.experts.190.w3", "model.layers.18.block_sparse_moe.experts.191.w3", "model.layers.18.block_sparse_moe.experts.192.w3", "model.layers.18.block_sparse_moe.experts.193.w3", "model.layers.18.block_sparse_moe.experts.194.w3", "model.layers.18.block_sparse_moe.experts.195.w3", "model.layers.18.block_sparse_moe.experts.196.w3", "model.layers.18.block_sparse_moe.experts.197.w3", "model.layers.18.block_sparse_moe.experts.198.w3", "model.layers.18.block_sparse_moe.experts.199.w3", "model.layers.18.block_sparse_moe.experts.200.w3", "model.layers.18.block_sparse_moe.experts.201.w3", "model.layers.18.block_sparse_moe.experts.202.w3", "model.layers.18.block_sparse_moe.experts.203.w3", "model.layers.18.block_sparse_moe.experts.204.w3", "model.layers.18.block_sparse_moe.experts.205.w3", "model.layers.18.block_sparse_moe.experts.206.w3", "model.layers.18.block_sparse_moe.experts.207.w3", "model.layers.18.block_sparse_moe.experts.208.w3", "model.layers.18.block_sparse_moe.experts.209.w3", "model.layers.18.block_sparse_moe.experts.210.w3", "model.layers.18.block_sparse_moe.experts.211.w3", "model.layers.18.block_sparse_moe.experts.212.w3", "model.layers.18.block_sparse_moe.experts.213.w3", "model.layers.18.block_sparse_moe.experts.214.w3", "model.layers.18.block_sparse_moe.experts.215.w3", "model.layers.18.block_sparse_moe.experts.216.w3", "model.layers.18.block_sparse_moe.experts.217.w3", "model.layers.18.block_sparse_moe.experts.218.w3", "model.layers.18.block_sparse_moe.experts.219.w3", "model.layers.18.block_sparse_moe.experts.220.w3", "model.layers.18.block_sparse_moe.experts.221.w3", "model.layers.18.block_sparse_moe.experts.222.w3", "model.layers.18.block_sparse_moe.experts.223.w3", "model.layers.18.block_sparse_moe.experts.224.w3", "model.layers.18.block_sparse_moe.experts.225.w3", "model.layers.18.block_sparse_moe.experts.226.w3", "model.layers.18.block_sparse_moe.experts.227.w3", "model.layers.18.block_sparse_moe.experts.228.w3", "model.layers.18.block_sparse_moe.experts.229.w3", "model.layers.18.block_sparse_moe.experts.230.w3", "model.layers.18.block_sparse_moe.experts.231.w3", "model.layers.18.block_sparse_moe.experts.232.w3", "model.layers.18.block_sparse_moe.experts.233.w3", "model.layers.18.block_sparse_moe.experts.234.w3", "model.layers.18.block_sparse_moe.experts.235.w3", "model.layers.18.block_sparse_moe.experts.236.w3", "model.layers.18.block_sparse_moe.experts.237.w3", "model.layers.18.block_sparse_moe.experts.238.w3", "model.layers.18.block_sparse_moe.experts.239.w3", "model.layers.18.block_sparse_moe.experts.240.w3", "model.layers.18.block_sparse_moe.experts.241.w3", "model.layers.18.block_sparse_moe.experts.242.w3", "model.layers.18.block_sparse_moe.experts.243.w3", "model.layers.18.block_sparse_moe.experts.244.w3", "model.layers.18.block_sparse_moe.experts.245.w3", "model.layers.18.block_sparse_moe.experts.246.w3", "model.layers.18.block_sparse_moe.experts.247.w3", "model.layers.18.block_sparse_moe.experts.248.w3", "model.layers.18.block_sparse_moe.experts.249.w3", "model.layers.18.block_sparse_moe.experts.250.w3", "model.layers.18.block_sparse_moe.experts.251.w3", "model.layers.18.block_sparse_moe.experts.252.w3", "model.layers.18.block_sparse_moe.experts.253.w3", "model.layers.18.block_sparse_moe.experts.254.w3", "model.layers.18.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.0017091941088437923, "dbits": 2415919104 } ] }, { "idx": 94, "layers": [ "model.layers.18.block_sparse_moe.experts.0.w2", "model.layers.18.block_sparse_moe.experts.1.w2", "model.layers.18.block_sparse_moe.experts.2.w2", "model.layers.18.block_sparse_moe.experts.3.w2", "model.layers.18.block_sparse_moe.experts.4.w2", "model.layers.18.block_sparse_moe.experts.5.w2", "model.layers.18.block_sparse_moe.experts.6.w2", "model.layers.18.block_sparse_moe.experts.7.w2", "model.layers.18.block_sparse_moe.experts.8.w2", "model.layers.18.block_sparse_moe.experts.9.w2", "model.layers.18.block_sparse_moe.experts.10.w2", "model.layers.18.block_sparse_moe.experts.11.w2", "model.layers.18.block_sparse_moe.experts.12.w2", "model.layers.18.block_sparse_moe.experts.13.w2", "model.layers.18.block_sparse_moe.experts.14.w2", "model.layers.18.block_sparse_moe.experts.15.w2", "model.layers.18.block_sparse_moe.experts.16.w2", "model.layers.18.block_sparse_moe.experts.17.w2", "model.layers.18.block_sparse_moe.experts.18.w2", "model.layers.18.block_sparse_moe.experts.19.w2", "model.layers.18.block_sparse_moe.experts.20.w2", "model.layers.18.block_sparse_moe.experts.21.w2", "model.layers.18.block_sparse_moe.experts.22.w2", "model.layers.18.block_sparse_moe.experts.23.w2", "model.layers.18.block_sparse_moe.experts.24.w2", "model.layers.18.block_sparse_moe.experts.25.w2", "model.layers.18.block_sparse_moe.experts.26.w2", "model.layers.18.block_sparse_moe.experts.27.w2", "model.layers.18.block_sparse_moe.experts.28.w2", "model.layers.18.block_sparse_moe.experts.29.w2", "model.layers.18.block_sparse_moe.experts.30.w2", "model.layers.18.block_sparse_moe.experts.31.w2", "model.layers.18.block_sparse_moe.experts.32.w2", "model.layers.18.block_sparse_moe.experts.33.w2", "model.layers.18.block_sparse_moe.experts.34.w2", "model.layers.18.block_sparse_moe.experts.35.w2", "model.layers.18.block_sparse_moe.experts.36.w2", "model.layers.18.block_sparse_moe.experts.37.w2", "model.layers.18.block_sparse_moe.experts.38.w2", "model.layers.18.block_sparse_moe.experts.39.w2", "model.layers.18.block_sparse_moe.experts.40.w2", "model.layers.18.block_sparse_moe.experts.41.w2", "model.layers.18.block_sparse_moe.experts.42.w2", "model.layers.18.block_sparse_moe.experts.43.w2", "model.layers.18.block_sparse_moe.experts.44.w2", "model.layers.18.block_sparse_moe.experts.45.w2", "model.layers.18.block_sparse_moe.experts.46.w2", "model.layers.18.block_sparse_moe.experts.47.w2", "model.layers.18.block_sparse_moe.experts.48.w2", "model.layers.18.block_sparse_moe.experts.49.w2", "model.layers.18.block_sparse_moe.experts.50.w2", "model.layers.18.block_sparse_moe.experts.51.w2", "model.layers.18.block_sparse_moe.experts.52.w2", "model.layers.18.block_sparse_moe.experts.53.w2", "model.layers.18.block_sparse_moe.experts.54.w2", "model.layers.18.block_sparse_moe.experts.55.w2", "model.layers.18.block_sparse_moe.experts.56.w2", "model.layers.18.block_sparse_moe.experts.57.w2", "model.layers.18.block_sparse_moe.experts.58.w2", "model.layers.18.block_sparse_moe.experts.59.w2", "model.layers.18.block_sparse_moe.experts.60.w2", "model.layers.18.block_sparse_moe.experts.61.w2", "model.layers.18.block_sparse_moe.experts.62.w2", "model.layers.18.block_sparse_moe.experts.63.w2", "model.layers.18.block_sparse_moe.experts.64.w2", "model.layers.18.block_sparse_moe.experts.65.w2", "model.layers.18.block_sparse_moe.experts.66.w2", "model.layers.18.block_sparse_moe.experts.67.w2", "model.layers.18.block_sparse_moe.experts.68.w2", "model.layers.18.block_sparse_moe.experts.69.w2", "model.layers.18.block_sparse_moe.experts.70.w2", "model.layers.18.block_sparse_moe.experts.71.w2", "model.layers.18.block_sparse_moe.experts.72.w2", "model.layers.18.block_sparse_moe.experts.73.w2", "model.layers.18.block_sparse_moe.experts.74.w2", "model.layers.18.block_sparse_moe.experts.75.w2", "model.layers.18.block_sparse_moe.experts.76.w2", "model.layers.18.block_sparse_moe.experts.77.w2", "model.layers.18.block_sparse_moe.experts.78.w2", "model.layers.18.block_sparse_moe.experts.79.w2", "model.layers.18.block_sparse_moe.experts.80.w2", "model.layers.18.block_sparse_moe.experts.81.w2", "model.layers.18.block_sparse_moe.experts.82.w2", "model.layers.18.block_sparse_moe.experts.83.w2", "model.layers.18.block_sparse_moe.experts.84.w2", "model.layers.18.block_sparse_moe.experts.85.w2", "model.layers.18.block_sparse_moe.experts.86.w2", "model.layers.18.block_sparse_moe.experts.87.w2", "model.layers.18.block_sparse_moe.experts.88.w2", "model.layers.18.block_sparse_moe.experts.89.w2", "model.layers.18.block_sparse_moe.experts.90.w2", "model.layers.18.block_sparse_moe.experts.91.w2", "model.layers.18.block_sparse_moe.experts.92.w2", "model.layers.18.block_sparse_moe.experts.93.w2", "model.layers.18.block_sparse_moe.experts.94.w2", "model.layers.18.block_sparse_moe.experts.95.w2", "model.layers.18.block_sparse_moe.experts.96.w2", "model.layers.18.block_sparse_moe.experts.97.w2", "model.layers.18.block_sparse_moe.experts.98.w2", "model.layers.18.block_sparse_moe.experts.99.w2", "model.layers.18.block_sparse_moe.experts.100.w2", "model.layers.18.block_sparse_moe.experts.101.w2", "model.layers.18.block_sparse_moe.experts.102.w2", "model.layers.18.block_sparse_moe.experts.103.w2", "model.layers.18.block_sparse_moe.experts.104.w2", "model.layers.18.block_sparse_moe.experts.105.w2", "model.layers.18.block_sparse_moe.experts.106.w2", "model.layers.18.block_sparse_moe.experts.107.w2", "model.layers.18.block_sparse_moe.experts.108.w2", "model.layers.18.block_sparse_moe.experts.109.w2", "model.layers.18.block_sparse_moe.experts.110.w2", "model.layers.18.block_sparse_moe.experts.111.w2", "model.layers.18.block_sparse_moe.experts.112.w2", "model.layers.18.block_sparse_moe.experts.113.w2", "model.layers.18.block_sparse_moe.experts.114.w2", "model.layers.18.block_sparse_moe.experts.115.w2", "model.layers.18.block_sparse_moe.experts.116.w2", "model.layers.18.block_sparse_moe.experts.117.w2", "model.layers.18.block_sparse_moe.experts.118.w2", "model.layers.18.block_sparse_moe.experts.119.w2", "model.layers.18.block_sparse_moe.experts.120.w2", "model.layers.18.block_sparse_moe.experts.121.w2", "model.layers.18.block_sparse_moe.experts.122.w2", "model.layers.18.block_sparse_moe.experts.123.w2", "model.layers.18.block_sparse_moe.experts.124.w2", "model.layers.18.block_sparse_moe.experts.125.w2", "model.layers.18.block_sparse_moe.experts.126.w2", "model.layers.18.block_sparse_moe.experts.127.w2", "model.layers.18.block_sparse_moe.experts.128.w2", "model.layers.18.block_sparse_moe.experts.129.w2", "model.layers.18.block_sparse_moe.experts.130.w2", "model.layers.18.block_sparse_moe.experts.131.w2", "model.layers.18.block_sparse_moe.experts.132.w2", "model.layers.18.block_sparse_moe.experts.133.w2", "model.layers.18.block_sparse_moe.experts.134.w2", "model.layers.18.block_sparse_moe.experts.135.w2", "model.layers.18.block_sparse_moe.experts.136.w2", "model.layers.18.block_sparse_moe.experts.137.w2", "model.layers.18.block_sparse_moe.experts.138.w2", "model.layers.18.block_sparse_moe.experts.139.w2", "model.layers.18.block_sparse_moe.experts.140.w2", "model.layers.18.block_sparse_moe.experts.141.w2", "model.layers.18.block_sparse_moe.experts.142.w2", "model.layers.18.block_sparse_moe.experts.143.w2", "model.layers.18.block_sparse_moe.experts.144.w2", "model.layers.18.block_sparse_moe.experts.145.w2", "model.layers.18.block_sparse_moe.experts.146.w2", "model.layers.18.block_sparse_moe.experts.147.w2", "model.layers.18.block_sparse_moe.experts.148.w2", "model.layers.18.block_sparse_moe.experts.149.w2", "model.layers.18.block_sparse_moe.experts.150.w2", "model.layers.18.block_sparse_moe.experts.151.w2", "model.layers.18.block_sparse_moe.experts.152.w2", "model.layers.18.block_sparse_moe.experts.153.w2", "model.layers.18.block_sparse_moe.experts.154.w2", "model.layers.18.block_sparse_moe.experts.155.w2", "model.layers.18.block_sparse_moe.experts.156.w2", "model.layers.18.block_sparse_moe.experts.157.w2", "model.layers.18.block_sparse_moe.experts.158.w2", "model.layers.18.block_sparse_moe.experts.159.w2", "model.layers.18.block_sparse_moe.experts.160.w2", "model.layers.18.block_sparse_moe.experts.161.w2", "model.layers.18.block_sparse_moe.experts.162.w2", "model.layers.18.block_sparse_moe.experts.163.w2", "model.layers.18.block_sparse_moe.experts.164.w2", "model.layers.18.block_sparse_moe.experts.165.w2", "model.layers.18.block_sparse_moe.experts.166.w2", "model.layers.18.block_sparse_moe.experts.167.w2", "model.layers.18.block_sparse_moe.experts.168.w2", "model.layers.18.block_sparse_moe.experts.169.w2", "model.layers.18.block_sparse_moe.experts.170.w2", "model.layers.18.block_sparse_moe.experts.171.w2", "model.layers.18.block_sparse_moe.experts.172.w2", "model.layers.18.block_sparse_moe.experts.173.w2", "model.layers.18.block_sparse_moe.experts.174.w2", "model.layers.18.block_sparse_moe.experts.175.w2", "model.layers.18.block_sparse_moe.experts.176.w2", "model.layers.18.block_sparse_moe.experts.177.w2", "model.layers.18.block_sparse_moe.experts.178.w2", "model.layers.18.block_sparse_moe.experts.179.w2", "model.layers.18.block_sparse_moe.experts.180.w2", "model.layers.18.block_sparse_moe.experts.181.w2", "model.layers.18.block_sparse_moe.experts.182.w2", "model.layers.18.block_sparse_moe.experts.183.w2", "model.layers.18.block_sparse_moe.experts.184.w2", "model.layers.18.block_sparse_moe.experts.185.w2", "model.layers.18.block_sparse_moe.experts.186.w2", "model.layers.18.block_sparse_moe.experts.187.w2", "model.layers.18.block_sparse_moe.experts.188.w2", "model.layers.18.block_sparse_moe.experts.189.w2", "model.layers.18.block_sparse_moe.experts.190.w2", "model.layers.18.block_sparse_moe.experts.191.w2", "model.layers.18.block_sparse_moe.experts.192.w2", "model.layers.18.block_sparse_moe.experts.193.w2", "model.layers.18.block_sparse_moe.experts.194.w2", "model.layers.18.block_sparse_moe.experts.195.w2", "model.layers.18.block_sparse_moe.experts.196.w2", "model.layers.18.block_sparse_moe.experts.197.w2", "model.layers.18.block_sparse_moe.experts.198.w2", "model.layers.18.block_sparse_moe.experts.199.w2", "model.layers.18.block_sparse_moe.experts.200.w2", "model.layers.18.block_sparse_moe.experts.201.w2", "model.layers.18.block_sparse_moe.experts.202.w2", "model.layers.18.block_sparse_moe.experts.203.w2", "model.layers.18.block_sparse_moe.experts.204.w2", "model.layers.18.block_sparse_moe.experts.205.w2", "model.layers.18.block_sparse_moe.experts.206.w2", "model.layers.18.block_sparse_moe.experts.207.w2", "model.layers.18.block_sparse_moe.experts.208.w2", "model.layers.18.block_sparse_moe.experts.209.w2", "model.layers.18.block_sparse_moe.experts.210.w2", "model.layers.18.block_sparse_moe.experts.211.w2", "model.layers.18.block_sparse_moe.experts.212.w2", "model.layers.18.block_sparse_moe.experts.213.w2", "model.layers.18.block_sparse_moe.experts.214.w2", "model.layers.18.block_sparse_moe.experts.215.w2", "model.layers.18.block_sparse_moe.experts.216.w2", "model.layers.18.block_sparse_moe.experts.217.w2", "model.layers.18.block_sparse_moe.experts.218.w2", "model.layers.18.block_sparse_moe.experts.219.w2", "model.layers.18.block_sparse_moe.experts.220.w2", "model.layers.18.block_sparse_moe.experts.221.w2", "model.layers.18.block_sparse_moe.experts.222.w2", "model.layers.18.block_sparse_moe.experts.223.w2", "model.layers.18.block_sparse_moe.experts.224.w2", "model.layers.18.block_sparse_moe.experts.225.w2", "model.layers.18.block_sparse_moe.experts.226.w2", "model.layers.18.block_sparse_moe.experts.227.w2", "model.layers.18.block_sparse_moe.experts.228.w2", "model.layers.18.block_sparse_moe.experts.229.w2", "model.layers.18.block_sparse_moe.experts.230.w2", "model.layers.18.block_sparse_moe.experts.231.w2", "model.layers.18.block_sparse_moe.experts.232.w2", "model.layers.18.block_sparse_moe.experts.233.w2", "model.layers.18.block_sparse_moe.experts.234.w2", "model.layers.18.block_sparse_moe.experts.235.w2", "model.layers.18.block_sparse_moe.experts.236.w2", "model.layers.18.block_sparse_moe.experts.237.w2", "model.layers.18.block_sparse_moe.experts.238.w2", "model.layers.18.block_sparse_moe.experts.239.w2", "model.layers.18.block_sparse_moe.experts.240.w2", "model.layers.18.block_sparse_moe.experts.241.w2", "model.layers.18.block_sparse_moe.experts.242.w2", "model.layers.18.block_sparse_moe.experts.243.w2", "model.layers.18.block_sparse_moe.experts.244.w2", "model.layers.18.block_sparse_moe.experts.245.w2", "model.layers.18.block_sparse_moe.experts.246.w2", "model.layers.18.block_sparse_moe.experts.247.w2", "model.layers.18.block_sparse_moe.experts.248.w2", "model.layers.18.block_sparse_moe.experts.249.w2", "model.layers.18.block_sparse_moe.experts.250.w2", "model.layers.18.block_sparse_moe.experts.251.w2", "model.layers.18.block_sparse_moe.experts.252.w2", "model.layers.18.block_sparse_moe.experts.253.w2", "model.layers.18.block_sparse_moe.experts.254.w2", "model.layers.18.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0004863351583481168, "dbits": 1207959552 } ] }, { "idx": 95, "layers": [ "model.layers.19.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0009719002991914749, "dbits": 18874368 } ] }, { "idx": 96, "layers": [ "model.layers.19.self_attn.k_proj", "model.layers.19.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0016468200832605584, "dbits": 6291456 } ] }, { "idx": 97, "layers": [ "model.layers.19.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0019354410469532013, "dbits": 18874368 } ] }, { "idx": 98, "layers": [ "model.layers.19.block_sparse_moe.experts.0.w1", "model.layers.19.block_sparse_moe.experts.1.w1", "model.layers.19.block_sparse_moe.experts.2.w1", "model.layers.19.block_sparse_moe.experts.3.w1", "model.layers.19.block_sparse_moe.experts.4.w1", "model.layers.19.block_sparse_moe.experts.5.w1", "model.layers.19.block_sparse_moe.experts.6.w1", "model.layers.19.block_sparse_moe.experts.7.w1", "model.layers.19.block_sparse_moe.experts.8.w1", "model.layers.19.block_sparse_moe.experts.9.w1", "model.layers.19.block_sparse_moe.experts.10.w1", "model.layers.19.block_sparse_moe.experts.11.w1", "model.layers.19.block_sparse_moe.experts.12.w1", "model.layers.19.block_sparse_moe.experts.13.w1", "model.layers.19.block_sparse_moe.experts.14.w1", "model.layers.19.block_sparse_moe.experts.15.w1", "model.layers.19.block_sparse_moe.experts.16.w1", "model.layers.19.block_sparse_moe.experts.17.w1", "model.layers.19.block_sparse_moe.experts.18.w1", "model.layers.19.block_sparse_moe.experts.19.w1", "model.layers.19.block_sparse_moe.experts.20.w1", "model.layers.19.block_sparse_moe.experts.21.w1", "model.layers.19.block_sparse_moe.experts.22.w1", "model.layers.19.block_sparse_moe.experts.23.w1", "model.layers.19.block_sparse_moe.experts.24.w1", "model.layers.19.block_sparse_moe.experts.25.w1", "model.layers.19.block_sparse_moe.experts.26.w1", "model.layers.19.block_sparse_moe.experts.27.w1", "model.layers.19.block_sparse_moe.experts.28.w1", "model.layers.19.block_sparse_moe.experts.29.w1", "model.layers.19.block_sparse_moe.experts.30.w1", "model.layers.19.block_sparse_moe.experts.31.w1", "model.layers.19.block_sparse_moe.experts.32.w1", "model.layers.19.block_sparse_moe.experts.33.w1", "model.layers.19.block_sparse_moe.experts.34.w1", "model.layers.19.block_sparse_moe.experts.35.w1", "model.layers.19.block_sparse_moe.experts.36.w1", "model.layers.19.block_sparse_moe.experts.37.w1", "model.layers.19.block_sparse_moe.experts.38.w1", "model.layers.19.block_sparse_moe.experts.39.w1", "model.layers.19.block_sparse_moe.experts.40.w1", "model.layers.19.block_sparse_moe.experts.41.w1", "model.layers.19.block_sparse_moe.experts.42.w1", "model.layers.19.block_sparse_moe.experts.43.w1", "model.layers.19.block_sparse_moe.experts.44.w1", "model.layers.19.block_sparse_moe.experts.45.w1", "model.layers.19.block_sparse_moe.experts.46.w1", "model.layers.19.block_sparse_moe.experts.47.w1", "model.layers.19.block_sparse_moe.experts.48.w1", "model.layers.19.block_sparse_moe.experts.49.w1", "model.layers.19.block_sparse_moe.experts.50.w1", "model.layers.19.block_sparse_moe.experts.51.w1", "model.layers.19.block_sparse_moe.experts.52.w1", "model.layers.19.block_sparse_moe.experts.53.w1", "model.layers.19.block_sparse_moe.experts.54.w1", "model.layers.19.block_sparse_moe.experts.55.w1", "model.layers.19.block_sparse_moe.experts.56.w1", "model.layers.19.block_sparse_moe.experts.57.w1", "model.layers.19.block_sparse_moe.experts.58.w1", "model.layers.19.block_sparse_moe.experts.59.w1", "model.layers.19.block_sparse_moe.experts.60.w1", "model.layers.19.block_sparse_moe.experts.61.w1", "model.layers.19.block_sparse_moe.experts.62.w1", "model.layers.19.block_sparse_moe.experts.63.w1", "model.layers.19.block_sparse_moe.experts.64.w1", "model.layers.19.block_sparse_moe.experts.65.w1", "model.layers.19.block_sparse_moe.experts.66.w1", "model.layers.19.block_sparse_moe.experts.67.w1", "model.layers.19.block_sparse_moe.experts.68.w1", "model.layers.19.block_sparse_moe.experts.69.w1", "model.layers.19.block_sparse_moe.experts.70.w1", "model.layers.19.block_sparse_moe.experts.71.w1", "model.layers.19.block_sparse_moe.experts.72.w1", "model.layers.19.block_sparse_moe.experts.73.w1", "model.layers.19.block_sparse_moe.experts.74.w1", "model.layers.19.block_sparse_moe.experts.75.w1", "model.layers.19.block_sparse_moe.experts.76.w1", "model.layers.19.block_sparse_moe.experts.77.w1", "model.layers.19.block_sparse_moe.experts.78.w1", "model.layers.19.block_sparse_moe.experts.79.w1", "model.layers.19.block_sparse_moe.experts.80.w1", "model.layers.19.block_sparse_moe.experts.81.w1", "model.layers.19.block_sparse_moe.experts.82.w1", "model.layers.19.block_sparse_moe.experts.83.w1", "model.layers.19.block_sparse_moe.experts.84.w1", "model.layers.19.block_sparse_moe.experts.85.w1", "model.layers.19.block_sparse_moe.experts.86.w1", "model.layers.19.block_sparse_moe.experts.87.w1", "model.layers.19.block_sparse_moe.experts.88.w1", "model.layers.19.block_sparse_moe.experts.89.w1", "model.layers.19.block_sparse_moe.experts.90.w1", "model.layers.19.block_sparse_moe.experts.91.w1", "model.layers.19.block_sparse_moe.experts.92.w1", "model.layers.19.block_sparse_moe.experts.93.w1", "model.layers.19.block_sparse_moe.experts.94.w1", "model.layers.19.block_sparse_moe.experts.95.w1", "model.layers.19.block_sparse_moe.experts.96.w1", "model.layers.19.block_sparse_moe.experts.97.w1", "model.layers.19.block_sparse_moe.experts.98.w1", "model.layers.19.block_sparse_moe.experts.99.w1", "model.layers.19.block_sparse_moe.experts.100.w1", "model.layers.19.block_sparse_moe.experts.101.w1", "model.layers.19.block_sparse_moe.experts.102.w1", "model.layers.19.block_sparse_moe.experts.103.w1", "model.layers.19.block_sparse_moe.experts.104.w1", "model.layers.19.block_sparse_moe.experts.105.w1", "model.layers.19.block_sparse_moe.experts.106.w1", "model.layers.19.block_sparse_moe.experts.107.w1", "model.layers.19.block_sparse_moe.experts.108.w1", "model.layers.19.block_sparse_moe.experts.109.w1", "model.layers.19.block_sparse_moe.experts.110.w1", "model.layers.19.block_sparse_moe.experts.111.w1", "model.layers.19.block_sparse_moe.experts.112.w1", "model.layers.19.block_sparse_moe.experts.113.w1", "model.layers.19.block_sparse_moe.experts.114.w1", "model.layers.19.block_sparse_moe.experts.115.w1", "model.layers.19.block_sparse_moe.experts.116.w1", "model.layers.19.block_sparse_moe.experts.117.w1", "model.layers.19.block_sparse_moe.experts.118.w1", "model.layers.19.block_sparse_moe.experts.119.w1", "model.layers.19.block_sparse_moe.experts.120.w1", "model.layers.19.block_sparse_moe.experts.121.w1", "model.layers.19.block_sparse_moe.experts.122.w1", "model.layers.19.block_sparse_moe.experts.123.w1", "model.layers.19.block_sparse_moe.experts.124.w1", "model.layers.19.block_sparse_moe.experts.125.w1", "model.layers.19.block_sparse_moe.experts.126.w1", "model.layers.19.block_sparse_moe.experts.127.w1", "model.layers.19.block_sparse_moe.experts.128.w1", "model.layers.19.block_sparse_moe.experts.129.w1", "model.layers.19.block_sparse_moe.experts.130.w1", "model.layers.19.block_sparse_moe.experts.131.w1", "model.layers.19.block_sparse_moe.experts.132.w1", "model.layers.19.block_sparse_moe.experts.133.w1", "model.layers.19.block_sparse_moe.experts.134.w1", "model.layers.19.block_sparse_moe.experts.135.w1", "model.layers.19.block_sparse_moe.experts.136.w1", "model.layers.19.block_sparse_moe.experts.137.w1", "model.layers.19.block_sparse_moe.experts.138.w1", "model.layers.19.block_sparse_moe.experts.139.w1", "model.layers.19.block_sparse_moe.experts.140.w1", "model.layers.19.block_sparse_moe.experts.141.w1", "model.layers.19.block_sparse_moe.experts.142.w1", "model.layers.19.block_sparse_moe.experts.143.w1", "model.layers.19.block_sparse_moe.experts.144.w1", "model.layers.19.block_sparse_moe.experts.145.w1", "model.layers.19.block_sparse_moe.experts.146.w1", "model.layers.19.block_sparse_moe.experts.147.w1", "model.layers.19.block_sparse_moe.experts.148.w1", "model.layers.19.block_sparse_moe.experts.149.w1", "model.layers.19.block_sparse_moe.experts.150.w1", "model.layers.19.block_sparse_moe.experts.151.w1", "model.layers.19.block_sparse_moe.experts.152.w1", "model.layers.19.block_sparse_moe.experts.153.w1", "model.layers.19.block_sparse_moe.experts.154.w1", "model.layers.19.block_sparse_moe.experts.155.w1", "model.layers.19.block_sparse_moe.experts.156.w1", "model.layers.19.block_sparse_moe.experts.157.w1", "model.layers.19.block_sparse_moe.experts.158.w1", "model.layers.19.block_sparse_moe.experts.159.w1", "model.layers.19.block_sparse_moe.experts.160.w1", "model.layers.19.block_sparse_moe.experts.161.w1", "model.layers.19.block_sparse_moe.experts.162.w1", "model.layers.19.block_sparse_moe.experts.163.w1", "model.layers.19.block_sparse_moe.experts.164.w1", "model.layers.19.block_sparse_moe.experts.165.w1", "model.layers.19.block_sparse_moe.experts.166.w1", "model.layers.19.block_sparse_moe.experts.167.w1", "model.layers.19.block_sparse_moe.experts.168.w1", "model.layers.19.block_sparse_moe.experts.169.w1", "model.layers.19.block_sparse_moe.experts.170.w1", "model.layers.19.block_sparse_moe.experts.171.w1", "model.layers.19.block_sparse_moe.experts.172.w1", "model.layers.19.block_sparse_moe.experts.173.w1", "model.layers.19.block_sparse_moe.experts.174.w1", "model.layers.19.block_sparse_moe.experts.175.w1", "model.layers.19.block_sparse_moe.experts.176.w1", "model.layers.19.block_sparse_moe.experts.177.w1", "model.layers.19.block_sparse_moe.experts.178.w1", "model.layers.19.block_sparse_moe.experts.179.w1", "model.layers.19.block_sparse_moe.experts.180.w1", "model.layers.19.block_sparse_moe.experts.181.w1", "model.layers.19.block_sparse_moe.experts.182.w1", "model.layers.19.block_sparse_moe.experts.183.w1", "model.layers.19.block_sparse_moe.experts.184.w1", "model.layers.19.block_sparse_moe.experts.185.w1", "model.layers.19.block_sparse_moe.experts.186.w1", "model.layers.19.block_sparse_moe.experts.187.w1", "model.layers.19.block_sparse_moe.experts.188.w1", "model.layers.19.block_sparse_moe.experts.189.w1", "model.layers.19.block_sparse_moe.experts.190.w1", "model.layers.19.block_sparse_moe.experts.191.w1", "model.layers.19.block_sparse_moe.experts.192.w1", "model.layers.19.block_sparse_moe.experts.193.w1", "model.layers.19.block_sparse_moe.experts.194.w1", "model.layers.19.block_sparse_moe.experts.195.w1", "model.layers.19.block_sparse_moe.experts.196.w1", "model.layers.19.block_sparse_moe.experts.197.w1", "model.layers.19.block_sparse_moe.experts.198.w1", "model.layers.19.block_sparse_moe.experts.199.w1", "model.layers.19.block_sparse_moe.experts.200.w1", "model.layers.19.block_sparse_moe.experts.201.w1", "model.layers.19.block_sparse_moe.experts.202.w1", "model.layers.19.block_sparse_moe.experts.203.w1", "model.layers.19.block_sparse_moe.experts.204.w1", "model.layers.19.block_sparse_moe.experts.205.w1", "model.layers.19.block_sparse_moe.experts.206.w1", "model.layers.19.block_sparse_moe.experts.207.w1", "model.layers.19.block_sparse_moe.experts.208.w1", "model.layers.19.block_sparse_moe.experts.209.w1", "model.layers.19.block_sparse_moe.experts.210.w1", "model.layers.19.block_sparse_moe.experts.211.w1", "model.layers.19.block_sparse_moe.experts.212.w1", "model.layers.19.block_sparse_moe.experts.213.w1", "model.layers.19.block_sparse_moe.experts.214.w1", "model.layers.19.block_sparse_moe.experts.215.w1", "model.layers.19.block_sparse_moe.experts.216.w1", "model.layers.19.block_sparse_moe.experts.217.w1", "model.layers.19.block_sparse_moe.experts.218.w1", "model.layers.19.block_sparse_moe.experts.219.w1", "model.layers.19.block_sparse_moe.experts.220.w1", "model.layers.19.block_sparse_moe.experts.221.w1", "model.layers.19.block_sparse_moe.experts.222.w1", "model.layers.19.block_sparse_moe.experts.223.w1", "model.layers.19.block_sparse_moe.experts.224.w1", "model.layers.19.block_sparse_moe.experts.225.w1", "model.layers.19.block_sparse_moe.experts.226.w1", "model.layers.19.block_sparse_moe.experts.227.w1", "model.layers.19.block_sparse_moe.experts.228.w1", "model.layers.19.block_sparse_moe.experts.229.w1", "model.layers.19.block_sparse_moe.experts.230.w1", "model.layers.19.block_sparse_moe.experts.231.w1", "model.layers.19.block_sparse_moe.experts.232.w1", "model.layers.19.block_sparse_moe.experts.233.w1", "model.layers.19.block_sparse_moe.experts.234.w1", "model.layers.19.block_sparse_moe.experts.235.w1", "model.layers.19.block_sparse_moe.experts.236.w1", "model.layers.19.block_sparse_moe.experts.237.w1", "model.layers.19.block_sparse_moe.experts.238.w1", "model.layers.19.block_sparse_moe.experts.239.w1", "model.layers.19.block_sparse_moe.experts.240.w1", "model.layers.19.block_sparse_moe.experts.241.w1", "model.layers.19.block_sparse_moe.experts.242.w1", "model.layers.19.block_sparse_moe.experts.243.w1", "model.layers.19.block_sparse_moe.experts.244.w1", "model.layers.19.block_sparse_moe.experts.245.w1", "model.layers.19.block_sparse_moe.experts.246.w1", "model.layers.19.block_sparse_moe.experts.247.w1", "model.layers.19.block_sparse_moe.experts.248.w1", "model.layers.19.block_sparse_moe.experts.249.w1", "model.layers.19.block_sparse_moe.experts.250.w1", "model.layers.19.block_sparse_moe.experts.251.w1", "model.layers.19.block_sparse_moe.experts.252.w1", "model.layers.19.block_sparse_moe.experts.253.w1", "model.layers.19.block_sparse_moe.experts.254.w1", "model.layers.19.block_sparse_moe.experts.255.w1", "model.layers.19.block_sparse_moe.experts.0.w3", "model.layers.19.block_sparse_moe.experts.1.w3", "model.layers.19.block_sparse_moe.experts.2.w3", "model.layers.19.block_sparse_moe.experts.3.w3", "model.layers.19.block_sparse_moe.experts.4.w3", "model.layers.19.block_sparse_moe.experts.5.w3", "model.layers.19.block_sparse_moe.experts.6.w3", "model.layers.19.block_sparse_moe.experts.7.w3", "model.layers.19.block_sparse_moe.experts.8.w3", "model.layers.19.block_sparse_moe.experts.9.w3", "model.layers.19.block_sparse_moe.experts.10.w3", "model.layers.19.block_sparse_moe.experts.11.w3", "model.layers.19.block_sparse_moe.experts.12.w3", "model.layers.19.block_sparse_moe.experts.13.w3", "model.layers.19.block_sparse_moe.experts.14.w3", "model.layers.19.block_sparse_moe.experts.15.w3", "model.layers.19.block_sparse_moe.experts.16.w3", "model.layers.19.block_sparse_moe.experts.17.w3", "model.layers.19.block_sparse_moe.experts.18.w3", "model.layers.19.block_sparse_moe.experts.19.w3", "model.layers.19.block_sparse_moe.experts.20.w3", "model.layers.19.block_sparse_moe.experts.21.w3", "model.layers.19.block_sparse_moe.experts.22.w3", "model.layers.19.block_sparse_moe.experts.23.w3", "model.layers.19.block_sparse_moe.experts.24.w3", "model.layers.19.block_sparse_moe.experts.25.w3", "model.layers.19.block_sparse_moe.experts.26.w3", "model.layers.19.block_sparse_moe.experts.27.w3", "model.layers.19.block_sparse_moe.experts.28.w3", "model.layers.19.block_sparse_moe.experts.29.w3", "model.layers.19.block_sparse_moe.experts.30.w3", "model.layers.19.block_sparse_moe.experts.31.w3", "model.layers.19.block_sparse_moe.experts.32.w3", "model.layers.19.block_sparse_moe.experts.33.w3", "model.layers.19.block_sparse_moe.experts.34.w3", "model.layers.19.block_sparse_moe.experts.35.w3", "model.layers.19.block_sparse_moe.experts.36.w3", "model.layers.19.block_sparse_moe.experts.37.w3", "model.layers.19.block_sparse_moe.experts.38.w3", "model.layers.19.block_sparse_moe.experts.39.w3", "model.layers.19.block_sparse_moe.experts.40.w3", "model.layers.19.block_sparse_moe.experts.41.w3", "model.layers.19.block_sparse_moe.experts.42.w3", "model.layers.19.block_sparse_moe.experts.43.w3", "model.layers.19.block_sparse_moe.experts.44.w3", "model.layers.19.block_sparse_moe.experts.45.w3", "model.layers.19.block_sparse_moe.experts.46.w3", "model.layers.19.block_sparse_moe.experts.47.w3", "model.layers.19.block_sparse_moe.experts.48.w3", "model.layers.19.block_sparse_moe.experts.49.w3", "model.layers.19.block_sparse_moe.experts.50.w3", "model.layers.19.block_sparse_moe.experts.51.w3", "model.layers.19.block_sparse_moe.experts.52.w3", "model.layers.19.block_sparse_moe.experts.53.w3", "model.layers.19.block_sparse_moe.experts.54.w3", "model.layers.19.block_sparse_moe.experts.55.w3", "model.layers.19.block_sparse_moe.experts.56.w3", "model.layers.19.block_sparse_moe.experts.57.w3", "model.layers.19.block_sparse_moe.experts.58.w3", "model.layers.19.block_sparse_moe.experts.59.w3", "model.layers.19.block_sparse_moe.experts.60.w3", "model.layers.19.block_sparse_moe.experts.61.w3", "model.layers.19.block_sparse_moe.experts.62.w3", "model.layers.19.block_sparse_moe.experts.63.w3", "model.layers.19.block_sparse_moe.experts.64.w3", "model.layers.19.block_sparse_moe.experts.65.w3", "model.layers.19.block_sparse_moe.experts.66.w3", "model.layers.19.block_sparse_moe.experts.67.w3", "model.layers.19.block_sparse_moe.experts.68.w3", "model.layers.19.block_sparse_moe.experts.69.w3", "model.layers.19.block_sparse_moe.experts.70.w3", "model.layers.19.block_sparse_moe.experts.71.w3", "model.layers.19.block_sparse_moe.experts.72.w3", "model.layers.19.block_sparse_moe.experts.73.w3", "model.layers.19.block_sparse_moe.experts.74.w3", "model.layers.19.block_sparse_moe.experts.75.w3", "model.layers.19.block_sparse_moe.experts.76.w3", "model.layers.19.block_sparse_moe.experts.77.w3", "model.layers.19.block_sparse_moe.experts.78.w3", "model.layers.19.block_sparse_moe.experts.79.w3", "model.layers.19.block_sparse_moe.experts.80.w3", "model.layers.19.block_sparse_moe.experts.81.w3", "model.layers.19.block_sparse_moe.experts.82.w3", "model.layers.19.block_sparse_moe.experts.83.w3", "model.layers.19.block_sparse_moe.experts.84.w3", "model.layers.19.block_sparse_moe.experts.85.w3", "model.layers.19.block_sparse_moe.experts.86.w3", "model.layers.19.block_sparse_moe.experts.87.w3", "model.layers.19.block_sparse_moe.experts.88.w3", "model.layers.19.block_sparse_moe.experts.89.w3", "model.layers.19.block_sparse_moe.experts.90.w3", "model.layers.19.block_sparse_moe.experts.91.w3", "model.layers.19.block_sparse_moe.experts.92.w3", "model.layers.19.block_sparse_moe.experts.93.w3", "model.layers.19.block_sparse_moe.experts.94.w3", "model.layers.19.block_sparse_moe.experts.95.w3", "model.layers.19.block_sparse_moe.experts.96.w3", "model.layers.19.block_sparse_moe.experts.97.w3", "model.layers.19.block_sparse_moe.experts.98.w3", "model.layers.19.block_sparse_moe.experts.99.w3", "model.layers.19.block_sparse_moe.experts.100.w3", "model.layers.19.block_sparse_moe.experts.101.w3", "model.layers.19.block_sparse_moe.experts.102.w3", "model.layers.19.block_sparse_moe.experts.103.w3", "model.layers.19.block_sparse_moe.experts.104.w3", "model.layers.19.block_sparse_moe.experts.105.w3", "model.layers.19.block_sparse_moe.experts.106.w3", "model.layers.19.block_sparse_moe.experts.107.w3", "model.layers.19.block_sparse_moe.experts.108.w3", "model.layers.19.block_sparse_moe.experts.109.w3", "model.layers.19.block_sparse_moe.experts.110.w3", "model.layers.19.block_sparse_moe.experts.111.w3", "model.layers.19.block_sparse_moe.experts.112.w3", "model.layers.19.block_sparse_moe.experts.113.w3", "model.layers.19.block_sparse_moe.experts.114.w3", "model.layers.19.block_sparse_moe.experts.115.w3", "model.layers.19.block_sparse_moe.experts.116.w3", "model.layers.19.block_sparse_moe.experts.117.w3", "model.layers.19.block_sparse_moe.experts.118.w3", "model.layers.19.block_sparse_moe.experts.119.w3", "model.layers.19.block_sparse_moe.experts.120.w3", "model.layers.19.block_sparse_moe.experts.121.w3", "model.layers.19.block_sparse_moe.experts.122.w3", "model.layers.19.block_sparse_moe.experts.123.w3", "model.layers.19.block_sparse_moe.experts.124.w3", "model.layers.19.block_sparse_moe.experts.125.w3", "model.layers.19.block_sparse_moe.experts.126.w3", "model.layers.19.block_sparse_moe.experts.127.w3", "model.layers.19.block_sparse_moe.experts.128.w3", "model.layers.19.block_sparse_moe.experts.129.w3", "model.layers.19.block_sparse_moe.experts.130.w3", "model.layers.19.block_sparse_moe.experts.131.w3", "model.layers.19.block_sparse_moe.experts.132.w3", "model.layers.19.block_sparse_moe.experts.133.w3", "model.layers.19.block_sparse_moe.experts.134.w3", "model.layers.19.block_sparse_moe.experts.135.w3", "model.layers.19.block_sparse_moe.experts.136.w3", "model.layers.19.block_sparse_moe.experts.137.w3", "model.layers.19.block_sparse_moe.experts.138.w3", "model.layers.19.block_sparse_moe.experts.139.w3", "model.layers.19.block_sparse_moe.experts.140.w3", "model.layers.19.block_sparse_moe.experts.141.w3", "model.layers.19.block_sparse_moe.experts.142.w3", "model.layers.19.block_sparse_moe.experts.143.w3", "model.layers.19.block_sparse_moe.experts.144.w3", "model.layers.19.block_sparse_moe.experts.145.w3", "model.layers.19.block_sparse_moe.experts.146.w3", "model.layers.19.block_sparse_moe.experts.147.w3", "model.layers.19.block_sparse_moe.experts.148.w3", "model.layers.19.block_sparse_moe.experts.149.w3", "model.layers.19.block_sparse_moe.experts.150.w3", "model.layers.19.block_sparse_moe.experts.151.w3", "model.layers.19.block_sparse_moe.experts.152.w3", "model.layers.19.block_sparse_moe.experts.153.w3", "model.layers.19.block_sparse_moe.experts.154.w3", "model.layers.19.block_sparse_moe.experts.155.w3", "model.layers.19.block_sparse_moe.experts.156.w3", "model.layers.19.block_sparse_moe.experts.157.w3", "model.layers.19.block_sparse_moe.experts.158.w3", "model.layers.19.block_sparse_moe.experts.159.w3", "model.layers.19.block_sparse_moe.experts.160.w3", "model.layers.19.block_sparse_moe.experts.161.w3", "model.layers.19.block_sparse_moe.experts.162.w3", "model.layers.19.block_sparse_moe.experts.163.w3", "model.layers.19.block_sparse_moe.experts.164.w3", "model.layers.19.block_sparse_moe.experts.165.w3", "model.layers.19.block_sparse_moe.experts.166.w3", "model.layers.19.block_sparse_moe.experts.167.w3", "model.layers.19.block_sparse_moe.experts.168.w3", "model.layers.19.block_sparse_moe.experts.169.w3", "model.layers.19.block_sparse_moe.experts.170.w3", "model.layers.19.block_sparse_moe.experts.171.w3", "model.layers.19.block_sparse_moe.experts.172.w3", "model.layers.19.block_sparse_moe.experts.173.w3", "model.layers.19.block_sparse_moe.experts.174.w3", "model.layers.19.block_sparse_moe.experts.175.w3", "model.layers.19.block_sparse_moe.experts.176.w3", "model.layers.19.block_sparse_moe.experts.177.w3", "model.layers.19.block_sparse_moe.experts.178.w3", "model.layers.19.block_sparse_moe.experts.179.w3", "model.layers.19.block_sparse_moe.experts.180.w3", "model.layers.19.block_sparse_moe.experts.181.w3", "model.layers.19.block_sparse_moe.experts.182.w3", "model.layers.19.block_sparse_moe.experts.183.w3", "model.layers.19.block_sparse_moe.experts.184.w3", "model.layers.19.block_sparse_moe.experts.185.w3", "model.layers.19.block_sparse_moe.experts.186.w3", "model.layers.19.block_sparse_moe.experts.187.w3", "model.layers.19.block_sparse_moe.experts.188.w3", "model.layers.19.block_sparse_moe.experts.189.w3", "model.layers.19.block_sparse_moe.experts.190.w3", "model.layers.19.block_sparse_moe.experts.191.w3", "model.layers.19.block_sparse_moe.experts.192.w3", "model.layers.19.block_sparse_moe.experts.193.w3", "model.layers.19.block_sparse_moe.experts.194.w3", "model.layers.19.block_sparse_moe.experts.195.w3", "model.layers.19.block_sparse_moe.experts.196.w3", "model.layers.19.block_sparse_moe.experts.197.w3", "model.layers.19.block_sparse_moe.experts.198.w3", "model.layers.19.block_sparse_moe.experts.199.w3", "model.layers.19.block_sparse_moe.experts.200.w3", "model.layers.19.block_sparse_moe.experts.201.w3", "model.layers.19.block_sparse_moe.experts.202.w3", "model.layers.19.block_sparse_moe.experts.203.w3", "model.layers.19.block_sparse_moe.experts.204.w3", "model.layers.19.block_sparse_moe.experts.205.w3", "model.layers.19.block_sparse_moe.experts.206.w3", "model.layers.19.block_sparse_moe.experts.207.w3", "model.layers.19.block_sparse_moe.experts.208.w3", "model.layers.19.block_sparse_moe.experts.209.w3", "model.layers.19.block_sparse_moe.experts.210.w3", "model.layers.19.block_sparse_moe.experts.211.w3", "model.layers.19.block_sparse_moe.experts.212.w3", "model.layers.19.block_sparse_moe.experts.213.w3", "model.layers.19.block_sparse_moe.experts.214.w3", "model.layers.19.block_sparse_moe.experts.215.w3", "model.layers.19.block_sparse_moe.experts.216.w3", "model.layers.19.block_sparse_moe.experts.217.w3", "model.layers.19.block_sparse_moe.experts.218.w3", "model.layers.19.block_sparse_moe.experts.219.w3", "model.layers.19.block_sparse_moe.experts.220.w3", "model.layers.19.block_sparse_moe.experts.221.w3", "model.layers.19.block_sparse_moe.experts.222.w3", "model.layers.19.block_sparse_moe.experts.223.w3", "model.layers.19.block_sparse_moe.experts.224.w3", "model.layers.19.block_sparse_moe.experts.225.w3", "model.layers.19.block_sparse_moe.experts.226.w3", "model.layers.19.block_sparse_moe.experts.227.w3", "model.layers.19.block_sparse_moe.experts.228.w3", "model.layers.19.block_sparse_moe.experts.229.w3", "model.layers.19.block_sparse_moe.experts.230.w3", "model.layers.19.block_sparse_moe.experts.231.w3", "model.layers.19.block_sparse_moe.experts.232.w3", "model.layers.19.block_sparse_moe.experts.233.w3", "model.layers.19.block_sparse_moe.experts.234.w3", "model.layers.19.block_sparse_moe.experts.235.w3", "model.layers.19.block_sparse_moe.experts.236.w3", "model.layers.19.block_sparse_moe.experts.237.w3", "model.layers.19.block_sparse_moe.experts.238.w3", "model.layers.19.block_sparse_moe.experts.239.w3", "model.layers.19.block_sparse_moe.experts.240.w3", "model.layers.19.block_sparse_moe.experts.241.w3", "model.layers.19.block_sparse_moe.experts.242.w3", "model.layers.19.block_sparse_moe.experts.243.w3", "model.layers.19.block_sparse_moe.experts.244.w3", "model.layers.19.block_sparse_moe.experts.245.w3", "model.layers.19.block_sparse_moe.experts.246.w3", "model.layers.19.block_sparse_moe.experts.247.w3", "model.layers.19.block_sparse_moe.experts.248.w3", "model.layers.19.block_sparse_moe.experts.249.w3", "model.layers.19.block_sparse_moe.experts.250.w3", "model.layers.19.block_sparse_moe.experts.251.w3", "model.layers.19.block_sparse_moe.experts.252.w3", "model.layers.19.block_sparse_moe.experts.253.w3", "model.layers.19.block_sparse_moe.experts.254.w3", "model.layers.19.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0007880110293627118, "dbits": 2415919104 } ] }, { "idx": 99, "layers": [ "model.layers.19.block_sparse_moe.experts.0.w2", "model.layers.19.block_sparse_moe.experts.1.w2", "model.layers.19.block_sparse_moe.experts.2.w2", "model.layers.19.block_sparse_moe.experts.3.w2", "model.layers.19.block_sparse_moe.experts.4.w2", "model.layers.19.block_sparse_moe.experts.5.w2", "model.layers.19.block_sparse_moe.experts.6.w2", "model.layers.19.block_sparse_moe.experts.7.w2", "model.layers.19.block_sparse_moe.experts.8.w2", "model.layers.19.block_sparse_moe.experts.9.w2", "model.layers.19.block_sparse_moe.experts.10.w2", "model.layers.19.block_sparse_moe.experts.11.w2", "model.layers.19.block_sparse_moe.experts.12.w2", "model.layers.19.block_sparse_moe.experts.13.w2", "model.layers.19.block_sparse_moe.experts.14.w2", "model.layers.19.block_sparse_moe.experts.15.w2", "model.layers.19.block_sparse_moe.experts.16.w2", "model.layers.19.block_sparse_moe.experts.17.w2", "model.layers.19.block_sparse_moe.experts.18.w2", "model.layers.19.block_sparse_moe.experts.19.w2", "model.layers.19.block_sparse_moe.experts.20.w2", "model.layers.19.block_sparse_moe.experts.21.w2", "model.layers.19.block_sparse_moe.experts.22.w2", "model.layers.19.block_sparse_moe.experts.23.w2", "model.layers.19.block_sparse_moe.experts.24.w2", "model.layers.19.block_sparse_moe.experts.25.w2", "model.layers.19.block_sparse_moe.experts.26.w2", "model.layers.19.block_sparse_moe.experts.27.w2", "model.layers.19.block_sparse_moe.experts.28.w2", "model.layers.19.block_sparse_moe.experts.29.w2", "model.layers.19.block_sparse_moe.experts.30.w2", "model.layers.19.block_sparse_moe.experts.31.w2", "model.layers.19.block_sparse_moe.experts.32.w2", "model.layers.19.block_sparse_moe.experts.33.w2", "model.layers.19.block_sparse_moe.experts.34.w2", "model.layers.19.block_sparse_moe.experts.35.w2", "model.layers.19.block_sparse_moe.experts.36.w2", "model.layers.19.block_sparse_moe.experts.37.w2", "model.layers.19.block_sparse_moe.experts.38.w2", "model.layers.19.block_sparse_moe.experts.39.w2", "model.layers.19.block_sparse_moe.experts.40.w2", "model.layers.19.block_sparse_moe.experts.41.w2", "model.layers.19.block_sparse_moe.experts.42.w2", "model.layers.19.block_sparse_moe.experts.43.w2", "model.layers.19.block_sparse_moe.experts.44.w2", "model.layers.19.block_sparse_moe.experts.45.w2", "model.layers.19.block_sparse_moe.experts.46.w2", "model.layers.19.block_sparse_moe.experts.47.w2", "model.layers.19.block_sparse_moe.experts.48.w2", "model.layers.19.block_sparse_moe.experts.49.w2", "model.layers.19.block_sparse_moe.experts.50.w2", "model.layers.19.block_sparse_moe.experts.51.w2", "model.layers.19.block_sparse_moe.experts.52.w2", "model.layers.19.block_sparse_moe.experts.53.w2", "model.layers.19.block_sparse_moe.experts.54.w2", "model.layers.19.block_sparse_moe.experts.55.w2", "model.layers.19.block_sparse_moe.experts.56.w2", "model.layers.19.block_sparse_moe.experts.57.w2", "model.layers.19.block_sparse_moe.experts.58.w2", "model.layers.19.block_sparse_moe.experts.59.w2", "model.layers.19.block_sparse_moe.experts.60.w2", "model.layers.19.block_sparse_moe.experts.61.w2", "model.layers.19.block_sparse_moe.experts.62.w2", "model.layers.19.block_sparse_moe.experts.63.w2", "model.layers.19.block_sparse_moe.experts.64.w2", "model.layers.19.block_sparse_moe.experts.65.w2", "model.layers.19.block_sparse_moe.experts.66.w2", "model.layers.19.block_sparse_moe.experts.67.w2", "model.layers.19.block_sparse_moe.experts.68.w2", "model.layers.19.block_sparse_moe.experts.69.w2", "model.layers.19.block_sparse_moe.experts.70.w2", "model.layers.19.block_sparse_moe.experts.71.w2", "model.layers.19.block_sparse_moe.experts.72.w2", "model.layers.19.block_sparse_moe.experts.73.w2", "model.layers.19.block_sparse_moe.experts.74.w2", "model.layers.19.block_sparse_moe.experts.75.w2", "model.layers.19.block_sparse_moe.experts.76.w2", "model.layers.19.block_sparse_moe.experts.77.w2", "model.layers.19.block_sparse_moe.experts.78.w2", "model.layers.19.block_sparse_moe.experts.79.w2", "model.layers.19.block_sparse_moe.experts.80.w2", "model.layers.19.block_sparse_moe.experts.81.w2", "model.layers.19.block_sparse_moe.experts.82.w2", "model.layers.19.block_sparse_moe.experts.83.w2", "model.layers.19.block_sparse_moe.experts.84.w2", "model.layers.19.block_sparse_moe.experts.85.w2", "model.layers.19.block_sparse_moe.experts.86.w2", "model.layers.19.block_sparse_moe.experts.87.w2", "model.layers.19.block_sparse_moe.experts.88.w2", "model.layers.19.block_sparse_moe.experts.89.w2", "model.layers.19.block_sparse_moe.experts.90.w2", "model.layers.19.block_sparse_moe.experts.91.w2", "model.layers.19.block_sparse_moe.experts.92.w2", "model.layers.19.block_sparse_moe.experts.93.w2", "model.layers.19.block_sparse_moe.experts.94.w2", "model.layers.19.block_sparse_moe.experts.95.w2", "model.layers.19.block_sparse_moe.experts.96.w2", "model.layers.19.block_sparse_moe.experts.97.w2", "model.layers.19.block_sparse_moe.experts.98.w2", "model.layers.19.block_sparse_moe.experts.99.w2", "model.layers.19.block_sparse_moe.experts.100.w2", "model.layers.19.block_sparse_moe.experts.101.w2", "model.layers.19.block_sparse_moe.experts.102.w2", "model.layers.19.block_sparse_moe.experts.103.w2", "model.layers.19.block_sparse_moe.experts.104.w2", "model.layers.19.block_sparse_moe.experts.105.w2", "model.layers.19.block_sparse_moe.experts.106.w2", "model.layers.19.block_sparse_moe.experts.107.w2", "model.layers.19.block_sparse_moe.experts.108.w2", "model.layers.19.block_sparse_moe.experts.109.w2", "model.layers.19.block_sparse_moe.experts.110.w2", "model.layers.19.block_sparse_moe.experts.111.w2", "model.layers.19.block_sparse_moe.experts.112.w2", "model.layers.19.block_sparse_moe.experts.113.w2", "model.layers.19.block_sparse_moe.experts.114.w2", "model.layers.19.block_sparse_moe.experts.115.w2", "model.layers.19.block_sparse_moe.experts.116.w2", "model.layers.19.block_sparse_moe.experts.117.w2", "model.layers.19.block_sparse_moe.experts.118.w2", "model.layers.19.block_sparse_moe.experts.119.w2", "model.layers.19.block_sparse_moe.experts.120.w2", "model.layers.19.block_sparse_moe.experts.121.w2", "model.layers.19.block_sparse_moe.experts.122.w2", "model.layers.19.block_sparse_moe.experts.123.w2", "model.layers.19.block_sparse_moe.experts.124.w2", "model.layers.19.block_sparse_moe.experts.125.w2", "model.layers.19.block_sparse_moe.experts.126.w2", "model.layers.19.block_sparse_moe.experts.127.w2", "model.layers.19.block_sparse_moe.experts.128.w2", "model.layers.19.block_sparse_moe.experts.129.w2", "model.layers.19.block_sparse_moe.experts.130.w2", "model.layers.19.block_sparse_moe.experts.131.w2", "model.layers.19.block_sparse_moe.experts.132.w2", "model.layers.19.block_sparse_moe.experts.133.w2", "model.layers.19.block_sparse_moe.experts.134.w2", "model.layers.19.block_sparse_moe.experts.135.w2", "model.layers.19.block_sparse_moe.experts.136.w2", "model.layers.19.block_sparse_moe.experts.137.w2", "model.layers.19.block_sparse_moe.experts.138.w2", "model.layers.19.block_sparse_moe.experts.139.w2", "model.layers.19.block_sparse_moe.experts.140.w2", "model.layers.19.block_sparse_moe.experts.141.w2", "model.layers.19.block_sparse_moe.experts.142.w2", "model.layers.19.block_sparse_moe.experts.143.w2", "model.layers.19.block_sparse_moe.experts.144.w2", "model.layers.19.block_sparse_moe.experts.145.w2", "model.layers.19.block_sparse_moe.experts.146.w2", "model.layers.19.block_sparse_moe.experts.147.w2", "model.layers.19.block_sparse_moe.experts.148.w2", "model.layers.19.block_sparse_moe.experts.149.w2", "model.layers.19.block_sparse_moe.experts.150.w2", "model.layers.19.block_sparse_moe.experts.151.w2", "model.layers.19.block_sparse_moe.experts.152.w2", "model.layers.19.block_sparse_moe.experts.153.w2", "model.layers.19.block_sparse_moe.experts.154.w2", "model.layers.19.block_sparse_moe.experts.155.w2", "model.layers.19.block_sparse_moe.experts.156.w2", "model.layers.19.block_sparse_moe.experts.157.w2", "model.layers.19.block_sparse_moe.experts.158.w2", "model.layers.19.block_sparse_moe.experts.159.w2", "model.layers.19.block_sparse_moe.experts.160.w2", "model.layers.19.block_sparse_moe.experts.161.w2", "model.layers.19.block_sparse_moe.experts.162.w2", "model.layers.19.block_sparse_moe.experts.163.w2", "model.layers.19.block_sparse_moe.experts.164.w2", "model.layers.19.block_sparse_moe.experts.165.w2", "model.layers.19.block_sparse_moe.experts.166.w2", "model.layers.19.block_sparse_moe.experts.167.w2", "model.layers.19.block_sparse_moe.experts.168.w2", "model.layers.19.block_sparse_moe.experts.169.w2", "model.layers.19.block_sparse_moe.experts.170.w2", "model.layers.19.block_sparse_moe.experts.171.w2", "model.layers.19.block_sparse_moe.experts.172.w2", "model.layers.19.block_sparse_moe.experts.173.w2", "model.layers.19.block_sparse_moe.experts.174.w2", "model.layers.19.block_sparse_moe.experts.175.w2", "model.layers.19.block_sparse_moe.experts.176.w2", "model.layers.19.block_sparse_moe.experts.177.w2", "model.layers.19.block_sparse_moe.experts.178.w2", "model.layers.19.block_sparse_moe.experts.179.w2", "model.layers.19.block_sparse_moe.experts.180.w2", "model.layers.19.block_sparse_moe.experts.181.w2", "model.layers.19.block_sparse_moe.experts.182.w2", "model.layers.19.block_sparse_moe.experts.183.w2", "model.layers.19.block_sparse_moe.experts.184.w2", "model.layers.19.block_sparse_moe.experts.185.w2", "model.layers.19.block_sparse_moe.experts.186.w2", "model.layers.19.block_sparse_moe.experts.187.w2", "model.layers.19.block_sparse_moe.experts.188.w2", "model.layers.19.block_sparse_moe.experts.189.w2", "model.layers.19.block_sparse_moe.experts.190.w2", "model.layers.19.block_sparse_moe.experts.191.w2", "model.layers.19.block_sparse_moe.experts.192.w2", "model.layers.19.block_sparse_moe.experts.193.w2", "model.layers.19.block_sparse_moe.experts.194.w2", "model.layers.19.block_sparse_moe.experts.195.w2", "model.layers.19.block_sparse_moe.experts.196.w2", "model.layers.19.block_sparse_moe.experts.197.w2", "model.layers.19.block_sparse_moe.experts.198.w2", "model.layers.19.block_sparse_moe.experts.199.w2", "model.layers.19.block_sparse_moe.experts.200.w2", "model.layers.19.block_sparse_moe.experts.201.w2", "model.layers.19.block_sparse_moe.experts.202.w2", "model.layers.19.block_sparse_moe.experts.203.w2", "model.layers.19.block_sparse_moe.experts.204.w2", "model.layers.19.block_sparse_moe.experts.205.w2", "model.layers.19.block_sparse_moe.experts.206.w2", "model.layers.19.block_sparse_moe.experts.207.w2", "model.layers.19.block_sparse_moe.experts.208.w2", "model.layers.19.block_sparse_moe.experts.209.w2", "model.layers.19.block_sparse_moe.experts.210.w2", "model.layers.19.block_sparse_moe.experts.211.w2", "model.layers.19.block_sparse_moe.experts.212.w2", "model.layers.19.block_sparse_moe.experts.213.w2", "model.layers.19.block_sparse_moe.experts.214.w2", "model.layers.19.block_sparse_moe.experts.215.w2", "model.layers.19.block_sparse_moe.experts.216.w2", "model.layers.19.block_sparse_moe.experts.217.w2", "model.layers.19.block_sparse_moe.experts.218.w2", "model.layers.19.block_sparse_moe.experts.219.w2", "model.layers.19.block_sparse_moe.experts.220.w2", "model.layers.19.block_sparse_moe.experts.221.w2", "model.layers.19.block_sparse_moe.experts.222.w2", "model.layers.19.block_sparse_moe.experts.223.w2", "model.layers.19.block_sparse_moe.experts.224.w2", "model.layers.19.block_sparse_moe.experts.225.w2", "model.layers.19.block_sparse_moe.experts.226.w2", "model.layers.19.block_sparse_moe.experts.227.w2", "model.layers.19.block_sparse_moe.experts.228.w2", "model.layers.19.block_sparse_moe.experts.229.w2", "model.layers.19.block_sparse_moe.experts.230.w2", "model.layers.19.block_sparse_moe.experts.231.w2", "model.layers.19.block_sparse_moe.experts.232.w2", "model.layers.19.block_sparse_moe.experts.233.w2", "model.layers.19.block_sparse_moe.experts.234.w2", "model.layers.19.block_sparse_moe.experts.235.w2", "model.layers.19.block_sparse_moe.experts.236.w2", "model.layers.19.block_sparse_moe.experts.237.w2", "model.layers.19.block_sparse_moe.experts.238.w2", "model.layers.19.block_sparse_moe.experts.239.w2", "model.layers.19.block_sparse_moe.experts.240.w2", "model.layers.19.block_sparse_moe.experts.241.w2", "model.layers.19.block_sparse_moe.experts.242.w2", "model.layers.19.block_sparse_moe.experts.243.w2", "model.layers.19.block_sparse_moe.experts.244.w2", "model.layers.19.block_sparse_moe.experts.245.w2", "model.layers.19.block_sparse_moe.experts.246.w2", "model.layers.19.block_sparse_moe.experts.247.w2", "model.layers.19.block_sparse_moe.experts.248.w2", "model.layers.19.block_sparse_moe.experts.249.w2", "model.layers.19.block_sparse_moe.experts.250.w2", "model.layers.19.block_sparse_moe.experts.251.w2", "model.layers.19.block_sparse_moe.experts.252.w2", "model.layers.19.block_sparse_moe.experts.253.w2", "model.layers.19.block_sparse_moe.experts.254.w2", "model.layers.19.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.00010529495775701836, "dbits": 1207959552 } ] }, { "idx": 100, "layers": [ "model.layers.20.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0003868676722049713, "dbits": 18874368 } ] }, { "idx": 101, "layers": [ "model.layers.20.self_attn.k_proj", "model.layers.20.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0081009451299906, "dbits": 6291456 } ] }, { "idx": 102, "layers": [ "model.layers.20.self_attn.o_proj" ], "candidates": [ { "dkld": -0.006502729281783137, "dbits": 18874368 } ] }, { "idx": 103, "layers": [ "model.layers.20.block_sparse_moe.experts.0.w1", "model.layers.20.block_sparse_moe.experts.1.w1", "model.layers.20.block_sparse_moe.experts.2.w1", "model.layers.20.block_sparse_moe.experts.3.w1", "model.layers.20.block_sparse_moe.experts.4.w1", "model.layers.20.block_sparse_moe.experts.5.w1", "model.layers.20.block_sparse_moe.experts.6.w1", "model.layers.20.block_sparse_moe.experts.7.w1", "model.layers.20.block_sparse_moe.experts.8.w1", "model.layers.20.block_sparse_moe.experts.9.w1", "model.layers.20.block_sparse_moe.experts.10.w1", "model.layers.20.block_sparse_moe.experts.11.w1", "model.layers.20.block_sparse_moe.experts.12.w1", "model.layers.20.block_sparse_moe.experts.13.w1", "model.layers.20.block_sparse_moe.experts.14.w1", "model.layers.20.block_sparse_moe.experts.15.w1", "model.layers.20.block_sparse_moe.experts.16.w1", "model.layers.20.block_sparse_moe.experts.17.w1", "model.layers.20.block_sparse_moe.experts.18.w1", "model.layers.20.block_sparse_moe.experts.19.w1", "model.layers.20.block_sparse_moe.experts.20.w1", "model.layers.20.block_sparse_moe.experts.21.w1", "model.layers.20.block_sparse_moe.experts.22.w1", "model.layers.20.block_sparse_moe.experts.23.w1", "model.layers.20.block_sparse_moe.experts.24.w1", "model.layers.20.block_sparse_moe.experts.25.w1", "model.layers.20.block_sparse_moe.experts.26.w1", "model.layers.20.block_sparse_moe.experts.27.w1", "model.layers.20.block_sparse_moe.experts.28.w1", "model.layers.20.block_sparse_moe.experts.29.w1", "model.layers.20.block_sparse_moe.experts.30.w1", "model.layers.20.block_sparse_moe.experts.31.w1", "model.layers.20.block_sparse_moe.experts.32.w1", "model.layers.20.block_sparse_moe.experts.33.w1", "model.layers.20.block_sparse_moe.experts.34.w1", "model.layers.20.block_sparse_moe.experts.35.w1", "model.layers.20.block_sparse_moe.experts.36.w1", "model.layers.20.block_sparse_moe.experts.37.w1", "model.layers.20.block_sparse_moe.experts.38.w1", "model.layers.20.block_sparse_moe.experts.39.w1", "model.layers.20.block_sparse_moe.experts.40.w1", "model.layers.20.block_sparse_moe.experts.41.w1", "model.layers.20.block_sparse_moe.experts.42.w1", "model.layers.20.block_sparse_moe.experts.43.w1", "model.layers.20.block_sparse_moe.experts.44.w1", "model.layers.20.block_sparse_moe.experts.45.w1", "model.layers.20.block_sparse_moe.experts.46.w1", "model.layers.20.block_sparse_moe.experts.47.w1", "model.layers.20.block_sparse_moe.experts.48.w1", "model.layers.20.block_sparse_moe.experts.49.w1", "model.layers.20.block_sparse_moe.experts.50.w1", "model.layers.20.block_sparse_moe.experts.51.w1", "model.layers.20.block_sparse_moe.experts.52.w1", "model.layers.20.block_sparse_moe.experts.53.w1", "model.layers.20.block_sparse_moe.experts.54.w1", "model.layers.20.block_sparse_moe.experts.55.w1", "model.layers.20.block_sparse_moe.experts.56.w1", "model.layers.20.block_sparse_moe.experts.57.w1", "model.layers.20.block_sparse_moe.experts.58.w1", "model.layers.20.block_sparse_moe.experts.59.w1", "model.layers.20.block_sparse_moe.experts.60.w1", "model.layers.20.block_sparse_moe.experts.61.w1", "model.layers.20.block_sparse_moe.experts.62.w1", "model.layers.20.block_sparse_moe.experts.63.w1", "model.layers.20.block_sparse_moe.experts.64.w1", "model.layers.20.block_sparse_moe.experts.65.w1", "model.layers.20.block_sparse_moe.experts.66.w1", "model.layers.20.block_sparse_moe.experts.67.w1", "model.layers.20.block_sparse_moe.experts.68.w1", "model.layers.20.block_sparse_moe.experts.69.w1", "model.layers.20.block_sparse_moe.experts.70.w1", "model.layers.20.block_sparse_moe.experts.71.w1", "model.layers.20.block_sparse_moe.experts.72.w1", "model.layers.20.block_sparse_moe.experts.73.w1", "model.layers.20.block_sparse_moe.experts.74.w1", "model.layers.20.block_sparse_moe.experts.75.w1", "model.layers.20.block_sparse_moe.experts.76.w1", "model.layers.20.block_sparse_moe.experts.77.w1", "model.layers.20.block_sparse_moe.experts.78.w1", "model.layers.20.block_sparse_moe.experts.79.w1", "model.layers.20.block_sparse_moe.experts.80.w1", "model.layers.20.block_sparse_moe.experts.81.w1", "model.layers.20.block_sparse_moe.experts.82.w1", "model.layers.20.block_sparse_moe.experts.83.w1", "model.layers.20.block_sparse_moe.experts.84.w1", "model.layers.20.block_sparse_moe.experts.85.w1", "model.layers.20.block_sparse_moe.experts.86.w1", "model.layers.20.block_sparse_moe.experts.87.w1", "model.layers.20.block_sparse_moe.experts.88.w1", "model.layers.20.block_sparse_moe.experts.89.w1", "model.layers.20.block_sparse_moe.experts.90.w1", "model.layers.20.block_sparse_moe.experts.91.w1", "model.layers.20.block_sparse_moe.experts.92.w1", "model.layers.20.block_sparse_moe.experts.93.w1", "model.layers.20.block_sparse_moe.experts.94.w1", "model.layers.20.block_sparse_moe.experts.95.w1", "model.layers.20.block_sparse_moe.experts.96.w1", "model.layers.20.block_sparse_moe.experts.97.w1", "model.layers.20.block_sparse_moe.experts.98.w1", "model.layers.20.block_sparse_moe.experts.99.w1", "model.layers.20.block_sparse_moe.experts.100.w1", "model.layers.20.block_sparse_moe.experts.101.w1", "model.layers.20.block_sparse_moe.experts.102.w1", "model.layers.20.block_sparse_moe.experts.103.w1", "model.layers.20.block_sparse_moe.experts.104.w1", "model.layers.20.block_sparse_moe.experts.105.w1", "model.layers.20.block_sparse_moe.experts.106.w1", "model.layers.20.block_sparse_moe.experts.107.w1", "model.layers.20.block_sparse_moe.experts.108.w1", "model.layers.20.block_sparse_moe.experts.109.w1", "model.layers.20.block_sparse_moe.experts.110.w1", "model.layers.20.block_sparse_moe.experts.111.w1", "model.layers.20.block_sparse_moe.experts.112.w1", "model.layers.20.block_sparse_moe.experts.113.w1", "model.layers.20.block_sparse_moe.experts.114.w1", "model.layers.20.block_sparse_moe.experts.115.w1", "model.layers.20.block_sparse_moe.experts.116.w1", "model.layers.20.block_sparse_moe.experts.117.w1", "model.layers.20.block_sparse_moe.experts.118.w1", "model.layers.20.block_sparse_moe.experts.119.w1", "model.layers.20.block_sparse_moe.experts.120.w1", "model.layers.20.block_sparse_moe.experts.121.w1", "model.layers.20.block_sparse_moe.experts.122.w1", "model.layers.20.block_sparse_moe.experts.123.w1", "model.layers.20.block_sparse_moe.experts.124.w1", "model.layers.20.block_sparse_moe.experts.125.w1", "model.layers.20.block_sparse_moe.experts.126.w1", "model.layers.20.block_sparse_moe.experts.127.w1", "model.layers.20.block_sparse_moe.experts.128.w1", "model.layers.20.block_sparse_moe.experts.129.w1", "model.layers.20.block_sparse_moe.experts.130.w1", "model.layers.20.block_sparse_moe.experts.131.w1", "model.layers.20.block_sparse_moe.experts.132.w1", "model.layers.20.block_sparse_moe.experts.133.w1", "model.layers.20.block_sparse_moe.experts.134.w1", "model.layers.20.block_sparse_moe.experts.135.w1", "model.layers.20.block_sparse_moe.experts.136.w1", "model.layers.20.block_sparse_moe.experts.137.w1", "model.layers.20.block_sparse_moe.experts.138.w1", "model.layers.20.block_sparse_moe.experts.139.w1", "model.layers.20.block_sparse_moe.experts.140.w1", "model.layers.20.block_sparse_moe.experts.141.w1", "model.layers.20.block_sparse_moe.experts.142.w1", "model.layers.20.block_sparse_moe.experts.143.w1", "model.layers.20.block_sparse_moe.experts.144.w1", "model.layers.20.block_sparse_moe.experts.145.w1", "model.layers.20.block_sparse_moe.experts.146.w1", "model.layers.20.block_sparse_moe.experts.147.w1", "model.layers.20.block_sparse_moe.experts.148.w1", "model.layers.20.block_sparse_moe.experts.149.w1", "model.layers.20.block_sparse_moe.experts.150.w1", "model.layers.20.block_sparse_moe.experts.151.w1", "model.layers.20.block_sparse_moe.experts.152.w1", "model.layers.20.block_sparse_moe.experts.153.w1", "model.layers.20.block_sparse_moe.experts.154.w1", "model.layers.20.block_sparse_moe.experts.155.w1", "model.layers.20.block_sparse_moe.experts.156.w1", "model.layers.20.block_sparse_moe.experts.157.w1", "model.layers.20.block_sparse_moe.experts.158.w1", "model.layers.20.block_sparse_moe.experts.159.w1", "model.layers.20.block_sparse_moe.experts.160.w1", "model.layers.20.block_sparse_moe.experts.161.w1", "model.layers.20.block_sparse_moe.experts.162.w1", "model.layers.20.block_sparse_moe.experts.163.w1", "model.layers.20.block_sparse_moe.experts.164.w1", "model.layers.20.block_sparse_moe.experts.165.w1", "model.layers.20.block_sparse_moe.experts.166.w1", "model.layers.20.block_sparse_moe.experts.167.w1", "model.layers.20.block_sparse_moe.experts.168.w1", "model.layers.20.block_sparse_moe.experts.169.w1", "model.layers.20.block_sparse_moe.experts.170.w1", "model.layers.20.block_sparse_moe.experts.171.w1", "model.layers.20.block_sparse_moe.experts.172.w1", "model.layers.20.block_sparse_moe.experts.173.w1", "model.layers.20.block_sparse_moe.experts.174.w1", "model.layers.20.block_sparse_moe.experts.175.w1", "model.layers.20.block_sparse_moe.experts.176.w1", "model.layers.20.block_sparse_moe.experts.177.w1", "model.layers.20.block_sparse_moe.experts.178.w1", "model.layers.20.block_sparse_moe.experts.179.w1", "model.layers.20.block_sparse_moe.experts.180.w1", "model.layers.20.block_sparse_moe.experts.181.w1", "model.layers.20.block_sparse_moe.experts.182.w1", "model.layers.20.block_sparse_moe.experts.183.w1", "model.layers.20.block_sparse_moe.experts.184.w1", "model.layers.20.block_sparse_moe.experts.185.w1", "model.layers.20.block_sparse_moe.experts.186.w1", "model.layers.20.block_sparse_moe.experts.187.w1", "model.layers.20.block_sparse_moe.experts.188.w1", "model.layers.20.block_sparse_moe.experts.189.w1", "model.layers.20.block_sparse_moe.experts.190.w1", "model.layers.20.block_sparse_moe.experts.191.w1", "model.layers.20.block_sparse_moe.experts.192.w1", "model.layers.20.block_sparse_moe.experts.193.w1", "model.layers.20.block_sparse_moe.experts.194.w1", "model.layers.20.block_sparse_moe.experts.195.w1", "model.layers.20.block_sparse_moe.experts.196.w1", "model.layers.20.block_sparse_moe.experts.197.w1", "model.layers.20.block_sparse_moe.experts.198.w1", "model.layers.20.block_sparse_moe.experts.199.w1", "model.layers.20.block_sparse_moe.experts.200.w1", "model.layers.20.block_sparse_moe.experts.201.w1", "model.layers.20.block_sparse_moe.experts.202.w1", "model.layers.20.block_sparse_moe.experts.203.w1", "model.layers.20.block_sparse_moe.experts.204.w1", "model.layers.20.block_sparse_moe.experts.205.w1", "model.layers.20.block_sparse_moe.experts.206.w1", "model.layers.20.block_sparse_moe.experts.207.w1", "model.layers.20.block_sparse_moe.experts.208.w1", "model.layers.20.block_sparse_moe.experts.209.w1", "model.layers.20.block_sparse_moe.experts.210.w1", "model.layers.20.block_sparse_moe.experts.211.w1", "model.layers.20.block_sparse_moe.experts.212.w1", "model.layers.20.block_sparse_moe.experts.213.w1", "model.layers.20.block_sparse_moe.experts.214.w1", "model.layers.20.block_sparse_moe.experts.215.w1", "model.layers.20.block_sparse_moe.experts.216.w1", "model.layers.20.block_sparse_moe.experts.217.w1", "model.layers.20.block_sparse_moe.experts.218.w1", "model.layers.20.block_sparse_moe.experts.219.w1", "model.layers.20.block_sparse_moe.experts.220.w1", "model.layers.20.block_sparse_moe.experts.221.w1", "model.layers.20.block_sparse_moe.experts.222.w1", "model.layers.20.block_sparse_moe.experts.223.w1", "model.layers.20.block_sparse_moe.experts.224.w1", "model.layers.20.block_sparse_moe.experts.225.w1", "model.layers.20.block_sparse_moe.experts.226.w1", "model.layers.20.block_sparse_moe.experts.227.w1", "model.layers.20.block_sparse_moe.experts.228.w1", "model.layers.20.block_sparse_moe.experts.229.w1", "model.layers.20.block_sparse_moe.experts.230.w1", "model.layers.20.block_sparse_moe.experts.231.w1", "model.layers.20.block_sparse_moe.experts.232.w1", "model.layers.20.block_sparse_moe.experts.233.w1", "model.layers.20.block_sparse_moe.experts.234.w1", "model.layers.20.block_sparse_moe.experts.235.w1", "model.layers.20.block_sparse_moe.experts.236.w1", "model.layers.20.block_sparse_moe.experts.237.w1", "model.layers.20.block_sparse_moe.experts.238.w1", "model.layers.20.block_sparse_moe.experts.239.w1", "model.layers.20.block_sparse_moe.experts.240.w1", "model.layers.20.block_sparse_moe.experts.241.w1", "model.layers.20.block_sparse_moe.experts.242.w1", "model.layers.20.block_sparse_moe.experts.243.w1", "model.layers.20.block_sparse_moe.experts.244.w1", "model.layers.20.block_sparse_moe.experts.245.w1", "model.layers.20.block_sparse_moe.experts.246.w1", "model.layers.20.block_sparse_moe.experts.247.w1", "model.layers.20.block_sparse_moe.experts.248.w1", "model.layers.20.block_sparse_moe.experts.249.w1", "model.layers.20.block_sparse_moe.experts.250.w1", "model.layers.20.block_sparse_moe.experts.251.w1", "model.layers.20.block_sparse_moe.experts.252.w1", "model.layers.20.block_sparse_moe.experts.253.w1", "model.layers.20.block_sparse_moe.experts.254.w1", "model.layers.20.block_sparse_moe.experts.255.w1", "model.layers.20.block_sparse_moe.experts.0.w3", "model.layers.20.block_sparse_moe.experts.1.w3", "model.layers.20.block_sparse_moe.experts.2.w3", "model.layers.20.block_sparse_moe.experts.3.w3", "model.layers.20.block_sparse_moe.experts.4.w3", "model.layers.20.block_sparse_moe.experts.5.w3", "model.layers.20.block_sparse_moe.experts.6.w3", "model.layers.20.block_sparse_moe.experts.7.w3", "model.layers.20.block_sparse_moe.experts.8.w3", "model.layers.20.block_sparse_moe.experts.9.w3", "model.layers.20.block_sparse_moe.experts.10.w3", "model.layers.20.block_sparse_moe.experts.11.w3", "model.layers.20.block_sparse_moe.experts.12.w3", "model.layers.20.block_sparse_moe.experts.13.w3", "model.layers.20.block_sparse_moe.experts.14.w3", "model.layers.20.block_sparse_moe.experts.15.w3", "model.layers.20.block_sparse_moe.experts.16.w3", "model.layers.20.block_sparse_moe.experts.17.w3", "model.layers.20.block_sparse_moe.experts.18.w3", "model.layers.20.block_sparse_moe.experts.19.w3", "model.layers.20.block_sparse_moe.experts.20.w3", "model.layers.20.block_sparse_moe.experts.21.w3", "model.layers.20.block_sparse_moe.experts.22.w3", "model.layers.20.block_sparse_moe.experts.23.w3", "model.layers.20.block_sparse_moe.experts.24.w3", "model.layers.20.block_sparse_moe.experts.25.w3", "model.layers.20.block_sparse_moe.experts.26.w3", "model.layers.20.block_sparse_moe.experts.27.w3", "model.layers.20.block_sparse_moe.experts.28.w3", "model.layers.20.block_sparse_moe.experts.29.w3", "model.layers.20.block_sparse_moe.experts.30.w3", "model.layers.20.block_sparse_moe.experts.31.w3", "model.layers.20.block_sparse_moe.experts.32.w3", "model.layers.20.block_sparse_moe.experts.33.w3", "model.layers.20.block_sparse_moe.experts.34.w3", "model.layers.20.block_sparse_moe.experts.35.w3", "model.layers.20.block_sparse_moe.experts.36.w3", "model.layers.20.block_sparse_moe.experts.37.w3", "model.layers.20.block_sparse_moe.experts.38.w3", "model.layers.20.block_sparse_moe.experts.39.w3", "model.layers.20.block_sparse_moe.experts.40.w3", "model.layers.20.block_sparse_moe.experts.41.w3", "model.layers.20.block_sparse_moe.experts.42.w3", "model.layers.20.block_sparse_moe.experts.43.w3", "model.layers.20.block_sparse_moe.experts.44.w3", "model.layers.20.block_sparse_moe.experts.45.w3", "model.layers.20.block_sparse_moe.experts.46.w3", "model.layers.20.block_sparse_moe.experts.47.w3", "model.layers.20.block_sparse_moe.experts.48.w3", "model.layers.20.block_sparse_moe.experts.49.w3", "model.layers.20.block_sparse_moe.experts.50.w3", "model.layers.20.block_sparse_moe.experts.51.w3", "model.layers.20.block_sparse_moe.experts.52.w3", "model.layers.20.block_sparse_moe.experts.53.w3", "model.layers.20.block_sparse_moe.experts.54.w3", "model.layers.20.block_sparse_moe.experts.55.w3", "model.layers.20.block_sparse_moe.experts.56.w3", "model.layers.20.block_sparse_moe.experts.57.w3", "model.layers.20.block_sparse_moe.experts.58.w3", "model.layers.20.block_sparse_moe.experts.59.w3", "model.layers.20.block_sparse_moe.experts.60.w3", "model.layers.20.block_sparse_moe.experts.61.w3", "model.layers.20.block_sparse_moe.experts.62.w3", "model.layers.20.block_sparse_moe.experts.63.w3", "model.layers.20.block_sparse_moe.experts.64.w3", "model.layers.20.block_sparse_moe.experts.65.w3", "model.layers.20.block_sparse_moe.experts.66.w3", "model.layers.20.block_sparse_moe.experts.67.w3", "model.layers.20.block_sparse_moe.experts.68.w3", "model.layers.20.block_sparse_moe.experts.69.w3", "model.layers.20.block_sparse_moe.experts.70.w3", "model.layers.20.block_sparse_moe.experts.71.w3", "model.layers.20.block_sparse_moe.experts.72.w3", "model.layers.20.block_sparse_moe.experts.73.w3", "model.layers.20.block_sparse_moe.experts.74.w3", "model.layers.20.block_sparse_moe.experts.75.w3", "model.layers.20.block_sparse_moe.experts.76.w3", "model.layers.20.block_sparse_moe.experts.77.w3", "model.layers.20.block_sparse_moe.experts.78.w3", "model.layers.20.block_sparse_moe.experts.79.w3", "model.layers.20.block_sparse_moe.experts.80.w3", "model.layers.20.block_sparse_moe.experts.81.w3", "model.layers.20.block_sparse_moe.experts.82.w3", "model.layers.20.block_sparse_moe.experts.83.w3", "model.layers.20.block_sparse_moe.experts.84.w3", "model.layers.20.block_sparse_moe.experts.85.w3", "model.layers.20.block_sparse_moe.experts.86.w3", "model.layers.20.block_sparse_moe.experts.87.w3", "model.layers.20.block_sparse_moe.experts.88.w3", "model.layers.20.block_sparse_moe.experts.89.w3", "model.layers.20.block_sparse_moe.experts.90.w3", "model.layers.20.block_sparse_moe.experts.91.w3", "model.layers.20.block_sparse_moe.experts.92.w3", "model.layers.20.block_sparse_moe.experts.93.w3", "model.layers.20.block_sparse_moe.experts.94.w3", "model.layers.20.block_sparse_moe.experts.95.w3", "model.layers.20.block_sparse_moe.experts.96.w3", "model.layers.20.block_sparse_moe.experts.97.w3", "model.layers.20.block_sparse_moe.experts.98.w3", "model.layers.20.block_sparse_moe.experts.99.w3", "model.layers.20.block_sparse_moe.experts.100.w3", "model.layers.20.block_sparse_moe.experts.101.w3", "model.layers.20.block_sparse_moe.experts.102.w3", "model.layers.20.block_sparse_moe.experts.103.w3", "model.layers.20.block_sparse_moe.experts.104.w3", "model.layers.20.block_sparse_moe.experts.105.w3", "model.layers.20.block_sparse_moe.experts.106.w3", "model.layers.20.block_sparse_moe.experts.107.w3", "model.layers.20.block_sparse_moe.experts.108.w3", "model.layers.20.block_sparse_moe.experts.109.w3", "model.layers.20.block_sparse_moe.experts.110.w3", "model.layers.20.block_sparse_moe.experts.111.w3", "model.layers.20.block_sparse_moe.experts.112.w3", "model.layers.20.block_sparse_moe.experts.113.w3", "model.layers.20.block_sparse_moe.experts.114.w3", "model.layers.20.block_sparse_moe.experts.115.w3", "model.layers.20.block_sparse_moe.experts.116.w3", "model.layers.20.block_sparse_moe.experts.117.w3", "model.layers.20.block_sparse_moe.experts.118.w3", "model.layers.20.block_sparse_moe.experts.119.w3", "model.layers.20.block_sparse_moe.experts.120.w3", "model.layers.20.block_sparse_moe.experts.121.w3", "model.layers.20.block_sparse_moe.experts.122.w3", "model.layers.20.block_sparse_moe.experts.123.w3", "model.layers.20.block_sparse_moe.experts.124.w3", "model.layers.20.block_sparse_moe.experts.125.w3", "model.layers.20.block_sparse_moe.experts.126.w3", "model.layers.20.block_sparse_moe.experts.127.w3", "model.layers.20.block_sparse_moe.experts.128.w3", "model.layers.20.block_sparse_moe.experts.129.w3", "model.layers.20.block_sparse_moe.experts.130.w3", "model.layers.20.block_sparse_moe.experts.131.w3", "model.layers.20.block_sparse_moe.experts.132.w3", "model.layers.20.block_sparse_moe.experts.133.w3", "model.layers.20.block_sparse_moe.experts.134.w3", "model.layers.20.block_sparse_moe.experts.135.w3", "model.layers.20.block_sparse_moe.experts.136.w3", "model.layers.20.block_sparse_moe.experts.137.w3", "model.layers.20.block_sparse_moe.experts.138.w3", "model.layers.20.block_sparse_moe.experts.139.w3", "model.layers.20.block_sparse_moe.experts.140.w3", "model.layers.20.block_sparse_moe.experts.141.w3", "model.layers.20.block_sparse_moe.experts.142.w3", "model.layers.20.block_sparse_moe.experts.143.w3", "model.layers.20.block_sparse_moe.experts.144.w3", "model.layers.20.block_sparse_moe.experts.145.w3", "model.layers.20.block_sparse_moe.experts.146.w3", "model.layers.20.block_sparse_moe.experts.147.w3", "model.layers.20.block_sparse_moe.experts.148.w3", "model.layers.20.block_sparse_moe.experts.149.w3", "model.layers.20.block_sparse_moe.experts.150.w3", "model.layers.20.block_sparse_moe.experts.151.w3", "model.layers.20.block_sparse_moe.experts.152.w3", "model.layers.20.block_sparse_moe.experts.153.w3", "model.layers.20.block_sparse_moe.experts.154.w3", "model.layers.20.block_sparse_moe.experts.155.w3", "model.layers.20.block_sparse_moe.experts.156.w3", "model.layers.20.block_sparse_moe.experts.157.w3", "model.layers.20.block_sparse_moe.experts.158.w3", "model.layers.20.block_sparse_moe.experts.159.w3", "model.layers.20.block_sparse_moe.experts.160.w3", "model.layers.20.block_sparse_moe.experts.161.w3", "model.layers.20.block_sparse_moe.experts.162.w3", "model.layers.20.block_sparse_moe.experts.163.w3", "model.layers.20.block_sparse_moe.experts.164.w3", "model.layers.20.block_sparse_moe.experts.165.w3", "model.layers.20.block_sparse_moe.experts.166.w3", "model.layers.20.block_sparse_moe.experts.167.w3", "model.layers.20.block_sparse_moe.experts.168.w3", "model.layers.20.block_sparse_moe.experts.169.w3", "model.layers.20.block_sparse_moe.experts.170.w3", "model.layers.20.block_sparse_moe.experts.171.w3", "model.layers.20.block_sparse_moe.experts.172.w3", "model.layers.20.block_sparse_moe.experts.173.w3", "model.layers.20.block_sparse_moe.experts.174.w3", "model.layers.20.block_sparse_moe.experts.175.w3", "model.layers.20.block_sparse_moe.experts.176.w3", "model.layers.20.block_sparse_moe.experts.177.w3", "model.layers.20.block_sparse_moe.experts.178.w3", "model.layers.20.block_sparse_moe.experts.179.w3", "model.layers.20.block_sparse_moe.experts.180.w3", "model.layers.20.block_sparse_moe.experts.181.w3", "model.layers.20.block_sparse_moe.experts.182.w3", "model.layers.20.block_sparse_moe.experts.183.w3", "model.layers.20.block_sparse_moe.experts.184.w3", "model.layers.20.block_sparse_moe.experts.185.w3", "model.layers.20.block_sparse_moe.experts.186.w3", "model.layers.20.block_sparse_moe.experts.187.w3", "model.layers.20.block_sparse_moe.experts.188.w3", "model.layers.20.block_sparse_moe.experts.189.w3", "model.layers.20.block_sparse_moe.experts.190.w3", "model.layers.20.block_sparse_moe.experts.191.w3", "model.layers.20.block_sparse_moe.experts.192.w3", "model.layers.20.block_sparse_moe.experts.193.w3", "model.layers.20.block_sparse_moe.experts.194.w3", "model.layers.20.block_sparse_moe.experts.195.w3", "model.layers.20.block_sparse_moe.experts.196.w3", "model.layers.20.block_sparse_moe.experts.197.w3", "model.layers.20.block_sparse_moe.experts.198.w3", "model.layers.20.block_sparse_moe.experts.199.w3", "model.layers.20.block_sparse_moe.experts.200.w3", "model.layers.20.block_sparse_moe.experts.201.w3", "model.layers.20.block_sparse_moe.experts.202.w3", "model.layers.20.block_sparse_moe.experts.203.w3", "model.layers.20.block_sparse_moe.experts.204.w3", "model.layers.20.block_sparse_moe.experts.205.w3", "model.layers.20.block_sparse_moe.experts.206.w3", "model.layers.20.block_sparse_moe.experts.207.w3", "model.layers.20.block_sparse_moe.experts.208.w3", "model.layers.20.block_sparse_moe.experts.209.w3", "model.layers.20.block_sparse_moe.experts.210.w3", "model.layers.20.block_sparse_moe.experts.211.w3", "model.layers.20.block_sparse_moe.experts.212.w3", "model.layers.20.block_sparse_moe.experts.213.w3", "model.layers.20.block_sparse_moe.experts.214.w3", "model.layers.20.block_sparse_moe.experts.215.w3", "model.layers.20.block_sparse_moe.experts.216.w3", "model.layers.20.block_sparse_moe.experts.217.w3", "model.layers.20.block_sparse_moe.experts.218.w3", "model.layers.20.block_sparse_moe.experts.219.w3", "model.layers.20.block_sparse_moe.experts.220.w3", "model.layers.20.block_sparse_moe.experts.221.w3", "model.layers.20.block_sparse_moe.experts.222.w3", "model.layers.20.block_sparse_moe.experts.223.w3", "model.layers.20.block_sparse_moe.experts.224.w3", "model.layers.20.block_sparse_moe.experts.225.w3", "model.layers.20.block_sparse_moe.experts.226.w3", "model.layers.20.block_sparse_moe.experts.227.w3", "model.layers.20.block_sparse_moe.experts.228.w3", "model.layers.20.block_sparse_moe.experts.229.w3", "model.layers.20.block_sparse_moe.experts.230.w3", "model.layers.20.block_sparse_moe.experts.231.w3", "model.layers.20.block_sparse_moe.experts.232.w3", "model.layers.20.block_sparse_moe.experts.233.w3", "model.layers.20.block_sparse_moe.experts.234.w3", "model.layers.20.block_sparse_moe.experts.235.w3", "model.layers.20.block_sparse_moe.experts.236.w3", "model.layers.20.block_sparse_moe.experts.237.w3", "model.layers.20.block_sparse_moe.experts.238.w3", "model.layers.20.block_sparse_moe.experts.239.w3", "model.layers.20.block_sparse_moe.experts.240.w3", "model.layers.20.block_sparse_moe.experts.241.w3", "model.layers.20.block_sparse_moe.experts.242.w3", "model.layers.20.block_sparse_moe.experts.243.w3", "model.layers.20.block_sparse_moe.experts.244.w3", "model.layers.20.block_sparse_moe.experts.245.w3", "model.layers.20.block_sparse_moe.experts.246.w3", "model.layers.20.block_sparse_moe.experts.247.w3", "model.layers.20.block_sparse_moe.experts.248.w3", "model.layers.20.block_sparse_moe.experts.249.w3", "model.layers.20.block_sparse_moe.experts.250.w3", "model.layers.20.block_sparse_moe.experts.251.w3", "model.layers.20.block_sparse_moe.experts.252.w3", "model.layers.20.block_sparse_moe.experts.253.w3", "model.layers.20.block_sparse_moe.experts.254.w3", "model.layers.20.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.00075463466346265, "dbits": 2415919104 } ] }, { "idx": 104, "layers": [ "model.layers.20.block_sparse_moe.experts.0.w2", "model.layers.20.block_sparse_moe.experts.1.w2", "model.layers.20.block_sparse_moe.experts.2.w2", "model.layers.20.block_sparse_moe.experts.3.w2", "model.layers.20.block_sparse_moe.experts.4.w2", "model.layers.20.block_sparse_moe.experts.5.w2", "model.layers.20.block_sparse_moe.experts.6.w2", "model.layers.20.block_sparse_moe.experts.7.w2", "model.layers.20.block_sparse_moe.experts.8.w2", "model.layers.20.block_sparse_moe.experts.9.w2", "model.layers.20.block_sparse_moe.experts.10.w2", "model.layers.20.block_sparse_moe.experts.11.w2", "model.layers.20.block_sparse_moe.experts.12.w2", "model.layers.20.block_sparse_moe.experts.13.w2", "model.layers.20.block_sparse_moe.experts.14.w2", "model.layers.20.block_sparse_moe.experts.15.w2", "model.layers.20.block_sparse_moe.experts.16.w2", "model.layers.20.block_sparse_moe.experts.17.w2", "model.layers.20.block_sparse_moe.experts.18.w2", "model.layers.20.block_sparse_moe.experts.19.w2", "model.layers.20.block_sparse_moe.experts.20.w2", "model.layers.20.block_sparse_moe.experts.21.w2", "model.layers.20.block_sparse_moe.experts.22.w2", "model.layers.20.block_sparse_moe.experts.23.w2", "model.layers.20.block_sparse_moe.experts.24.w2", "model.layers.20.block_sparse_moe.experts.25.w2", "model.layers.20.block_sparse_moe.experts.26.w2", "model.layers.20.block_sparse_moe.experts.27.w2", "model.layers.20.block_sparse_moe.experts.28.w2", "model.layers.20.block_sparse_moe.experts.29.w2", "model.layers.20.block_sparse_moe.experts.30.w2", "model.layers.20.block_sparse_moe.experts.31.w2", "model.layers.20.block_sparse_moe.experts.32.w2", "model.layers.20.block_sparse_moe.experts.33.w2", "model.layers.20.block_sparse_moe.experts.34.w2", "model.layers.20.block_sparse_moe.experts.35.w2", "model.layers.20.block_sparse_moe.experts.36.w2", "model.layers.20.block_sparse_moe.experts.37.w2", "model.layers.20.block_sparse_moe.experts.38.w2", "model.layers.20.block_sparse_moe.experts.39.w2", "model.layers.20.block_sparse_moe.experts.40.w2", "model.layers.20.block_sparse_moe.experts.41.w2", "model.layers.20.block_sparse_moe.experts.42.w2", "model.layers.20.block_sparse_moe.experts.43.w2", "model.layers.20.block_sparse_moe.experts.44.w2", "model.layers.20.block_sparse_moe.experts.45.w2", "model.layers.20.block_sparse_moe.experts.46.w2", "model.layers.20.block_sparse_moe.experts.47.w2", "model.layers.20.block_sparse_moe.experts.48.w2", "model.layers.20.block_sparse_moe.experts.49.w2", "model.layers.20.block_sparse_moe.experts.50.w2", "model.layers.20.block_sparse_moe.experts.51.w2", "model.layers.20.block_sparse_moe.experts.52.w2", "model.layers.20.block_sparse_moe.experts.53.w2", "model.layers.20.block_sparse_moe.experts.54.w2", "model.layers.20.block_sparse_moe.experts.55.w2", "model.layers.20.block_sparse_moe.experts.56.w2", "model.layers.20.block_sparse_moe.experts.57.w2", "model.layers.20.block_sparse_moe.experts.58.w2", "model.layers.20.block_sparse_moe.experts.59.w2", "model.layers.20.block_sparse_moe.experts.60.w2", "model.layers.20.block_sparse_moe.experts.61.w2", "model.layers.20.block_sparse_moe.experts.62.w2", "model.layers.20.block_sparse_moe.experts.63.w2", "model.layers.20.block_sparse_moe.experts.64.w2", "model.layers.20.block_sparse_moe.experts.65.w2", "model.layers.20.block_sparse_moe.experts.66.w2", "model.layers.20.block_sparse_moe.experts.67.w2", "model.layers.20.block_sparse_moe.experts.68.w2", "model.layers.20.block_sparse_moe.experts.69.w2", "model.layers.20.block_sparse_moe.experts.70.w2", "model.layers.20.block_sparse_moe.experts.71.w2", "model.layers.20.block_sparse_moe.experts.72.w2", "model.layers.20.block_sparse_moe.experts.73.w2", "model.layers.20.block_sparse_moe.experts.74.w2", "model.layers.20.block_sparse_moe.experts.75.w2", "model.layers.20.block_sparse_moe.experts.76.w2", "model.layers.20.block_sparse_moe.experts.77.w2", "model.layers.20.block_sparse_moe.experts.78.w2", "model.layers.20.block_sparse_moe.experts.79.w2", "model.layers.20.block_sparse_moe.experts.80.w2", "model.layers.20.block_sparse_moe.experts.81.w2", "model.layers.20.block_sparse_moe.experts.82.w2", "model.layers.20.block_sparse_moe.experts.83.w2", "model.layers.20.block_sparse_moe.experts.84.w2", "model.layers.20.block_sparse_moe.experts.85.w2", "model.layers.20.block_sparse_moe.experts.86.w2", "model.layers.20.block_sparse_moe.experts.87.w2", "model.layers.20.block_sparse_moe.experts.88.w2", "model.layers.20.block_sparse_moe.experts.89.w2", "model.layers.20.block_sparse_moe.experts.90.w2", "model.layers.20.block_sparse_moe.experts.91.w2", "model.layers.20.block_sparse_moe.experts.92.w2", "model.layers.20.block_sparse_moe.experts.93.w2", "model.layers.20.block_sparse_moe.experts.94.w2", "model.layers.20.block_sparse_moe.experts.95.w2", "model.layers.20.block_sparse_moe.experts.96.w2", "model.layers.20.block_sparse_moe.experts.97.w2", "model.layers.20.block_sparse_moe.experts.98.w2", "model.layers.20.block_sparse_moe.experts.99.w2", "model.layers.20.block_sparse_moe.experts.100.w2", "model.layers.20.block_sparse_moe.experts.101.w2", "model.layers.20.block_sparse_moe.experts.102.w2", "model.layers.20.block_sparse_moe.experts.103.w2", "model.layers.20.block_sparse_moe.experts.104.w2", "model.layers.20.block_sparse_moe.experts.105.w2", "model.layers.20.block_sparse_moe.experts.106.w2", "model.layers.20.block_sparse_moe.experts.107.w2", "model.layers.20.block_sparse_moe.experts.108.w2", "model.layers.20.block_sparse_moe.experts.109.w2", "model.layers.20.block_sparse_moe.experts.110.w2", "model.layers.20.block_sparse_moe.experts.111.w2", "model.layers.20.block_sparse_moe.experts.112.w2", "model.layers.20.block_sparse_moe.experts.113.w2", "model.layers.20.block_sparse_moe.experts.114.w2", "model.layers.20.block_sparse_moe.experts.115.w2", "model.layers.20.block_sparse_moe.experts.116.w2", "model.layers.20.block_sparse_moe.experts.117.w2", "model.layers.20.block_sparse_moe.experts.118.w2", "model.layers.20.block_sparse_moe.experts.119.w2", "model.layers.20.block_sparse_moe.experts.120.w2", "model.layers.20.block_sparse_moe.experts.121.w2", "model.layers.20.block_sparse_moe.experts.122.w2", "model.layers.20.block_sparse_moe.experts.123.w2", "model.layers.20.block_sparse_moe.experts.124.w2", "model.layers.20.block_sparse_moe.experts.125.w2", "model.layers.20.block_sparse_moe.experts.126.w2", "model.layers.20.block_sparse_moe.experts.127.w2", "model.layers.20.block_sparse_moe.experts.128.w2", "model.layers.20.block_sparse_moe.experts.129.w2", "model.layers.20.block_sparse_moe.experts.130.w2", "model.layers.20.block_sparse_moe.experts.131.w2", "model.layers.20.block_sparse_moe.experts.132.w2", "model.layers.20.block_sparse_moe.experts.133.w2", "model.layers.20.block_sparse_moe.experts.134.w2", "model.layers.20.block_sparse_moe.experts.135.w2", "model.layers.20.block_sparse_moe.experts.136.w2", "model.layers.20.block_sparse_moe.experts.137.w2", "model.layers.20.block_sparse_moe.experts.138.w2", "model.layers.20.block_sparse_moe.experts.139.w2", "model.layers.20.block_sparse_moe.experts.140.w2", "model.layers.20.block_sparse_moe.experts.141.w2", "model.layers.20.block_sparse_moe.experts.142.w2", "model.layers.20.block_sparse_moe.experts.143.w2", "model.layers.20.block_sparse_moe.experts.144.w2", "model.layers.20.block_sparse_moe.experts.145.w2", "model.layers.20.block_sparse_moe.experts.146.w2", "model.layers.20.block_sparse_moe.experts.147.w2", "model.layers.20.block_sparse_moe.experts.148.w2", "model.layers.20.block_sparse_moe.experts.149.w2", "model.layers.20.block_sparse_moe.experts.150.w2", "model.layers.20.block_sparse_moe.experts.151.w2", "model.layers.20.block_sparse_moe.experts.152.w2", "model.layers.20.block_sparse_moe.experts.153.w2", "model.layers.20.block_sparse_moe.experts.154.w2", "model.layers.20.block_sparse_moe.experts.155.w2", "model.layers.20.block_sparse_moe.experts.156.w2", "model.layers.20.block_sparse_moe.experts.157.w2", "model.layers.20.block_sparse_moe.experts.158.w2", "model.layers.20.block_sparse_moe.experts.159.w2", "model.layers.20.block_sparse_moe.experts.160.w2", "model.layers.20.block_sparse_moe.experts.161.w2", "model.layers.20.block_sparse_moe.experts.162.w2", "model.layers.20.block_sparse_moe.experts.163.w2", "model.layers.20.block_sparse_moe.experts.164.w2", "model.layers.20.block_sparse_moe.experts.165.w2", "model.layers.20.block_sparse_moe.experts.166.w2", "model.layers.20.block_sparse_moe.experts.167.w2", "model.layers.20.block_sparse_moe.experts.168.w2", "model.layers.20.block_sparse_moe.experts.169.w2", "model.layers.20.block_sparse_moe.experts.170.w2", "model.layers.20.block_sparse_moe.experts.171.w2", "model.layers.20.block_sparse_moe.experts.172.w2", "model.layers.20.block_sparse_moe.experts.173.w2", "model.layers.20.block_sparse_moe.experts.174.w2", "model.layers.20.block_sparse_moe.experts.175.w2", "model.layers.20.block_sparse_moe.experts.176.w2", "model.layers.20.block_sparse_moe.experts.177.w2", "model.layers.20.block_sparse_moe.experts.178.w2", "model.layers.20.block_sparse_moe.experts.179.w2", "model.layers.20.block_sparse_moe.experts.180.w2", "model.layers.20.block_sparse_moe.experts.181.w2", "model.layers.20.block_sparse_moe.experts.182.w2", "model.layers.20.block_sparse_moe.experts.183.w2", "model.layers.20.block_sparse_moe.experts.184.w2", "model.layers.20.block_sparse_moe.experts.185.w2", "model.layers.20.block_sparse_moe.experts.186.w2", "model.layers.20.block_sparse_moe.experts.187.w2", "model.layers.20.block_sparse_moe.experts.188.w2", "model.layers.20.block_sparse_moe.experts.189.w2", "model.layers.20.block_sparse_moe.experts.190.w2", "model.layers.20.block_sparse_moe.experts.191.w2", "model.layers.20.block_sparse_moe.experts.192.w2", "model.layers.20.block_sparse_moe.experts.193.w2", "model.layers.20.block_sparse_moe.experts.194.w2", "model.layers.20.block_sparse_moe.experts.195.w2", "model.layers.20.block_sparse_moe.experts.196.w2", "model.layers.20.block_sparse_moe.experts.197.w2", "model.layers.20.block_sparse_moe.experts.198.w2", "model.layers.20.block_sparse_moe.experts.199.w2", "model.layers.20.block_sparse_moe.experts.200.w2", "model.layers.20.block_sparse_moe.experts.201.w2", "model.layers.20.block_sparse_moe.experts.202.w2", "model.layers.20.block_sparse_moe.experts.203.w2", "model.layers.20.block_sparse_moe.experts.204.w2", "model.layers.20.block_sparse_moe.experts.205.w2", "model.layers.20.block_sparse_moe.experts.206.w2", "model.layers.20.block_sparse_moe.experts.207.w2", "model.layers.20.block_sparse_moe.experts.208.w2", "model.layers.20.block_sparse_moe.experts.209.w2", "model.layers.20.block_sparse_moe.experts.210.w2", "model.layers.20.block_sparse_moe.experts.211.w2", "model.layers.20.block_sparse_moe.experts.212.w2", "model.layers.20.block_sparse_moe.experts.213.w2", "model.layers.20.block_sparse_moe.experts.214.w2", "model.layers.20.block_sparse_moe.experts.215.w2", "model.layers.20.block_sparse_moe.experts.216.w2", "model.layers.20.block_sparse_moe.experts.217.w2", "model.layers.20.block_sparse_moe.experts.218.w2", "model.layers.20.block_sparse_moe.experts.219.w2", "model.layers.20.block_sparse_moe.experts.220.w2", "model.layers.20.block_sparse_moe.experts.221.w2", "model.layers.20.block_sparse_moe.experts.222.w2", "model.layers.20.block_sparse_moe.experts.223.w2", "model.layers.20.block_sparse_moe.experts.224.w2", "model.layers.20.block_sparse_moe.experts.225.w2", "model.layers.20.block_sparse_moe.experts.226.w2", "model.layers.20.block_sparse_moe.experts.227.w2", "model.layers.20.block_sparse_moe.experts.228.w2", "model.layers.20.block_sparse_moe.experts.229.w2", "model.layers.20.block_sparse_moe.experts.230.w2", "model.layers.20.block_sparse_moe.experts.231.w2", "model.layers.20.block_sparse_moe.experts.232.w2", "model.layers.20.block_sparse_moe.experts.233.w2", "model.layers.20.block_sparse_moe.experts.234.w2", "model.layers.20.block_sparse_moe.experts.235.w2", "model.layers.20.block_sparse_moe.experts.236.w2", "model.layers.20.block_sparse_moe.experts.237.w2", "model.layers.20.block_sparse_moe.experts.238.w2", "model.layers.20.block_sparse_moe.experts.239.w2", "model.layers.20.block_sparse_moe.experts.240.w2", "model.layers.20.block_sparse_moe.experts.241.w2", "model.layers.20.block_sparse_moe.experts.242.w2", "model.layers.20.block_sparse_moe.experts.243.w2", "model.layers.20.block_sparse_moe.experts.244.w2", "model.layers.20.block_sparse_moe.experts.245.w2", "model.layers.20.block_sparse_moe.experts.246.w2", "model.layers.20.block_sparse_moe.experts.247.w2", "model.layers.20.block_sparse_moe.experts.248.w2", "model.layers.20.block_sparse_moe.experts.249.w2", "model.layers.20.block_sparse_moe.experts.250.w2", "model.layers.20.block_sparse_moe.experts.251.w2", "model.layers.20.block_sparse_moe.experts.252.w2", "model.layers.20.block_sparse_moe.experts.253.w2", "model.layers.20.block_sparse_moe.experts.254.w2", "model.layers.20.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.00023384355008604363, "dbits": 1207959552 } ] }, { "idx": 105, "layers": [ "model.layers.21.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0005420021712779777, "dbits": 18874368 } ] }, { "idx": 106, "layers": [ "model.layers.21.self_attn.k_proj", "model.layers.21.self_attn.v_proj" ], "candidates": [ { "dkld": 0.0038930419832468033, "dbits": 6291456 } ] }, { "idx": 107, "layers": [ "model.layers.21.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0006419323384762032, "dbits": 18874368 } ] }, { "idx": 108, "layers": [ "model.layers.21.block_sparse_moe.experts.0.w1", "model.layers.21.block_sparse_moe.experts.1.w1", "model.layers.21.block_sparse_moe.experts.2.w1", "model.layers.21.block_sparse_moe.experts.3.w1", "model.layers.21.block_sparse_moe.experts.4.w1", "model.layers.21.block_sparse_moe.experts.5.w1", "model.layers.21.block_sparse_moe.experts.6.w1", "model.layers.21.block_sparse_moe.experts.7.w1", "model.layers.21.block_sparse_moe.experts.8.w1", "model.layers.21.block_sparse_moe.experts.9.w1", "model.layers.21.block_sparse_moe.experts.10.w1", "model.layers.21.block_sparse_moe.experts.11.w1", "model.layers.21.block_sparse_moe.experts.12.w1", "model.layers.21.block_sparse_moe.experts.13.w1", "model.layers.21.block_sparse_moe.experts.14.w1", "model.layers.21.block_sparse_moe.experts.15.w1", "model.layers.21.block_sparse_moe.experts.16.w1", "model.layers.21.block_sparse_moe.experts.17.w1", "model.layers.21.block_sparse_moe.experts.18.w1", "model.layers.21.block_sparse_moe.experts.19.w1", "model.layers.21.block_sparse_moe.experts.20.w1", "model.layers.21.block_sparse_moe.experts.21.w1", "model.layers.21.block_sparse_moe.experts.22.w1", "model.layers.21.block_sparse_moe.experts.23.w1", "model.layers.21.block_sparse_moe.experts.24.w1", "model.layers.21.block_sparse_moe.experts.25.w1", "model.layers.21.block_sparse_moe.experts.26.w1", "model.layers.21.block_sparse_moe.experts.27.w1", "model.layers.21.block_sparse_moe.experts.28.w1", "model.layers.21.block_sparse_moe.experts.29.w1", "model.layers.21.block_sparse_moe.experts.30.w1", "model.layers.21.block_sparse_moe.experts.31.w1", "model.layers.21.block_sparse_moe.experts.32.w1", "model.layers.21.block_sparse_moe.experts.33.w1", "model.layers.21.block_sparse_moe.experts.34.w1", "model.layers.21.block_sparse_moe.experts.35.w1", "model.layers.21.block_sparse_moe.experts.36.w1", "model.layers.21.block_sparse_moe.experts.37.w1", "model.layers.21.block_sparse_moe.experts.38.w1", "model.layers.21.block_sparse_moe.experts.39.w1", "model.layers.21.block_sparse_moe.experts.40.w1", "model.layers.21.block_sparse_moe.experts.41.w1", "model.layers.21.block_sparse_moe.experts.42.w1", "model.layers.21.block_sparse_moe.experts.43.w1", "model.layers.21.block_sparse_moe.experts.44.w1", "model.layers.21.block_sparse_moe.experts.45.w1", "model.layers.21.block_sparse_moe.experts.46.w1", "model.layers.21.block_sparse_moe.experts.47.w1", "model.layers.21.block_sparse_moe.experts.48.w1", "model.layers.21.block_sparse_moe.experts.49.w1", "model.layers.21.block_sparse_moe.experts.50.w1", "model.layers.21.block_sparse_moe.experts.51.w1", "model.layers.21.block_sparse_moe.experts.52.w1", "model.layers.21.block_sparse_moe.experts.53.w1", "model.layers.21.block_sparse_moe.experts.54.w1", "model.layers.21.block_sparse_moe.experts.55.w1", "model.layers.21.block_sparse_moe.experts.56.w1", "model.layers.21.block_sparse_moe.experts.57.w1", "model.layers.21.block_sparse_moe.experts.58.w1", "model.layers.21.block_sparse_moe.experts.59.w1", "model.layers.21.block_sparse_moe.experts.60.w1", "model.layers.21.block_sparse_moe.experts.61.w1", "model.layers.21.block_sparse_moe.experts.62.w1", "model.layers.21.block_sparse_moe.experts.63.w1", "model.layers.21.block_sparse_moe.experts.64.w1", "model.layers.21.block_sparse_moe.experts.65.w1", "model.layers.21.block_sparse_moe.experts.66.w1", "model.layers.21.block_sparse_moe.experts.67.w1", "model.layers.21.block_sparse_moe.experts.68.w1", "model.layers.21.block_sparse_moe.experts.69.w1", "model.layers.21.block_sparse_moe.experts.70.w1", "model.layers.21.block_sparse_moe.experts.71.w1", "model.layers.21.block_sparse_moe.experts.72.w1", "model.layers.21.block_sparse_moe.experts.73.w1", "model.layers.21.block_sparse_moe.experts.74.w1", "model.layers.21.block_sparse_moe.experts.75.w1", "model.layers.21.block_sparse_moe.experts.76.w1", "model.layers.21.block_sparse_moe.experts.77.w1", "model.layers.21.block_sparse_moe.experts.78.w1", "model.layers.21.block_sparse_moe.experts.79.w1", "model.layers.21.block_sparse_moe.experts.80.w1", "model.layers.21.block_sparse_moe.experts.81.w1", "model.layers.21.block_sparse_moe.experts.82.w1", "model.layers.21.block_sparse_moe.experts.83.w1", "model.layers.21.block_sparse_moe.experts.84.w1", "model.layers.21.block_sparse_moe.experts.85.w1", "model.layers.21.block_sparse_moe.experts.86.w1", "model.layers.21.block_sparse_moe.experts.87.w1", "model.layers.21.block_sparse_moe.experts.88.w1", "model.layers.21.block_sparse_moe.experts.89.w1", "model.layers.21.block_sparse_moe.experts.90.w1", "model.layers.21.block_sparse_moe.experts.91.w1", "model.layers.21.block_sparse_moe.experts.92.w1", "model.layers.21.block_sparse_moe.experts.93.w1", "model.layers.21.block_sparse_moe.experts.94.w1", "model.layers.21.block_sparse_moe.experts.95.w1", "model.layers.21.block_sparse_moe.experts.96.w1", "model.layers.21.block_sparse_moe.experts.97.w1", "model.layers.21.block_sparse_moe.experts.98.w1", "model.layers.21.block_sparse_moe.experts.99.w1", "model.layers.21.block_sparse_moe.experts.100.w1", "model.layers.21.block_sparse_moe.experts.101.w1", "model.layers.21.block_sparse_moe.experts.102.w1", "model.layers.21.block_sparse_moe.experts.103.w1", "model.layers.21.block_sparse_moe.experts.104.w1", "model.layers.21.block_sparse_moe.experts.105.w1", "model.layers.21.block_sparse_moe.experts.106.w1", "model.layers.21.block_sparse_moe.experts.107.w1", "model.layers.21.block_sparse_moe.experts.108.w1", "model.layers.21.block_sparse_moe.experts.109.w1", "model.layers.21.block_sparse_moe.experts.110.w1", "model.layers.21.block_sparse_moe.experts.111.w1", "model.layers.21.block_sparse_moe.experts.112.w1", "model.layers.21.block_sparse_moe.experts.113.w1", "model.layers.21.block_sparse_moe.experts.114.w1", "model.layers.21.block_sparse_moe.experts.115.w1", "model.layers.21.block_sparse_moe.experts.116.w1", "model.layers.21.block_sparse_moe.experts.117.w1", "model.layers.21.block_sparse_moe.experts.118.w1", "model.layers.21.block_sparse_moe.experts.119.w1", "model.layers.21.block_sparse_moe.experts.120.w1", "model.layers.21.block_sparse_moe.experts.121.w1", "model.layers.21.block_sparse_moe.experts.122.w1", "model.layers.21.block_sparse_moe.experts.123.w1", "model.layers.21.block_sparse_moe.experts.124.w1", "model.layers.21.block_sparse_moe.experts.125.w1", "model.layers.21.block_sparse_moe.experts.126.w1", "model.layers.21.block_sparse_moe.experts.127.w1", "model.layers.21.block_sparse_moe.experts.128.w1", "model.layers.21.block_sparse_moe.experts.129.w1", "model.layers.21.block_sparse_moe.experts.130.w1", "model.layers.21.block_sparse_moe.experts.131.w1", "model.layers.21.block_sparse_moe.experts.132.w1", "model.layers.21.block_sparse_moe.experts.133.w1", "model.layers.21.block_sparse_moe.experts.134.w1", "model.layers.21.block_sparse_moe.experts.135.w1", "model.layers.21.block_sparse_moe.experts.136.w1", "model.layers.21.block_sparse_moe.experts.137.w1", "model.layers.21.block_sparse_moe.experts.138.w1", "model.layers.21.block_sparse_moe.experts.139.w1", "model.layers.21.block_sparse_moe.experts.140.w1", "model.layers.21.block_sparse_moe.experts.141.w1", "model.layers.21.block_sparse_moe.experts.142.w1", "model.layers.21.block_sparse_moe.experts.143.w1", "model.layers.21.block_sparse_moe.experts.144.w1", "model.layers.21.block_sparse_moe.experts.145.w1", "model.layers.21.block_sparse_moe.experts.146.w1", "model.layers.21.block_sparse_moe.experts.147.w1", "model.layers.21.block_sparse_moe.experts.148.w1", "model.layers.21.block_sparse_moe.experts.149.w1", "model.layers.21.block_sparse_moe.experts.150.w1", "model.layers.21.block_sparse_moe.experts.151.w1", "model.layers.21.block_sparse_moe.experts.152.w1", "model.layers.21.block_sparse_moe.experts.153.w1", "model.layers.21.block_sparse_moe.experts.154.w1", "model.layers.21.block_sparse_moe.experts.155.w1", "model.layers.21.block_sparse_moe.experts.156.w1", "model.layers.21.block_sparse_moe.experts.157.w1", "model.layers.21.block_sparse_moe.experts.158.w1", "model.layers.21.block_sparse_moe.experts.159.w1", "model.layers.21.block_sparse_moe.experts.160.w1", "model.layers.21.block_sparse_moe.experts.161.w1", "model.layers.21.block_sparse_moe.experts.162.w1", "model.layers.21.block_sparse_moe.experts.163.w1", "model.layers.21.block_sparse_moe.experts.164.w1", "model.layers.21.block_sparse_moe.experts.165.w1", "model.layers.21.block_sparse_moe.experts.166.w1", "model.layers.21.block_sparse_moe.experts.167.w1", "model.layers.21.block_sparse_moe.experts.168.w1", "model.layers.21.block_sparse_moe.experts.169.w1", "model.layers.21.block_sparse_moe.experts.170.w1", "model.layers.21.block_sparse_moe.experts.171.w1", "model.layers.21.block_sparse_moe.experts.172.w1", "model.layers.21.block_sparse_moe.experts.173.w1", "model.layers.21.block_sparse_moe.experts.174.w1", "model.layers.21.block_sparse_moe.experts.175.w1", "model.layers.21.block_sparse_moe.experts.176.w1", "model.layers.21.block_sparse_moe.experts.177.w1", "model.layers.21.block_sparse_moe.experts.178.w1", "model.layers.21.block_sparse_moe.experts.179.w1", "model.layers.21.block_sparse_moe.experts.180.w1", "model.layers.21.block_sparse_moe.experts.181.w1", "model.layers.21.block_sparse_moe.experts.182.w1", "model.layers.21.block_sparse_moe.experts.183.w1", "model.layers.21.block_sparse_moe.experts.184.w1", "model.layers.21.block_sparse_moe.experts.185.w1", "model.layers.21.block_sparse_moe.experts.186.w1", "model.layers.21.block_sparse_moe.experts.187.w1", "model.layers.21.block_sparse_moe.experts.188.w1", "model.layers.21.block_sparse_moe.experts.189.w1", "model.layers.21.block_sparse_moe.experts.190.w1", "model.layers.21.block_sparse_moe.experts.191.w1", "model.layers.21.block_sparse_moe.experts.192.w1", "model.layers.21.block_sparse_moe.experts.193.w1", "model.layers.21.block_sparse_moe.experts.194.w1", "model.layers.21.block_sparse_moe.experts.195.w1", "model.layers.21.block_sparse_moe.experts.196.w1", "model.layers.21.block_sparse_moe.experts.197.w1", "model.layers.21.block_sparse_moe.experts.198.w1", "model.layers.21.block_sparse_moe.experts.199.w1", "model.layers.21.block_sparse_moe.experts.200.w1", "model.layers.21.block_sparse_moe.experts.201.w1", "model.layers.21.block_sparse_moe.experts.202.w1", "model.layers.21.block_sparse_moe.experts.203.w1", "model.layers.21.block_sparse_moe.experts.204.w1", "model.layers.21.block_sparse_moe.experts.205.w1", "model.layers.21.block_sparse_moe.experts.206.w1", "model.layers.21.block_sparse_moe.experts.207.w1", "model.layers.21.block_sparse_moe.experts.208.w1", "model.layers.21.block_sparse_moe.experts.209.w1", "model.layers.21.block_sparse_moe.experts.210.w1", "model.layers.21.block_sparse_moe.experts.211.w1", "model.layers.21.block_sparse_moe.experts.212.w1", "model.layers.21.block_sparse_moe.experts.213.w1", "model.layers.21.block_sparse_moe.experts.214.w1", "model.layers.21.block_sparse_moe.experts.215.w1", "model.layers.21.block_sparse_moe.experts.216.w1", "model.layers.21.block_sparse_moe.experts.217.w1", "model.layers.21.block_sparse_moe.experts.218.w1", "model.layers.21.block_sparse_moe.experts.219.w1", "model.layers.21.block_sparse_moe.experts.220.w1", "model.layers.21.block_sparse_moe.experts.221.w1", "model.layers.21.block_sparse_moe.experts.222.w1", "model.layers.21.block_sparse_moe.experts.223.w1", "model.layers.21.block_sparse_moe.experts.224.w1", "model.layers.21.block_sparse_moe.experts.225.w1", "model.layers.21.block_sparse_moe.experts.226.w1", "model.layers.21.block_sparse_moe.experts.227.w1", "model.layers.21.block_sparse_moe.experts.228.w1", "model.layers.21.block_sparse_moe.experts.229.w1", "model.layers.21.block_sparse_moe.experts.230.w1", "model.layers.21.block_sparse_moe.experts.231.w1", "model.layers.21.block_sparse_moe.experts.232.w1", "model.layers.21.block_sparse_moe.experts.233.w1", "model.layers.21.block_sparse_moe.experts.234.w1", "model.layers.21.block_sparse_moe.experts.235.w1", "model.layers.21.block_sparse_moe.experts.236.w1", "model.layers.21.block_sparse_moe.experts.237.w1", "model.layers.21.block_sparse_moe.experts.238.w1", "model.layers.21.block_sparse_moe.experts.239.w1", "model.layers.21.block_sparse_moe.experts.240.w1", "model.layers.21.block_sparse_moe.experts.241.w1", "model.layers.21.block_sparse_moe.experts.242.w1", "model.layers.21.block_sparse_moe.experts.243.w1", "model.layers.21.block_sparse_moe.experts.244.w1", "model.layers.21.block_sparse_moe.experts.245.w1", "model.layers.21.block_sparse_moe.experts.246.w1", "model.layers.21.block_sparse_moe.experts.247.w1", "model.layers.21.block_sparse_moe.experts.248.w1", "model.layers.21.block_sparse_moe.experts.249.w1", "model.layers.21.block_sparse_moe.experts.250.w1", "model.layers.21.block_sparse_moe.experts.251.w1", "model.layers.21.block_sparse_moe.experts.252.w1", "model.layers.21.block_sparse_moe.experts.253.w1", "model.layers.21.block_sparse_moe.experts.254.w1", "model.layers.21.block_sparse_moe.experts.255.w1", "model.layers.21.block_sparse_moe.experts.0.w3", "model.layers.21.block_sparse_moe.experts.1.w3", "model.layers.21.block_sparse_moe.experts.2.w3", "model.layers.21.block_sparse_moe.experts.3.w3", "model.layers.21.block_sparse_moe.experts.4.w3", "model.layers.21.block_sparse_moe.experts.5.w3", "model.layers.21.block_sparse_moe.experts.6.w3", "model.layers.21.block_sparse_moe.experts.7.w3", "model.layers.21.block_sparse_moe.experts.8.w3", "model.layers.21.block_sparse_moe.experts.9.w3", "model.layers.21.block_sparse_moe.experts.10.w3", "model.layers.21.block_sparse_moe.experts.11.w3", "model.layers.21.block_sparse_moe.experts.12.w3", "model.layers.21.block_sparse_moe.experts.13.w3", "model.layers.21.block_sparse_moe.experts.14.w3", "model.layers.21.block_sparse_moe.experts.15.w3", "model.layers.21.block_sparse_moe.experts.16.w3", "model.layers.21.block_sparse_moe.experts.17.w3", "model.layers.21.block_sparse_moe.experts.18.w3", "model.layers.21.block_sparse_moe.experts.19.w3", "model.layers.21.block_sparse_moe.experts.20.w3", "model.layers.21.block_sparse_moe.experts.21.w3", "model.layers.21.block_sparse_moe.experts.22.w3", "model.layers.21.block_sparse_moe.experts.23.w3", "model.layers.21.block_sparse_moe.experts.24.w3", "model.layers.21.block_sparse_moe.experts.25.w3", "model.layers.21.block_sparse_moe.experts.26.w3", "model.layers.21.block_sparse_moe.experts.27.w3", "model.layers.21.block_sparse_moe.experts.28.w3", "model.layers.21.block_sparse_moe.experts.29.w3", "model.layers.21.block_sparse_moe.experts.30.w3", "model.layers.21.block_sparse_moe.experts.31.w3", "model.layers.21.block_sparse_moe.experts.32.w3", "model.layers.21.block_sparse_moe.experts.33.w3", "model.layers.21.block_sparse_moe.experts.34.w3", "model.layers.21.block_sparse_moe.experts.35.w3", "model.layers.21.block_sparse_moe.experts.36.w3", "model.layers.21.block_sparse_moe.experts.37.w3", "model.layers.21.block_sparse_moe.experts.38.w3", "model.layers.21.block_sparse_moe.experts.39.w3", "model.layers.21.block_sparse_moe.experts.40.w3", "model.layers.21.block_sparse_moe.experts.41.w3", "model.layers.21.block_sparse_moe.experts.42.w3", "model.layers.21.block_sparse_moe.experts.43.w3", "model.layers.21.block_sparse_moe.experts.44.w3", "model.layers.21.block_sparse_moe.experts.45.w3", "model.layers.21.block_sparse_moe.experts.46.w3", "model.layers.21.block_sparse_moe.experts.47.w3", "model.layers.21.block_sparse_moe.experts.48.w3", "model.layers.21.block_sparse_moe.experts.49.w3", "model.layers.21.block_sparse_moe.experts.50.w3", "model.layers.21.block_sparse_moe.experts.51.w3", "model.layers.21.block_sparse_moe.experts.52.w3", "model.layers.21.block_sparse_moe.experts.53.w3", "model.layers.21.block_sparse_moe.experts.54.w3", "model.layers.21.block_sparse_moe.experts.55.w3", "model.layers.21.block_sparse_moe.experts.56.w3", "model.layers.21.block_sparse_moe.experts.57.w3", "model.layers.21.block_sparse_moe.experts.58.w3", "model.layers.21.block_sparse_moe.experts.59.w3", "model.layers.21.block_sparse_moe.experts.60.w3", "model.layers.21.block_sparse_moe.experts.61.w3", "model.layers.21.block_sparse_moe.experts.62.w3", "model.layers.21.block_sparse_moe.experts.63.w3", "model.layers.21.block_sparse_moe.experts.64.w3", "model.layers.21.block_sparse_moe.experts.65.w3", "model.layers.21.block_sparse_moe.experts.66.w3", "model.layers.21.block_sparse_moe.experts.67.w3", "model.layers.21.block_sparse_moe.experts.68.w3", "model.layers.21.block_sparse_moe.experts.69.w3", "model.layers.21.block_sparse_moe.experts.70.w3", "model.layers.21.block_sparse_moe.experts.71.w3", "model.layers.21.block_sparse_moe.experts.72.w3", "model.layers.21.block_sparse_moe.experts.73.w3", "model.layers.21.block_sparse_moe.experts.74.w3", "model.layers.21.block_sparse_moe.experts.75.w3", "model.layers.21.block_sparse_moe.experts.76.w3", "model.layers.21.block_sparse_moe.experts.77.w3", "model.layers.21.block_sparse_moe.experts.78.w3", "model.layers.21.block_sparse_moe.experts.79.w3", "model.layers.21.block_sparse_moe.experts.80.w3", "model.layers.21.block_sparse_moe.experts.81.w3", "model.layers.21.block_sparse_moe.experts.82.w3", "model.layers.21.block_sparse_moe.experts.83.w3", "model.layers.21.block_sparse_moe.experts.84.w3", "model.layers.21.block_sparse_moe.experts.85.w3", "model.layers.21.block_sparse_moe.experts.86.w3", "model.layers.21.block_sparse_moe.experts.87.w3", "model.layers.21.block_sparse_moe.experts.88.w3", "model.layers.21.block_sparse_moe.experts.89.w3", "model.layers.21.block_sparse_moe.experts.90.w3", "model.layers.21.block_sparse_moe.experts.91.w3", "model.layers.21.block_sparse_moe.experts.92.w3", "model.layers.21.block_sparse_moe.experts.93.w3", "model.layers.21.block_sparse_moe.experts.94.w3", "model.layers.21.block_sparse_moe.experts.95.w3", "model.layers.21.block_sparse_moe.experts.96.w3", "model.layers.21.block_sparse_moe.experts.97.w3", "model.layers.21.block_sparse_moe.experts.98.w3", "model.layers.21.block_sparse_moe.experts.99.w3", "model.layers.21.block_sparse_moe.experts.100.w3", "model.layers.21.block_sparse_moe.experts.101.w3", "model.layers.21.block_sparse_moe.experts.102.w3", "model.layers.21.block_sparse_moe.experts.103.w3", "model.layers.21.block_sparse_moe.experts.104.w3", "model.layers.21.block_sparse_moe.experts.105.w3", "model.layers.21.block_sparse_moe.experts.106.w3", "model.layers.21.block_sparse_moe.experts.107.w3", "model.layers.21.block_sparse_moe.experts.108.w3", "model.layers.21.block_sparse_moe.experts.109.w3", "model.layers.21.block_sparse_moe.experts.110.w3", "model.layers.21.block_sparse_moe.experts.111.w3", "model.layers.21.block_sparse_moe.experts.112.w3", "model.layers.21.block_sparse_moe.experts.113.w3", "model.layers.21.block_sparse_moe.experts.114.w3", "model.layers.21.block_sparse_moe.experts.115.w3", "model.layers.21.block_sparse_moe.experts.116.w3", "model.layers.21.block_sparse_moe.experts.117.w3", "model.layers.21.block_sparse_moe.experts.118.w3", "model.layers.21.block_sparse_moe.experts.119.w3", "model.layers.21.block_sparse_moe.experts.120.w3", "model.layers.21.block_sparse_moe.experts.121.w3", "model.layers.21.block_sparse_moe.experts.122.w3", "model.layers.21.block_sparse_moe.experts.123.w3", "model.layers.21.block_sparse_moe.experts.124.w3", "model.layers.21.block_sparse_moe.experts.125.w3", "model.layers.21.block_sparse_moe.experts.126.w3", "model.layers.21.block_sparse_moe.experts.127.w3", "model.layers.21.block_sparse_moe.experts.128.w3", "model.layers.21.block_sparse_moe.experts.129.w3", "model.layers.21.block_sparse_moe.experts.130.w3", "model.layers.21.block_sparse_moe.experts.131.w3", "model.layers.21.block_sparse_moe.experts.132.w3", "model.layers.21.block_sparse_moe.experts.133.w3", "model.layers.21.block_sparse_moe.experts.134.w3", "model.layers.21.block_sparse_moe.experts.135.w3", "model.layers.21.block_sparse_moe.experts.136.w3", "model.layers.21.block_sparse_moe.experts.137.w3", "model.layers.21.block_sparse_moe.experts.138.w3", "model.layers.21.block_sparse_moe.experts.139.w3", "model.layers.21.block_sparse_moe.experts.140.w3", "model.layers.21.block_sparse_moe.experts.141.w3", "model.layers.21.block_sparse_moe.experts.142.w3", "model.layers.21.block_sparse_moe.experts.143.w3", "model.layers.21.block_sparse_moe.experts.144.w3", "model.layers.21.block_sparse_moe.experts.145.w3", "model.layers.21.block_sparse_moe.experts.146.w3", "model.layers.21.block_sparse_moe.experts.147.w3", "model.layers.21.block_sparse_moe.experts.148.w3", "model.layers.21.block_sparse_moe.experts.149.w3", "model.layers.21.block_sparse_moe.experts.150.w3", "model.layers.21.block_sparse_moe.experts.151.w3", "model.layers.21.block_sparse_moe.experts.152.w3", "model.layers.21.block_sparse_moe.experts.153.w3", "model.layers.21.block_sparse_moe.experts.154.w3", "model.layers.21.block_sparse_moe.experts.155.w3", "model.layers.21.block_sparse_moe.experts.156.w3", "model.layers.21.block_sparse_moe.experts.157.w3", "model.layers.21.block_sparse_moe.experts.158.w3", "model.layers.21.block_sparse_moe.experts.159.w3", "model.layers.21.block_sparse_moe.experts.160.w3", "model.layers.21.block_sparse_moe.experts.161.w3", "model.layers.21.block_sparse_moe.experts.162.w3", "model.layers.21.block_sparse_moe.experts.163.w3", "model.layers.21.block_sparse_moe.experts.164.w3", "model.layers.21.block_sparse_moe.experts.165.w3", "model.layers.21.block_sparse_moe.experts.166.w3", "model.layers.21.block_sparse_moe.experts.167.w3", "model.layers.21.block_sparse_moe.experts.168.w3", "model.layers.21.block_sparse_moe.experts.169.w3", "model.layers.21.block_sparse_moe.experts.170.w3", "model.layers.21.block_sparse_moe.experts.171.w3", "model.layers.21.block_sparse_moe.experts.172.w3", "model.layers.21.block_sparse_moe.experts.173.w3", "model.layers.21.block_sparse_moe.experts.174.w3", "model.layers.21.block_sparse_moe.experts.175.w3", "model.layers.21.block_sparse_moe.experts.176.w3", "model.layers.21.block_sparse_moe.experts.177.w3", "model.layers.21.block_sparse_moe.experts.178.w3", "model.layers.21.block_sparse_moe.experts.179.w3", "model.layers.21.block_sparse_moe.experts.180.w3", "model.layers.21.block_sparse_moe.experts.181.w3", "model.layers.21.block_sparse_moe.experts.182.w3", "model.layers.21.block_sparse_moe.experts.183.w3", "model.layers.21.block_sparse_moe.experts.184.w3", "model.layers.21.block_sparse_moe.experts.185.w3", "model.layers.21.block_sparse_moe.experts.186.w3", "model.layers.21.block_sparse_moe.experts.187.w3", "model.layers.21.block_sparse_moe.experts.188.w3", "model.layers.21.block_sparse_moe.experts.189.w3", "model.layers.21.block_sparse_moe.experts.190.w3", "model.layers.21.block_sparse_moe.experts.191.w3", "model.layers.21.block_sparse_moe.experts.192.w3", "model.layers.21.block_sparse_moe.experts.193.w3", "model.layers.21.block_sparse_moe.experts.194.w3", "model.layers.21.block_sparse_moe.experts.195.w3", "model.layers.21.block_sparse_moe.experts.196.w3", "model.layers.21.block_sparse_moe.experts.197.w3", "model.layers.21.block_sparse_moe.experts.198.w3", "model.layers.21.block_sparse_moe.experts.199.w3", "model.layers.21.block_sparse_moe.experts.200.w3", "model.layers.21.block_sparse_moe.experts.201.w3", "model.layers.21.block_sparse_moe.experts.202.w3", "model.layers.21.block_sparse_moe.experts.203.w3", "model.layers.21.block_sparse_moe.experts.204.w3", "model.layers.21.block_sparse_moe.experts.205.w3", "model.layers.21.block_sparse_moe.experts.206.w3", "model.layers.21.block_sparse_moe.experts.207.w3", "model.layers.21.block_sparse_moe.experts.208.w3", "model.layers.21.block_sparse_moe.experts.209.w3", "model.layers.21.block_sparse_moe.experts.210.w3", "model.layers.21.block_sparse_moe.experts.211.w3", "model.layers.21.block_sparse_moe.experts.212.w3", "model.layers.21.block_sparse_moe.experts.213.w3", "model.layers.21.block_sparse_moe.experts.214.w3", "model.layers.21.block_sparse_moe.experts.215.w3", "model.layers.21.block_sparse_moe.experts.216.w3", "model.layers.21.block_sparse_moe.experts.217.w3", "model.layers.21.block_sparse_moe.experts.218.w3", "model.layers.21.block_sparse_moe.experts.219.w3", "model.layers.21.block_sparse_moe.experts.220.w3", "model.layers.21.block_sparse_moe.experts.221.w3", "model.layers.21.block_sparse_moe.experts.222.w3", "model.layers.21.block_sparse_moe.experts.223.w3", "model.layers.21.block_sparse_moe.experts.224.w3", "model.layers.21.block_sparse_moe.experts.225.w3", "model.layers.21.block_sparse_moe.experts.226.w3", "model.layers.21.block_sparse_moe.experts.227.w3", "model.layers.21.block_sparse_moe.experts.228.w3", "model.layers.21.block_sparse_moe.experts.229.w3", "model.layers.21.block_sparse_moe.experts.230.w3", "model.layers.21.block_sparse_moe.experts.231.w3", "model.layers.21.block_sparse_moe.experts.232.w3", "model.layers.21.block_sparse_moe.experts.233.w3", "model.layers.21.block_sparse_moe.experts.234.w3", "model.layers.21.block_sparse_moe.experts.235.w3", "model.layers.21.block_sparse_moe.experts.236.w3", "model.layers.21.block_sparse_moe.experts.237.w3", "model.layers.21.block_sparse_moe.experts.238.w3", "model.layers.21.block_sparse_moe.experts.239.w3", "model.layers.21.block_sparse_moe.experts.240.w3", "model.layers.21.block_sparse_moe.experts.241.w3", "model.layers.21.block_sparse_moe.experts.242.w3", "model.layers.21.block_sparse_moe.experts.243.w3", "model.layers.21.block_sparse_moe.experts.244.w3", "model.layers.21.block_sparse_moe.experts.245.w3", "model.layers.21.block_sparse_moe.experts.246.w3", "model.layers.21.block_sparse_moe.experts.247.w3", "model.layers.21.block_sparse_moe.experts.248.w3", "model.layers.21.block_sparse_moe.experts.249.w3", "model.layers.21.block_sparse_moe.experts.250.w3", "model.layers.21.block_sparse_moe.experts.251.w3", "model.layers.21.block_sparse_moe.experts.252.w3", "model.layers.21.block_sparse_moe.experts.253.w3", "model.layers.21.block_sparse_moe.experts.254.w3", "model.layers.21.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.0007906198501586581, "dbits": 2415919104 } ] }, { "idx": 109, "layers": [ "model.layers.21.block_sparse_moe.experts.0.w2", "model.layers.21.block_sparse_moe.experts.1.w2", "model.layers.21.block_sparse_moe.experts.2.w2", "model.layers.21.block_sparse_moe.experts.3.w2", "model.layers.21.block_sparse_moe.experts.4.w2", "model.layers.21.block_sparse_moe.experts.5.w2", "model.layers.21.block_sparse_moe.experts.6.w2", "model.layers.21.block_sparse_moe.experts.7.w2", "model.layers.21.block_sparse_moe.experts.8.w2", "model.layers.21.block_sparse_moe.experts.9.w2", "model.layers.21.block_sparse_moe.experts.10.w2", "model.layers.21.block_sparse_moe.experts.11.w2", "model.layers.21.block_sparse_moe.experts.12.w2", "model.layers.21.block_sparse_moe.experts.13.w2", "model.layers.21.block_sparse_moe.experts.14.w2", "model.layers.21.block_sparse_moe.experts.15.w2", "model.layers.21.block_sparse_moe.experts.16.w2", "model.layers.21.block_sparse_moe.experts.17.w2", "model.layers.21.block_sparse_moe.experts.18.w2", "model.layers.21.block_sparse_moe.experts.19.w2", "model.layers.21.block_sparse_moe.experts.20.w2", "model.layers.21.block_sparse_moe.experts.21.w2", "model.layers.21.block_sparse_moe.experts.22.w2", "model.layers.21.block_sparse_moe.experts.23.w2", "model.layers.21.block_sparse_moe.experts.24.w2", "model.layers.21.block_sparse_moe.experts.25.w2", "model.layers.21.block_sparse_moe.experts.26.w2", "model.layers.21.block_sparse_moe.experts.27.w2", "model.layers.21.block_sparse_moe.experts.28.w2", "model.layers.21.block_sparse_moe.experts.29.w2", "model.layers.21.block_sparse_moe.experts.30.w2", "model.layers.21.block_sparse_moe.experts.31.w2", "model.layers.21.block_sparse_moe.experts.32.w2", "model.layers.21.block_sparse_moe.experts.33.w2", "model.layers.21.block_sparse_moe.experts.34.w2", "model.layers.21.block_sparse_moe.experts.35.w2", "model.layers.21.block_sparse_moe.experts.36.w2", "model.layers.21.block_sparse_moe.experts.37.w2", "model.layers.21.block_sparse_moe.experts.38.w2", "model.layers.21.block_sparse_moe.experts.39.w2", "model.layers.21.block_sparse_moe.experts.40.w2", "model.layers.21.block_sparse_moe.experts.41.w2", "model.layers.21.block_sparse_moe.experts.42.w2", "model.layers.21.block_sparse_moe.experts.43.w2", "model.layers.21.block_sparse_moe.experts.44.w2", "model.layers.21.block_sparse_moe.experts.45.w2", "model.layers.21.block_sparse_moe.experts.46.w2", "model.layers.21.block_sparse_moe.experts.47.w2", "model.layers.21.block_sparse_moe.experts.48.w2", "model.layers.21.block_sparse_moe.experts.49.w2", "model.layers.21.block_sparse_moe.experts.50.w2", "model.layers.21.block_sparse_moe.experts.51.w2", "model.layers.21.block_sparse_moe.experts.52.w2", "model.layers.21.block_sparse_moe.experts.53.w2", "model.layers.21.block_sparse_moe.experts.54.w2", "model.layers.21.block_sparse_moe.experts.55.w2", "model.layers.21.block_sparse_moe.experts.56.w2", "model.layers.21.block_sparse_moe.experts.57.w2", "model.layers.21.block_sparse_moe.experts.58.w2", "model.layers.21.block_sparse_moe.experts.59.w2", "model.layers.21.block_sparse_moe.experts.60.w2", "model.layers.21.block_sparse_moe.experts.61.w2", "model.layers.21.block_sparse_moe.experts.62.w2", "model.layers.21.block_sparse_moe.experts.63.w2", "model.layers.21.block_sparse_moe.experts.64.w2", "model.layers.21.block_sparse_moe.experts.65.w2", "model.layers.21.block_sparse_moe.experts.66.w2", "model.layers.21.block_sparse_moe.experts.67.w2", "model.layers.21.block_sparse_moe.experts.68.w2", "model.layers.21.block_sparse_moe.experts.69.w2", "model.layers.21.block_sparse_moe.experts.70.w2", "model.layers.21.block_sparse_moe.experts.71.w2", "model.layers.21.block_sparse_moe.experts.72.w2", "model.layers.21.block_sparse_moe.experts.73.w2", "model.layers.21.block_sparse_moe.experts.74.w2", "model.layers.21.block_sparse_moe.experts.75.w2", "model.layers.21.block_sparse_moe.experts.76.w2", "model.layers.21.block_sparse_moe.experts.77.w2", "model.layers.21.block_sparse_moe.experts.78.w2", "model.layers.21.block_sparse_moe.experts.79.w2", "model.layers.21.block_sparse_moe.experts.80.w2", "model.layers.21.block_sparse_moe.experts.81.w2", "model.layers.21.block_sparse_moe.experts.82.w2", "model.layers.21.block_sparse_moe.experts.83.w2", "model.layers.21.block_sparse_moe.experts.84.w2", "model.layers.21.block_sparse_moe.experts.85.w2", "model.layers.21.block_sparse_moe.experts.86.w2", "model.layers.21.block_sparse_moe.experts.87.w2", "model.layers.21.block_sparse_moe.experts.88.w2", "model.layers.21.block_sparse_moe.experts.89.w2", "model.layers.21.block_sparse_moe.experts.90.w2", "model.layers.21.block_sparse_moe.experts.91.w2", "model.layers.21.block_sparse_moe.experts.92.w2", "model.layers.21.block_sparse_moe.experts.93.w2", "model.layers.21.block_sparse_moe.experts.94.w2", "model.layers.21.block_sparse_moe.experts.95.w2", "model.layers.21.block_sparse_moe.experts.96.w2", "model.layers.21.block_sparse_moe.experts.97.w2", "model.layers.21.block_sparse_moe.experts.98.w2", "model.layers.21.block_sparse_moe.experts.99.w2", "model.layers.21.block_sparse_moe.experts.100.w2", "model.layers.21.block_sparse_moe.experts.101.w2", "model.layers.21.block_sparse_moe.experts.102.w2", "model.layers.21.block_sparse_moe.experts.103.w2", "model.layers.21.block_sparse_moe.experts.104.w2", "model.layers.21.block_sparse_moe.experts.105.w2", "model.layers.21.block_sparse_moe.experts.106.w2", "model.layers.21.block_sparse_moe.experts.107.w2", "model.layers.21.block_sparse_moe.experts.108.w2", "model.layers.21.block_sparse_moe.experts.109.w2", "model.layers.21.block_sparse_moe.experts.110.w2", "model.layers.21.block_sparse_moe.experts.111.w2", "model.layers.21.block_sparse_moe.experts.112.w2", "model.layers.21.block_sparse_moe.experts.113.w2", "model.layers.21.block_sparse_moe.experts.114.w2", "model.layers.21.block_sparse_moe.experts.115.w2", "model.layers.21.block_sparse_moe.experts.116.w2", "model.layers.21.block_sparse_moe.experts.117.w2", "model.layers.21.block_sparse_moe.experts.118.w2", "model.layers.21.block_sparse_moe.experts.119.w2", "model.layers.21.block_sparse_moe.experts.120.w2", "model.layers.21.block_sparse_moe.experts.121.w2", "model.layers.21.block_sparse_moe.experts.122.w2", "model.layers.21.block_sparse_moe.experts.123.w2", "model.layers.21.block_sparse_moe.experts.124.w2", "model.layers.21.block_sparse_moe.experts.125.w2", "model.layers.21.block_sparse_moe.experts.126.w2", "model.layers.21.block_sparse_moe.experts.127.w2", "model.layers.21.block_sparse_moe.experts.128.w2", "model.layers.21.block_sparse_moe.experts.129.w2", "model.layers.21.block_sparse_moe.experts.130.w2", "model.layers.21.block_sparse_moe.experts.131.w2", "model.layers.21.block_sparse_moe.experts.132.w2", "model.layers.21.block_sparse_moe.experts.133.w2", "model.layers.21.block_sparse_moe.experts.134.w2", "model.layers.21.block_sparse_moe.experts.135.w2", "model.layers.21.block_sparse_moe.experts.136.w2", "model.layers.21.block_sparse_moe.experts.137.w2", "model.layers.21.block_sparse_moe.experts.138.w2", "model.layers.21.block_sparse_moe.experts.139.w2", "model.layers.21.block_sparse_moe.experts.140.w2", "model.layers.21.block_sparse_moe.experts.141.w2", "model.layers.21.block_sparse_moe.experts.142.w2", "model.layers.21.block_sparse_moe.experts.143.w2", "model.layers.21.block_sparse_moe.experts.144.w2", "model.layers.21.block_sparse_moe.experts.145.w2", "model.layers.21.block_sparse_moe.experts.146.w2", "model.layers.21.block_sparse_moe.experts.147.w2", "model.layers.21.block_sparse_moe.experts.148.w2", "model.layers.21.block_sparse_moe.experts.149.w2", "model.layers.21.block_sparse_moe.experts.150.w2", "model.layers.21.block_sparse_moe.experts.151.w2", "model.layers.21.block_sparse_moe.experts.152.w2", "model.layers.21.block_sparse_moe.experts.153.w2", "model.layers.21.block_sparse_moe.experts.154.w2", "model.layers.21.block_sparse_moe.experts.155.w2", "model.layers.21.block_sparse_moe.experts.156.w2", "model.layers.21.block_sparse_moe.experts.157.w2", "model.layers.21.block_sparse_moe.experts.158.w2", "model.layers.21.block_sparse_moe.experts.159.w2", "model.layers.21.block_sparse_moe.experts.160.w2", "model.layers.21.block_sparse_moe.experts.161.w2", "model.layers.21.block_sparse_moe.experts.162.w2", "model.layers.21.block_sparse_moe.experts.163.w2", "model.layers.21.block_sparse_moe.experts.164.w2", "model.layers.21.block_sparse_moe.experts.165.w2", "model.layers.21.block_sparse_moe.experts.166.w2", "model.layers.21.block_sparse_moe.experts.167.w2", "model.layers.21.block_sparse_moe.experts.168.w2", "model.layers.21.block_sparse_moe.experts.169.w2", "model.layers.21.block_sparse_moe.experts.170.w2", "model.layers.21.block_sparse_moe.experts.171.w2", "model.layers.21.block_sparse_moe.experts.172.w2", "model.layers.21.block_sparse_moe.experts.173.w2", "model.layers.21.block_sparse_moe.experts.174.w2", "model.layers.21.block_sparse_moe.experts.175.w2", "model.layers.21.block_sparse_moe.experts.176.w2", "model.layers.21.block_sparse_moe.experts.177.w2", "model.layers.21.block_sparse_moe.experts.178.w2", "model.layers.21.block_sparse_moe.experts.179.w2", "model.layers.21.block_sparse_moe.experts.180.w2", "model.layers.21.block_sparse_moe.experts.181.w2", "model.layers.21.block_sparse_moe.experts.182.w2", "model.layers.21.block_sparse_moe.experts.183.w2", "model.layers.21.block_sparse_moe.experts.184.w2", "model.layers.21.block_sparse_moe.experts.185.w2", "model.layers.21.block_sparse_moe.experts.186.w2", "model.layers.21.block_sparse_moe.experts.187.w2", "model.layers.21.block_sparse_moe.experts.188.w2", "model.layers.21.block_sparse_moe.experts.189.w2", "model.layers.21.block_sparse_moe.experts.190.w2", "model.layers.21.block_sparse_moe.experts.191.w2", "model.layers.21.block_sparse_moe.experts.192.w2", "model.layers.21.block_sparse_moe.experts.193.w2", "model.layers.21.block_sparse_moe.experts.194.w2", "model.layers.21.block_sparse_moe.experts.195.w2", "model.layers.21.block_sparse_moe.experts.196.w2", "model.layers.21.block_sparse_moe.experts.197.w2", "model.layers.21.block_sparse_moe.experts.198.w2", "model.layers.21.block_sparse_moe.experts.199.w2", "model.layers.21.block_sparse_moe.experts.200.w2", "model.layers.21.block_sparse_moe.experts.201.w2", "model.layers.21.block_sparse_moe.experts.202.w2", "model.layers.21.block_sparse_moe.experts.203.w2", "model.layers.21.block_sparse_moe.experts.204.w2", "model.layers.21.block_sparse_moe.experts.205.w2", "model.layers.21.block_sparse_moe.experts.206.w2", "model.layers.21.block_sparse_moe.experts.207.w2", "model.layers.21.block_sparse_moe.experts.208.w2", "model.layers.21.block_sparse_moe.experts.209.w2", "model.layers.21.block_sparse_moe.experts.210.w2", "model.layers.21.block_sparse_moe.experts.211.w2", "model.layers.21.block_sparse_moe.experts.212.w2", "model.layers.21.block_sparse_moe.experts.213.w2", "model.layers.21.block_sparse_moe.experts.214.w2", "model.layers.21.block_sparse_moe.experts.215.w2", "model.layers.21.block_sparse_moe.experts.216.w2", "model.layers.21.block_sparse_moe.experts.217.w2", "model.layers.21.block_sparse_moe.experts.218.w2", "model.layers.21.block_sparse_moe.experts.219.w2", "model.layers.21.block_sparse_moe.experts.220.w2", "model.layers.21.block_sparse_moe.experts.221.w2", "model.layers.21.block_sparse_moe.experts.222.w2", "model.layers.21.block_sparse_moe.experts.223.w2", "model.layers.21.block_sparse_moe.experts.224.w2", "model.layers.21.block_sparse_moe.experts.225.w2", "model.layers.21.block_sparse_moe.experts.226.w2", "model.layers.21.block_sparse_moe.experts.227.w2", "model.layers.21.block_sparse_moe.experts.228.w2", "model.layers.21.block_sparse_moe.experts.229.w2", "model.layers.21.block_sparse_moe.experts.230.w2", "model.layers.21.block_sparse_moe.experts.231.w2", "model.layers.21.block_sparse_moe.experts.232.w2", "model.layers.21.block_sparse_moe.experts.233.w2", "model.layers.21.block_sparse_moe.experts.234.w2", "model.layers.21.block_sparse_moe.experts.235.w2", "model.layers.21.block_sparse_moe.experts.236.w2", "model.layers.21.block_sparse_moe.experts.237.w2", "model.layers.21.block_sparse_moe.experts.238.w2", "model.layers.21.block_sparse_moe.experts.239.w2", "model.layers.21.block_sparse_moe.experts.240.w2", "model.layers.21.block_sparse_moe.experts.241.w2", "model.layers.21.block_sparse_moe.experts.242.w2", "model.layers.21.block_sparse_moe.experts.243.w2", "model.layers.21.block_sparse_moe.experts.244.w2", "model.layers.21.block_sparse_moe.experts.245.w2", "model.layers.21.block_sparse_moe.experts.246.w2", "model.layers.21.block_sparse_moe.experts.247.w2", "model.layers.21.block_sparse_moe.experts.248.w2", "model.layers.21.block_sparse_moe.experts.249.w2", "model.layers.21.block_sparse_moe.experts.250.w2", "model.layers.21.block_sparse_moe.experts.251.w2", "model.layers.21.block_sparse_moe.experts.252.w2", "model.layers.21.block_sparse_moe.experts.253.w2", "model.layers.21.block_sparse_moe.experts.254.w2", "model.layers.21.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.000644032657146465, "dbits": 1207959552 } ] }, { "idx": 110, "layers": [ "model.layers.22.self_attn.q_proj" ], "candidates": [ { "dkld": 0.000879654660820961, "dbits": 18874368 } ] }, { "idx": 111, "layers": [ "model.layers.22.self_attn.k_proj", "model.layers.22.self_attn.v_proj" ], "candidates": [ { "dkld": -0.005359817296266556, "dbits": 6291456 } ] }, { "idx": 112, "layers": [ "model.layers.22.self_attn.o_proj" ], "candidates": [ { "dkld": 0.003640977665781975, "dbits": 18874368 } ] }, { "idx": 113, "layers": [ "model.layers.22.block_sparse_moe.experts.0.w1", "model.layers.22.block_sparse_moe.experts.1.w1", "model.layers.22.block_sparse_moe.experts.2.w1", "model.layers.22.block_sparse_moe.experts.3.w1", "model.layers.22.block_sparse_moe.experts.4.w1", "model.layers.22.block_sparse_moe.experts.5.w1", "model.layers.22.block_sparse_moe.experts.6.w1", "model.layers.22.block_sparse_moe.experts.7.w1", "model.layers.22.block_sparse_moe.experts.8.w1", "model.layers.22.block_sparse_moe.experts.9.w1", "model.layers.22.block_sparse_moe.experts.10.w1", "model.layers.22.block_sparse_moe.experts.11.w1", "model.layers.22.block_sparse_moe.experts.12.w1", "model.layers.22.block_sparse_moe.experts.13.w1", "model.layers.22.block_sparse_moe.experts.14.w1", "model.layers.22.block_sparse_moe.experts.15.w1", "model.layers.22.block_sparse_moe.experts.16.w1", "model.layers.22.block_sparse_moe.experts.17.w1", "model.layers.22.block_sparse_moe.experts.18.w1", "model.layers.22.block_sparse_moe.experts.19.w1", "model.layers.22.block_sparse_moe.experts.20.w1", "model.layers.22.block_sparse_moe.experts.21.w1", "model.layers.22.block_sparse_moe.experts.22.w1", "model.layers.22.block_sparse_moe.experts.23.w1", "model.layers.22.block_sparse_moe.experts.24.w1", "model.layers.22.block_sparse_moe.experts.25.w1", "model.layers.22.block_sparse_moe.experts.26.w1", "model.layers.22.block_sparse_moe.experts.27.w1", "model.layers.22.block_sparse_moe.experts.28.w1", "model.layers.22.block_sparse_moe.experts.29.w1", "model.layers.22.block_sparse_moe.experts.30.w1", "model.layers.22.block_sparse_moe.experts.31.w1", "model.layers.22.block_sparse_moe.experts.32.w1", "model.layers.22.block_sparse_moe.experts.33.w1", "model.layers.22.block_sparse_moe.experts.34.w1", "model.layers.22.block_sparse_moe.experts.35.w1", "model.layers.22.block_sparse_moe.experts.36.w1", "model.layers.22.block_sparse_moe.experts.37.w1", "model.layers.22.block_sparse_moe.experts.38.w1", "model.layers.22.block_sparse_moe.experts.39.w1", "model.layers.22.block_sparse_moe.experts.40.w1", "model.layers.22.block_sparse_moe.experts.41.w1", "model.layers.22.block_sparse_moe.experts.42.w1", "model.layers.22.block_sparse_moe.experts.43.w1", "model.layers.22.block_sparse_moe.experts.44.w1", "model.layers.22.block_sparse_moe.experts.45.w1", "model.layers.22.block_sparse_moe.experts.46.w1", "model.layers.22.block_sparse_moe.experts.47.w1", "model.layers.22.block_sparse_moe.experts.48.w1", "model.layers.22.block_sparse_moe.experts.49.w1", "model.layers.22.block_sparse_moe.experts.50.w1", "model.layers.22.block_sparse_moe.experts.51.w1", "model.layers.22.block_sparse_moe.experts.52.w1", "model.layers.22.block_sparse_moe.experts.53.w1", "model.layers.22.block_sparse_moe.experts.54.w1", "model.layers.22.block_sparse_moe.experts.55.w1", "model.layers.22.block_sparse_moe.experts.56.w1", "model.layers.22.block_sparse_moe.experts.57.w1", "model.layers.22.block_sparse_moe.experts.58.w1", "model.layers.22.block_sparse_moe.experts.59.w1", "model.layers.22.block_sparse_moe.experts.60.w1", "model.layers.22.block_sparse_moe.experts.61.w1", "model.layers.22.block_sparse_moe.experts.62.w1", "model.layers.22.block_sparse_moe.experts.63.w1", "model.layers.22.block_sparse_moe.experts.64.w1", "model.layers.22.block_sparse_moe.experts.65.w1", "model.layers.22.block_sparse_moe.experts.66.w1", "model.layers.22.block_sparse_moe.experts.67.w1", "model.layers.22.block_sparse_moe.experts.68.w1", "model.layers.22.block_sparse_moe.experts.69.w1", "model.layers.22.block_sparse_moe.experts.70.w1", "model.layers.22.block_sparse_moe.experts.71.w1", "model.layers.22.block_sparse_moe.experts.72.w1", "model.layers.22.block_sparse_moe.experts.73.w1", "model.layers.22.block_sparse_moe.experts.74.w1", "model.layers.22.block_sparse_moe.experts.75.w1", "model.layers.22.block_sparse_moe.experts.76.w1", "model.layers.22.block_sparse_moe.experts.77.w1", "model.layers.22.block_sparse_moe.experts.78.w1", "model.layers.22.block_sparse_moe.experts.79.w1", "model.layers.22.block_sparse_moe.experts.80.w1", "model.layers.22.block_sparse_moe.experts.81.w1", "model.layers.22.block_sparse_moe.experts.82.w1", "model.layers.22.block_sparse_moe.experts.83.w1", "model.layers.22.block_sparse_moe.experts.84.w1", "model.layers.22.block_sparse_moe.experts.85.w1", "model.layers.22.block_sparse_moe.experts.86.w1", "model.layers.22.block_sparse_moe.experts.87.w1", "model.layers.22.block_sparse_moe.experts.88.w1", "model.layers.22.block_sparse_moe.experts.89.w1", "model.layers.22.block_sparse_moe.experts.90.w1", "model.layers.22.block_sparse_moe.experts.91.w1", "model.layers.22.block_sparse_moe.experts.92.w1", "model.layers.22.block_sparse_moe.experts.93.w1", "model.layers.22.block_sparse_moe.experts.94.w1", "model.layers.22.block_sparse_moe.experts.95.w1", "model.layers.22.block_sparse_moe.experts.96.w1", "model.layers.22.block_sparse_moe.experts.97.w1", "model.layers.22.block_sparse_moe.experts.98.w1", "model.layers.22.block_sparse_moe.experts.99.w1", "model.layers.22.block_sparse_moe.experts.100.w1", "model.layers.22.block_sparse_moe.experts.101.w1", "model.layers.22.block_sparse_moe.experts.102.w1", "model.layers.22.block_sparse_moe.experts.103.w1", "model.layers.22.block_sparse_moe.experts.104.w1", "model.layers.22.block_sparse_moe.experts.105.w1", "model.layers.22.block_sparse_moe.experts.106.w1", "model.layers.22.block_sparse_moe.experts.107.w1", "model.layers.22.block_sparse_moe.experts.108.w1", "model.layers.22.block_sparse_moe.experts.109.w1", "model.layers.22.block_sparse_moe.experts.110.w1", "model.layers.22.block_sparse_moe.experts.111.w1", "model.layers.22.block_sparse_moe.experts.112.w1", "model.layers.22.block_sparse_moe.experts.113.w1", "model.layers.22.block_sparse_moe.experts.114.w1", "model.layers.22.block_sparse_moe.experts.115.w1", "model.layers.22.block_sparse_moe.experts.116.w1", "model.layers.22.block_sparse_moe.experts.117.w1", "model.layers.22.block_sparse_moe.experts.118.w1", "model.layers.22.block_sparse_moe.experts.119.w1", "model.layers.22.block_sparse_moe.experts.120.w1", "model.layers.22.block_sparse_moe.experts.121.w1", "model.layers.22.block_sparse_moe.experts.122.w1", "model.layers.22.block_sparse_moe.experts.123.w1", "model.layers.22.block_sparse_moe.experts.124.w1", "model.layers.22.block_sparse_moe.experts.125.w1", "model.layers.22.block_sparse_moe.experts.126.w1", "model.layers.22.block_sparse_moe.experts.127.w1", "model.layers.22.block_sparse_moe.experts.128.w1", "model.layers.22.block_sparse_moe.experts.129.w1", "model.layers.22.block_sparse_moe.experts.130.w1", "model.layers.22.block_sparse_moe.experts.131.w1", "model.layers.22.block_sparse_moe.experts.132.w1", "model.layers.22.block_sparse_moe.experts.133.w1", "model.layers.22.block_sparse_moe.experts.134.w1", "model.layers.22.block_sparse_moe.experts.135.w1", "model.layers.22.block_sparse_moe.experts.136.w1", "model.layers.22.block_sparse_moe.experts.137.w1", "model.layers.22.block_sparse_moe.experts.138.w1", "model.layers.22.block_sparse_moe.experts.139.w1", "model.layers.22.block_sparse_moe.experts.140.w1", "model.layers.22.block_sparse_moe.experts.141.w1", "model.layers.22.block_sparse_moe.experts.142.w1", "model.layers.22.block_sparse_moe.experts.143.w1", "model.layers.22.block_sparse_moe.experts.144.w1", "model.layers.22.block_sparse_moe.experts.145.w1", "model.layers.22.block_sparse_moe.experts.146.w1", "model.layers.22.block_sparse_moe.experts.147.w1", "model.layers.22.block_sparse_moe.experts.148.w1", "model.layers.22.block_sparse_moe.experts.149.w1", "model.layers.22.block_sparse_moe.experts.150.w1", "model.layers.22.block_sparse_moe.experts.151.w1", "model.layers.22.block_sparse_moe.experts.152.w1", "model.layers.22.block_sparse_moe.experts.153.w1", "model.layers.22.block_sparse_moe.experts.154.w1", "model.layers.22.block_sparse_moe.experts.155.w1", "model.layers.22.block_sparse_moe.experts.156.w1", "model.layers.22.block_sparse_moe.experts.157.w1", "model.layers.22.block_sparse_moe.experts.158.w1", "model.layers.22.block_sparse_moe.experts.159.w1", "model.layers.22.block_sparse_moe.experts.160.w1", "model.layers.22.block_sparse_moe.experts.161.w1", "model.layers.22.block_sparse_moe.experts.162.w1", "model.layers.22.block_sparse_moe.experts.163.w1", "model.layers.22.block_sparse_moe.experts.164.w1", "model.layers.22.block_sparse_moe.experts.165.w1", "model.layers.22.block_sparse_moe.experts.166.w1", "model.layers.22.block_sparse_moe.experts.167.w1", "model.layers.22.block_sparse_moe.experts.168.w1", "model.layers.22.block_sparse_moe.experts.169.w1", "model.layers.22.block_sparse_moe.experts.170.w1", "model.layers.22.block_sparse_moe.experts.171.w1", "model.layers.22.block_sparse_moe.experts.172.w1", "model.layers.22.block_sparse_moe.experts.173.w1", "model.layers.22.block_sparse_moe.experts.174.w1", "model.layers.22.block_sparse_moe.experts.175.w1", "model.layers.22.block_sparse_moe.experts.176.w1", "model.layers.22.block_sparse_moe.experts.177.w1", "model.layers.22.block_sparse_moe.experts.178.w1", "model.layers.22.block_sparse_moe.experts.179.w1", "model.layers.22.block_sparse_moe.experts.180.w1", "model.layers.22.block_sparse_moe.experts.181.w1", "model.layers.22.block_sparse_moe.experts.182.w1", "model.layers.22.block_sparse_moe.experts.183.w1", "model.layers.22.block_sparse_moe.experts.184.w1", "model.layers.22.block_sparse_moe.experts.185.w1", "model.layers.22.block_sparse_moe.experts.186.w1", "model.layers.22.block_sparse_moe.experts.187.w1", "model.layers.22.block_sparse_moe.experts.188.w1", "model.layers.22.block_sparse_moe.experts.189.w1", "model.layers.22.block_sparse_moe.experts.190.w1", "model.layers.22.block_sparse_moe.experts.191.w1", "model.layers.22.block_sparse_moe.experts.192.w1", "model.layers.22.block_sparse_moe.experts.193.w1", "model.layers.22.block_sparse_moe.experts.194.w1", "model.layers.22.block_sparse_moe.experts.195.w1", "model.layers.22.block_sparse_moe.experts.196.w1", "model.layers.22.block_sparse_moe.experts.197.w1", "model.layers.22.block_sparse_moe.experts.198.w1", "model.layers.22.block_sparse_moe.experts.199.w1", "model.layers.22.block_sparse_moe.experts.200.w1", "model.layers.22.block_sparse_moe.experts.201.w1", "model.layers.22.block_sparse_moe.experts.202.w1", "model.layers.22.block_sparse_moe.experts.203.w1", "model.layers.22.block_sparse_moe.experts.204.w1", "model.layers.22.block_sparse_moe.experts.205.w1", "model.layers.22.block_sparse_moe.experts.206.w1", "model.layers.22.block_sparse_moe.experts.207.w1", "model.layers.22.block_sparse_moe.experts.208.w1", "model.layers.22.block_sparse_moe.experts.209.w1", "model.layers.22.block_sparse_moe.experts.210.w1", "model.layers.22.block_sparse_moe.experts.211.w1", "model.layers.22.block_sparse_moe.experts.212.w1", "model.layers.22.block_sparse_moe.experts.213.w1", "model.layers.22.block_sparse_moe.experts.214.w1", "model.layers.22.block_sparse_moe.experts.215.w1", "model.layers.22.block_sparse_moe.experts.216.w1", "model.layers.22.block_sparse_moe.experts.217.w1", "model.layers.22.block_sparse_moe.experts.218.w1", "model.layers.22.block_sparse_moe.experts.219.w1", "model.layers.22.block_sparse_moe.experts.220.w1", "model.layers.22.block_sparse_moe.experts.221.w1", "model.layers.22.block_sparse_moe.experts.222.w1", "model.layers.22.block_sparse_moe.experts.223.w1", "model.layers.22.block_sparse_moe.experts.224.w1", "model.layers.22.block_sparse_moe.experts.225.w1", "model.layers.22.block_sparse_moe.experts.226.w1", "model.layers.22.block_sparse_moe.experts.227.w1", "model.layers.22.block_sparse_moe.experts.228.w1", "model.layers.22.block_sparse_moe.experts.229.w1", "model.layers.22.block_sparse_moe.experts.230.w1", "model.layers.22.block_sparse_moe.experts.231.w1", "model.layers.22.block_sparse_moe.experts.232.w1", "model.layers.22.block_sparse_moe.experts.233.w1", "model.layers.22.block_sparse_moe.experts.234.w1", "model.layers.22.block_sparse_moe.experts.235.w1", "model.layers.22.block_sparse_moe.experts.236.w1", "model.layers.22.block_sparse_moe.experts.237.w1", "model.layers.22.block_sparse_moe.experts.238.w1", "model.layers.22.block_sparse_moe.experts.239.w1", "model.layers.22.block_sparse_moe.experts.240.w1", "model.layers.22.block_sparse_moe.experts.241.w1", "model.layers.22.block_sparse_moe.experts.242.w1", "model.layers.22.block_sparse_moe.experts.243.w1", "model.layers.22.block_sparse_moe.experts.244.w1", "model.layers.22.block_sparse_moe.experts.245.w1", "model.layers.22.block_sparse_moe.experts.246.w1", "model.layers.22.block_sparse_moe.experts.247.w1", "model.layers.22.block_sparse_moe.experts.248.w1", "model.layers.22.block_sparse_moe.experts.249.w1", "model.layers.22.block_sparse_moe.experts.250.w1", "model.layers.22.block_sparse_moe.experts.251.w1", "model.layers.22.block_sparse_moe.experts.252.w1", "model.layers.22.block_sparse_moe.experts.253.w1", "model.layers.22.block_sparse_moe.experts.254.w1", "model.layers.22.block_sparse_moe.experts.255.w1", "model.layers.22.block_sparse_moe.experts.0.w3", "model.layers.22.block_sparse_moe.experts.1.w3", "model.layers.22.block_sparse_moe.experts.2.w3", "model.layers.22.block_sparse_moe.experts.3.w3", "model.layers.22.block_sparse_moe.experts.4.w3", "model.layers.22.block_sparse_moe.experts.5.w3", "model.layers.22.block_sparse_moe.experts.6.w3", "model.layers.22.block_sparse_moe.experts.7.w3", "model.layers.22.block_sparse_moe.experts.8.w3", "model.layers.22.block_sparse_moe.experts.9.w3", "model.layers.22.block_sparse_moe.experts.10.w3", "model.layers.22.block_sparse_moe.experts.11.w3", "model.layers.22.block_sparse_moe.experts.12.w3", "model.layers.22.block_sparse_moe.experts.13.w3", "model.layers.22.block_sparse_moe.experts.14.w3", "model.layers.22.block_sparse_moe.experts.15.w3", "model.layers.22.block_sparse_moe.experts.16.w3", "model.layers.22.block_sparse_moe.experts.17.w3", "model.layers.22.block_sparse_moe.experts.18.w3", "model.layers.22.block_sparse_moe.experts.19.w3", "model.layers.22.block_sparse_moe.experts.20.w3", "model.layers.22.block_sparse_moe.experts.21.w3", "model.layers.22.block_sparse_moe.experts.22.w3", "model.layers.22.block_sparse_moe.experts.23.w3", "model.layers.22.block_sparse_moe.experts.24.w3", "model.layers.22.block_sparse_moe.experts.25.w3", "model.layers.22.block_sparse_moe.experts.26.w3", "model.layers.22.block_sparse_moe.experts.27.w3", "model.layers.22.block_sparse_moe.experts.28.w3", "model.layers.22.block_sparse_moe.experts.29.w3", "model.layers.22.block_sparse_moe.experts.30.w3", "model.layers.22.block_sparse_moe.experts.31.w3", "model.layers.22.block_sparse_moe.experts.32.w3", "model.layers.22.block_sparse_moe.experts.33.w3", "model.layers.22.block_sparse_moe.experts.34.w3", "model.layers.22.block_sparse_moe.experts.35.w3", "model.layers.22.block_sparse_moe.experts.36.w3", "model.layers.22.block_sparse_moe.experts.37.w3", "model.layers.22.block_sparse_moe.experts.38.w3", "model.layers.22.block_sparse_moe.experts.39.w3", "model.layers.22.block_sparse_moe.experts.40.w3", "model.layers.22.block_sparse_moe.experts.41.w3", "model.layers.22.block_sparse_moe.experts.42.w3", "model.layers.22.block_sparse_moe.experts.43.w3", "model.layers.22.block_sparse_moe.experts.44.w3", "model.layers.22.block_sparse_moe.experts.45.w3", "model.layers.22.block_sparse_moe.experts.46.w3", "model.layers.22.block_sparse_moe.experts.47.w3", "model.layers.22.block_sparse_moe.experts.48.w3", "model.layers.22.block_sparse_moe.experts.49.w3", "model.layers.22.block_sparse_moe.experts.50.w3", "model.layers.22.block_sparse_moe.experts.51.w3", "model.layers.22.block_sparse_moe.experts.52.w3", "model.layers.22.block_sparse_moe.experts.53.w3", "model.layers.22.block_sparse_moe.experts.54.w3", "model.layers.22.block_sparse_moe.experts.55.w3", "model.layers.22.block_sparse_moe.experts.56.w3", "model.layers.22.block_sparse_moe.experts.57.w3", "model.layers.22.block_sparse_moe.experts.58.w3", "model.layers.22.block_sparse_moe.experts.59.w3", "model.layers.22.block_sparse_moe.experts.60.w3", "model.layers.22.block_sparse_moe.experts.61.w3", "model.layers.22.block_sparse_moe.experts.62.w3", "model.layers.22.block_sparse_moe.experts.63.w3", "model.layers.22.block_sparse_moe.experts.64.w3", "model.layers.22.block_sparse_moe.experts.65.w3", "model.layers.22.block_sparse_moe.experts.66.w3", "model.layers.22.block_sparse_moe.experts.67.w3", "model.layers.22.block_sparse_moe.experts.68.w3", "model.layers.22.block_sparse_moe.experts.69.w3", "model.layers.22.block_sparse_moe.experts.70.w3", "model.layers.22.block_sparse_moe.experts.71.w3", "model.layers.22.block_sparse_moe.experts.72.w3", "model.layers.22.block_sparse_moe.experts.73.w3", "model.layers.22.block_sparse_moe.experts.74.w3", "model.layers.22.block_sparse_moe.experts.75.w3", "model.layers.22.block_sparse_moe.experts.76.w3", "model.layers.22.block_sparse_moe.experts.77.w3", "model.layers.22.block_sparse_moe.experts.78.w3", "model.layers.22.block_sparse_moe.experts.79.w3", "model.layers.22.block_sparse_moe.experts.80.w3", "model.layers.22.block_sparse_moe.experts.81.w3", "model.layers.22.block_sparse_moe.experts.82.w3", "model.layers.22.block_sparse_moe.experts.83.w3", "model.layers.22.block_sparse_moe.experts.84.w3", "model.layers.22.block_sparse_moe.experts.85.w3", "model.layers.22.block_sparse_moe.experts.86.w3", "model.layers.22.block_sparse_moe.experts.87.w3", "model.layers.22.block_sparse_moe.experts.88.w3", "model.layers.22.block_sparse_moe.experts.89.w3", "model.layers.22.block_sparse_moe.experts.90.w3", "model.layers.22.block_sparse_moe.experts.91.w3", "model.layers.22.block_sparse_moe.experts.92.w3", "model.layers.22.block_sparse_moe.experts.93.w3", "model.layers.22.block_sparse_moe.experts.94.w3", "model.layers.22.block_sparse_moe.experts.95.w3", "model.layers.22.block_sparse_moe.experts.96.w3", "model.layers.22.block_sparse_moe.experts.97.w3", "model.layers.22.block_sparse_moe.experts.98.w3", "model.layers.22.block_sparse_moe.experts.99.w3", "model.layers.22.block_sparse_moe.experts.100.w3", "model.layers.22.block_sparse_moe.experts.101.w3", "model.layers.22.block_sparse_moe.experts.102.w3", "model.layers.22.block_sparse_moe.experts.103.w3", "model.layers.22.block_sparse_moe.experts.104.w3", "model.layers.22.block_sparse_moe.experts.105.w3", "model.layers.22.block_sparse_moe.experts.106.w3", "model.layers.22.block_sparse_moe.experts.107.w3", "model.layers.22.block_sparse_moe.experts.108.w3", "model.layers.22.block_sparse_moe.experts.109.w3", "model.layers.22.block_sparse_moe.experts.110.w3", "model.layers.22.block_sparse_moe.experts.111.w3", "model.layers.22.block_sparse_moe.experts.112.w3", "model.layers.22.block_sparse_moe.experts.113.w3", "model.layers.22.block_sparse_moe.experts.114.w3", "model.layers.22.block_sparse_moe.experts.115.w3", "model.layers.22.block_sparse_moe.experts.116.w3", "model.layers.22.block_sparse_moe.experts.117.w3", "model.layers.22.block_sparse_moe.experts.118.w3", "model.layers.22.block_sparse_moe.experts.119.w3", "model.layers.22.block_sparse_moe.experts.120.w3", "model.layers.22.block_sparse_moe.experts.121.w3", "model.layers.22.block_sparse_moe.experts.122.w3", "model.layers.22.block_sparse_moe.experts.123.w3", "model.layers.22.block_sparse_moe.experts.124.w3", "model.layers.22.block_sparse_moe.experts.125.w3", "model.layers.22.block_sparse_moe.experts.126.w3", "model.layers.22.block_sparse_moe.experts.127.w3", "model.layers.22.block_sparse_moe.experts.128.w3", "model.layers.22.block_sparse_moe.experts.129.w3", "model.layers.22.block_sparse_moe.experts.130.w3", "model.layers.22.block_sparse_moe.experts.131.w3", "model.layers.22.block_sparse_moe.experts.132.w3", "model.layers.22.block_sparse_moe.experts.133.w3", "model.layers.22.block_sparse_moe.experts.134.w3", "model.layers.22.block_sparse_moe.experts.135.w3", "model.layers.22.block_sparse_moe.experts.136.w3", "model.layers.22.block_sparse_moe.experts.137.w3", "model.layers.22.block_sparse_moe.experts.138.w3", "model.layers.22.block_sparse_moe.experts.139.w3", "model.layers.22.block_sparse_moe.experts.140.w3", "model.layers.22.block_sparse_moe.experts.141.w3", "model.layers.22.block_sparse_moe.experts.142.w3", "model.layers.22.block_sparse_moe.experts.143.w3", "model.layers.22.block_sparse_moe.experts.144.w3", "model.layers.22.block_sparse_moe.experts.145.w3", "model.layers.22.block_sparse_moe.experts.146.w3", "model.layers.22.block_sparse_moe.experts.147.w3", "model.layers.22.block_sparse_moe.experts.148.w3", "model.layers.22.block_sparse_moe.experts.149.w3", "model.layers.22.block_sparse_moe.experts.150.w3", "model.layers.22.block_sparse_moe.experts.151.w3", "model.layers.22.block_sparse_moe.experts.152.w3", "model.layers.22.block_sparse_moe.experts.153.w3", "model.layers.22.block_sparse_moe.experts.154.w3", "model.layers.22.block_sparse_moe.experts.155.w3", "model.layers.22.block_sparse_moe.experts.156.w3", "model.layers.22.block_sparse_moe.experts.157.w3", "model.layers.22.block_sparse_moe.experts.158.w3", "model.layers.22.block_sparse_moe.experts.159.w3", "model.layers.22.block_sparse_moe.experts.160.w3", "model.layers.22.block_sparse_moe.experts.161.w3", "model.layers.22.block_sparse_moe.experts.162.w3", "model.layers.22.block_sparse_moe.experts.163.w3", "model.layers.22.block_sparse_moe.experts.164.w3", "model.layers.22.block_sparse_moe.experts.165.w3", "model.layers.22.block_sparse_moe.experts.166.w3", "model.layers.22.block_sparse_moe.experts.167.w3", "model.layers.22.block_sparse_moe.experts.168.w3", "model.layers.22.block_sparse_moe.experts.169.w3", "model.layers.22.block_sparse_moe.experts.170.w3", "model.layers.22.block_sparse_moe.experts.171.w3", "model.layers.22.block_sparse_moe.experts.172.w3", "model.layers.22.block_sparse_moe.experts.173.w3", "model.layers.22.block_sparse_moe.experts.174.w3", "model.layers.22.block_sparse_moe.experts.175.w3", "model.layers.22.block_sparse_moe.experts.176.w3", "model.layers.22.block_sparse_moe.experts.177.w3", "model.layers.22.block_sparse_moe.experts.178.w3", "model.layers.22.block_sparse_moe.experts.179.w3", "model.layers.22.block_sparse_moe.experts.180.w3", "model.layers.22.block_sparse_moe.experts.181.w3", "model.layers.22.block_sparse_moe.experts.182.w3", "model.layers.22.block_sparse_moe.experts.183.w3", "model.layers.22.block_sparse_moe.experts.184.w3", "model.layers.22.block_sparse_moe.experts.185.w3", "model.layers.22.block_sparse_moe.experts.186.w3", "model.layers.22.block_sparse_moe.experts.187.w3", "model.layers.22.block_sparse_moe.experts.188.w3", "model.layers.22.block_sparse_moe.experts.189.w3", "model.layers.22.block_sparse_moe.experts.190.w3", "model.layers.22.block_sparse_moe.experts.191.w3", "model.layers.22.block_sparse_moe.experts.192.w3", "model.layers.22.block_sparse_moe.experts.193.w3", "model.layers.22.block_sparse_moe.experts.194.w3", "model.layers.22.block_sparse_moe.experts.195.w3", "model.layers.22.block_sparse_moe.experts.196.w3", "model.layers.22.block_sparse_moe.experts.197.w3", "model.layers.22.block_sparse_moe.experts.198.w3", "model.layers.22.block_sparse_moe.experts.199.w3", "model.layers.22.block_sparse_moe.experts.200.w3", "model.layers.22.block_sparse_moe.experts.201.w3", "model.layers.22.block_sparse_moe.experts.202.w3", "model.layers.22.block_sparse_moe.experts.203.w3", "model.layers.22.block_sparse_moe.experts.204.w3", "model.layers.22.block_sparse_moe.experts.205.w3", "model.layers.22.block_sparse_moe.experts.206.w3", "model.layers.22.block_sparse_moe.experts.207.w3", "model.layers.22.block_sparse_moe.experts.208.w3", "model.layers.22.block_sparse_moe.experts.209.w3", "model.layers.22.block_sparse_moe.experts.210.w3", "model.layers.22.block_sparse_moe.experts.211.w3", "model.layers.22.block_sparse_moe.experts.212.w3", "model.layers.22.block_sparse_moe.experts.213.w3", "model.layers.22.block_sparse_moe.experts.214.w3", "model.layers.22.block_sparse_moe.experts.215.w3", "model.layers.22.block_sparse_moe.experts.216.w3", "model.layers.22.block_sparse_moe.experts.217.w3", "model.layers.22.block_sparse_moe.experts.218.w3", "model.layers.22.block_sparse_moe.experts.219.w3", "model.layers.22.block_sparse_moe.experts.220.w3", "model.layers.22.block_sparse_moe.experts.221.w3", "model.layers.22.block_sparse_moe.experts.222.w3", "model.layers.22.block_sparse_moe.experts.223.w3", "model.layers.22.block_sparse_moe.experts.224.w3", "model.layers.22.block_sparse_moe.experts.225.w3", "model.layers.22.block_sparse_moe.experts.226.w3", "model.layers.22.block_sparse_moe.experts.227.w3", "model.layers.22.block_sparse_moe.experts.228.w3", "model.layers.22.block_sparse_moe.experts.229.w3", "model.layers.22.block_sparse_moe.experts.230.w3", "model.layers.22.block_sparse_moe.experts.231.w3", "model.layers.22.block_sparse_moe.experts.232.w3", "model.layers.22.block_sparse_moe.experts.233.w3", "model.layers.22.block_sparse_moe.experts.234.w3", "model.layers.22.block_sparse_moe.experts.235.w3", "model.layers.22.block_sparse_moe.experts.236.w3", "model.layers.22.block_sparse_moe.experts.237.w3", "model.layers.22.block_sparse_moe.experts.238.w3", "model.layers.22.block_sparse_moe.experts.239.w3", "model.layers.22.block_sparse_moe.experts.240.w3", "model.layers.22.block_sparse_moe.experts.241.w3", "model.layers.22.block_sparse_moe.experts.242.w3", "model.layers.22.block_sparse_moe.experts.243.w3", "model.layers.22.block_sparse_moe.experts.244.w3", "model.layers.22.block_sparse_moe.experts.245.w3", "model.layers.22.block_sparse_moe.experts.246.w3", "model.layers.22.block_sparse_moe.experts.247.w3", "model.layers.22.block_sparse_moe.experts.248.w3", "model.layers.22.block_sparse_moe.experts.249.w3", "model.layers.22.block_sparse_moe.experts.250.w3", "model.layers.22.block_sparse_moe.experts.251.w3", "model.layers.22.block_sparse_moe.experts.252.w3", "model.layers.22.block_sparse_moe.experts.253.w3", "model.layers.22.block_sparse_moe.experts.254.w3", "model.layers.22.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0014086242765188328, "dbits": 2415919104 } ] }, { "idx": 114, "layers": [ "model.layers.22.block_sparse_moe.experts.0.w2", "model.layers.22.block_sparse_moe.experts.1.w2", "model.layers.22.block_sparse_moe.experts.2.w2", "model.layers.22.block_sparse_moe.experts.3.w2", "model.layers.22.block_sparse_moe.experts.4.w2", "model.layers.22.block_sparse_moe.experts.5.w2", "model.layers.22.block_sparse_moe.experts.6.w2", "model.layers.22.block_sparse_moe.experts.7.w2", "model.layers.22.block_sparse_moe.experts.8.w2", "model.layers.22.block_sparse_moe.experts.9.w2", "model.layers.22.block_sparse_moe.experts.10.w2", "model.layers.22.block_sparse_moe.experts.11.w2", "model.layers.22.block_sparse_moe.experts.12.w2", "model.layers.22.block_sparse_moe.experts.13.w2", "model.layers.22.block_sparse_moe.experts.14.w2", "model.layers.22.block_sparse_moe.experts.15.w2", "model.layers.22.block_sparse_moe.experts.16.w2", "model.layers.22.block_sparse_moe.experts.17.w2", "model.layers.22.block_sparse_moe.experts.18.w2", "model.layers.22.block_sparse_moe.experts.19.w2", "model.layers.22.block_sparse_moe.experts.20.w2", "model.layers.22.block_sparse_moe.experts.21.w2", "model.layers.22.block_sparse_moe.experts.22.w2", "model.layers.22.block_sparse_moe.experts.23.w2", "model.layers.22.block_sparse_moe.experts.24.w2", "model.layers.22.block_sparse_moe.experts.25.w2", "model.layers.22.block_sparse_moe.experts.26.w2", "model.layers.22.block_sparse_moe.experts.27.w2", "model.layers.22.block_sparse_moe.experts.28.w2", "model.layers.22.block_sparse_moe.experts.29.w2", "model.layers.22.block_sparse_moe.experts.30.w2", "model.layers.22.block_sparse_moe.experts.31.w2", "model.layers.22.block_sparse_moe.experts.32.w2", "model.layers.22.block_sparse_moe.experts.33.w2", "model.layers.22.block_sparse_moe.experts.34.w2", "model.layers.22.block_sparse_moe.experts.35.w2", "model.layers.22.block_sparse_moe.experts.36.w2", "model.layers.22.block_sparse_moe.experts.37.w2", "model.layers.22.block_sparse_moe.experts.38.w2", "model.layers.22.block_sparse_moe.experts.39.w2", "model.layers.22.block_sparse_moe.experts.40.w2", "model.layers.22.block_sparse_moe.experts.41.w2", "model.layers.22.block_sparse_moe.experts.42.w2", "model.layers.22.block_sparse_moe.experts.43.w2", "model.layers.22.block_sparse_moe.experts.44.w2", "model.layers.22.block_sparse_moe.experts.45.w2", "model.layers.22.block_sparse_moe.experts.46.w2", "model.layers.22.block_sparse_moe.experts.47.w2", "model.layers.22.block_sparse_moe.experts.48.w2", "model.layers.22.block_sparse_moe.experts.49.w2", "model.layers.22.block_sparse_moe.experts.50.w2", "model.layers.22.block_sparse_moe.experts.51.w2", "model.layers.22.block_sparse_moe.experts.52.w2", "model.layers.22.block_sparse_moe.experts.53.w2", "model.layers.22.block_sparse_moe.experts.54.w2", "model.layers.22.block_sparse_moe.experts.55.w2", "model.layers.22.block_sparse_moe.experts.56.w2", "model.layers.22.block_sparse_moe.experts.57.w2", "model.layers.22.block_sparse_moe.experts.58.w2", "model.layers.22.block_sparse_moe.experts.59.w2", "model.layers.22.block_sparse_moe.experts.60.w2", "model.layers.22.block_sparse_moe.experts.61.w2", "model.layers.22.block_sparse_moe.experts.62.w2", "model.layers.22.block_sparse_moe.experts.63.w2", "model.layers.22.block_sparse_moe.experts.64.w2", "model.layers.22.block_sparse_moe.experts.65.w2", "model.layers.22.block_sparse_moe.experts.66.w2", "model.layers.22.block_sparse_moe.experts.67.w2", "model.layers.22.block_sparse_moe.experts.68.w2", "model.layers.22.block_sparse_moe.experts.69.w2", "model.layers.22.block_sparse_moe.experts.70.w2", "model.layers.22.block_sparse_moe.experts.71.w2", "model.layers.22.block_sparse_moe.experts.72.w2", "model.layers.22.block_sparse_moe.experts.73.w2", "model.layers.22.block_sparse_moe.experts.74.w2", "model.layers.22.block_sparse_moe.experts.75.w2", "model.layers.22.block_sparse_moe.experts.76.w2", "model.layers.22.block_sparse_moe.experts.77.w2", "model.layers.22.block_sparse_moe.experts.78.w2", "model.layers.22.block_sparse_moe.experts.79.w2", "model.layers.22.block_sparse_moe.experts.80.w2", "model.layers.22.block_sparse_moe.experts.81.w2", "model.layers.22.block_sparse_moe.experts.82.w2", "model.layers.22.block_sparse_moe.experts.83.w2", "model.layers.22.block_sparse_moe.experts.84.w2", "model.layers.22.block_sparse_moe.experts.85.w2", "model.layers.22.block_sparse_moe.experts.86.w2", "model.layers.22.block_sparse_moe.experts.87.w2", "model.layers.22.block_sparse_moe.experts.88.w2", "model.layers.22.block_sparse_moe.experts.89.w2", "model.layers.22.block_sparse_moe.experts.90.w2", "model.layers.22.block_sparse_moe.experts.91.w2", "model.layers.22.block_sparse_moe.experts.92.w2", "model.layers.22.block_sparse_moe.experts.93.w2", "model.layers.22.block_sparse_moe.experts.94.w2", "model.layers.22.block_sparse_moe.experts.95.w2", "model.layers.22.block_sparse_moe.experts.96.w2", "model.layers.22.block_sparse_moe.experts.97.w2", "model.layers.22.block_sparse_moe.experts.98.w2", "model.layers.22.block_sparse_moe.experts.99.w2", "model.layers.22.block_sparse_moe.experts.100.w2", "model.layers.22.block_sparse_moe.experts.101.w2", "model.layers.22.block_sparse_moe.experts.102.w2", "model.layers.22.block_sparse_moe.experts.103.w2", "model.layers.22.block_sparse_moe.experts.104.w2", "model.layers.22.block_sparse_moe.experts.105.w2", "model.layers.22.block_sparse_moe.experts.106.w2", "model.layers.22.block_sparse_moe.experts.107.w2", "model.layers.22.block_sparse_moe.experts.108.w2", "model.layers.22.block_sparse_moe.experts.109.w2", "model.layers.22.block_sparse_moe.experts.110.w2", "model.layers.22.block_sparse_moe.experts.111.w2", "model.layers.22.block_sparse_moe.experts.112.w2", "model.layers.22.block_sparse_moe.experts.113.w2", "model.layers.22.block_sparse_moe.experts.114.w2", "model.layers.22.block_sparse_moe.experts.115.w2", "model.layers.22.block_sparse_moe.experts.116.w2", "model.layers.22.block_sparse_moe.experts.117.w2", "model.layers.22.block_sparse_moe.experts.118.w2", "model.layers.22.block_sparse_moe.experts.119.w2", "model.layers.22.block_sparse_moe.experts.120.w2", "model.layers.22.block_sparse_moe.experts.121.w2", "model.layers.22.block_sparse_moe.experts.122.w2", "model.layers.22.block_sparse_moe.experts.123.w2", "model.layers.22.block_sparse_moe.experts.124.w2", "model.layers.22.block_sparse_moe.experts.125.w2", "model.layers.22.block_sparse_moe.experts.126.w2", "model.layers.22.block_sparse_moe.experts.127.w2", "model.layers.22.block_sparse_moe.experts.128.w2", "model.layers.22.block_sparse_moe.experts.129.w2", "model.layers.22.block_sparse_moe.experts.130.w2", "model.layers.22.block_sparse_moe.experts.131.w2", "model.layers.22.block_sparse_moe.experts.132.w2", "model.layers.22.block_sparse_moe.experts.133.w2", "model.layers.22.block_sparse_moe.experts.134.w2", "model.layers.22.block_sparse_moe.experts.135.w2", "model.layers.22.block_sparse_moe.experts.136.w2", "model.layers.22.block_sparse_moe.experts.137.w2", "model.layers.22.block_sparse_moe.experts.138.w2", "model.layers.22.block_sparse_moe.experts.139.w2", "model.layers.22.block_sparse_moe.experts.140.w2", "model.layers.22.block_sparse_moe.experts.141.w2", "model.layers.22.block_sparse_moe.experts.142.w2", "model.layers.22.block_sparse_moe.experts.143.w2", "model.layers.22.block_sparse_moe.experts.144.w2", "model.layers.22.block_sparse_moe.experts.145.w2", "model.layers.22.block_sparse_moe.experts.146.w2", "model.layers.22.block_sparse_moe.experts.147.w2", "model.layers.22.block_sparse_moe.experts.148.w2", "model.layers.22.block_sparse_moe.experts.149.w2", "model.layers.22.block_sparse_moe.experts.150.w2", "model.layers.22.block_sparse_moe.experts.151.w2", "model.layers.22.block_sparse_moe.experts.152.w2", "model.layers.22.block_sparse_moe.experts.153.w2", "model.layers.22.block_sparse_moe.experts.154.w2", "model.layers.22.block_sparse_moe.experts.155.w2", "model.layers.22.block_sparse_moe.experts.156.w2", "model.layers.22.block_sparse_moe.experts.157.w2", "model.layers.22.block_sparse_moe.experts.158.w2", "model.layers.22.block_sparse_moe.experts.159.w2", "model.layers.22.block_sparse_moe.experts.160.w2", "model.layers.22.block_sparse_moe.experts.161.w2", "model.layers.22.block_sparse_moe.experts.162.w2", "model.layers.22.block_sparse_moe.experts.163.w2", "model.layers.22.block_sparse_moe.experts.164.w2", "model.layers.22.block_sparse_moe.experts.165.w2", "model.layers.22.block_sparse_moe.experts.166.w2", "model.layers.22.block_sparse_moe.experts.167.w2", "model.layers.22.block_sparse_moe.experts.168.w2", "model.layers.22.block_sparse_moe.experts.169.w2", "model.layers.22.block_sparse_moe.experts.170.w2", "model.layers.22.block_sparse_moe.experts.171.w2", "model.layers.22.block_sparse_moe.experts.172.w2", "model.layers.22.block_sparse_moe.experts.173.w2", "model.layers.22.block_sparse_moe.experts.174.w2", "model.layers.22.block_sparse_moe.experts.175.w2", "model.layers.22.block_sparse_moe.experts.176.w2", "model.layers.22.block_sparse_moe.experts.177.w2", "model.layers.22.block_sparse_moe.experts.178.w2", "model.layers.22.block_sparse_moe.experts.179.w2", "model.layers.22.block_sparse_moe.experts.180.w2", "model.layers.22.block_sparse_moe.experts.181.w2", "model.layers.22.block_sparse_moe.experts.182.w2", "model.layers.22.block_sparse_moe.experts.183.w2", "model.layers.22.block_sparse_moe.experts.184.w2", "model.layers.22.block_sparse_moe.experts.185.w2", "model.layers.22.block_sparse_moe.experts.186.w2", "model.layers.22.block_sparse_moe.experts.187.w2", "model.layers.22.block_sparse_moe.experts.188.w2", "model.layers.22.block_sparse_moe.experts.189.w2", "model.layers.22.block_sparse_moe.experts.190.w2", "model.layers.22.block_sparse_moe.experts.191.w2", "model.layers.22.block_sparse_moe.experts.192.w2", "model.layers.22.block_sparse_moe.experts.193.w2", "model.layers.22.block_sparse_moe.experts.194.w2", "model.layers.22.block_sparse_moe.experts.195.w2", "model.layers.22.block_sparse_moe.experts.196.w2", "model.layers.22.block_sparse_moe.experts.197.w2", "model.layers.22.block_sparse_moe.experts.198.w2", "model.layers.22.block_sparse_moe.experts.199.w2", "model.layers.22.block_sparse_moe.experts.200.w2", "model.layers.22.block_sparse_moe.experts.201.w2", "model.layers.22.block_sparse_moe.experts.202.w2", "model.layers.22.block_sparse_moe.experts.203.w2", "model.layers.22.block_sparse_moe.experts.204.w2", "model.layers.22.block_sparse_moe.experts.205.w2", "model.layers.22.block_sparse_moe.experts.206.w2", "model.layers.22.block_sparse_moe.experts.207.w2", "model.layers.22.block_sparse_moe.experts.208.w2", "model.layers.22.block_sparse_moe.experts.209.w2", "model.layers.22.block_sparse_moe.experts.210.w2", "model.layers.22.block_sparse_moe.experts.211.w2", "model.layers.22.block_sparse_moe.experts.212.w2", "model.layers.22.block_sparse_moe.experts.213.w2", "model.layers.22.block_sparse_moe.experts.214.w2", "model.layers.22.block_sparse_moe.experts.215.w2", "model.layers.22.block_sparse_moe.experts.216.w2", "model.layers.22.block_sparse_moe.experts.217.w2", "model.layers.22.block_sparse_moe.experts.218.w2", "model.layers.22.block_sparse_moe.experts.219.w2", "model.layers.22.block_sparse_moe.experts.220.w2", "model.layers.22.block_sparse_moe.experts.221.w2", "model.layers.22.block_sparse_moe.experts.222.w2", "model.layers.22.block_sparse_moe.experts.223.w2", "model.layers.22.block_sparse_moe.experts.224.w2", "model.layers.22.block_sparse_moe.experts.225.w2", "model.layers.22.block_sparse_moe.experts.226.w2", "model.layers.22.block_sparse_moe.experts.227.w2", "model.layers.22.block_sparse_moe.experts.228.w2", "model.layers.22.block_sparse_moe.experts.229.w2", "model.layers.22.block_sparse_moe.experts.230.w2", "model.layers.22.block_sparse_moe.experts.231.w2", "model.layers.22.block_sparse_moe.experts.232.w2", "model.layers.22.block_sparse_moe.experts.233.w2", "model.layers.22.block_sparse_moe.experts.234.w2", "model.layers.22.block_sparse_moe.experts.235.w2", "model.layers.22.block_sparse_moe.experts.236.w2", "model.layers.22.block_sparse_moe.experts.237.w2", "model.layers.22.block_sparse_moe.experts.238.w2", "model.layers.22.block_sparse_moe.experts.239.w2", "model.layers.22.block_sparse_moe.experts.240.w2", "model.layers.22.block_sparse_moe.experts.241.w2", "model.layers.22.block_sparse_moe.experts.242.w2", "model.layers.22.block_sparse_moe.experts.243.w2", "model.layers.22.block_sparse_moe.experts.244.w2", "model.layers.22.block_sparse_moe.experts.245.w2", "model.layers.22.block_sparse_moe.experts.246.w2", "model.layers.22.block_sparse_moe.experts.247.w2", "model.layers.22.block_sparse_moe.experts.248.w2", "model.layers.22.block_sparse_moe.experts.249.w2", "model.layers.22.block_sparse_moe.experts.250.w2", "model.layers.22.block_sparse_moe.experts.251.w2", "model.layers.22.block_sparse_moe.experts.252.w2", "model.layers.22.block_sparse_moe.experts.253.w2", "model.layers.22.block_sparse_moe.experts.254.w2", "model.layers.22.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.00010044090449806697, "dbits": 1207959552 } ] }, { "idx": 115, "layers": [ "model.layers.23.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0004732217639684677, "dbits": 18874368 } ] }, { "idx": 116, "layers": [ "model.layers.23.self_attn.k_proj", "model.layers.23.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0017716627568006849, "dbits": 6291456 } ] }, { "idx": 117, "layers": [ "model.layers.23.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0034115478396415377, "dbits": 18874368 } ] }, { "idx": 118, "layers": [ "model.layers.23.block_sparse_moe.experts.0.w1", "model.layers.23.block_sparse_moe.experts.1.w1", "model.layers.23.block_sparse_moe.experts.2.w1", "model.layers.23.block_sparse_moe.experts.3.w1", "model.layers.23.block_sparse_moe.experts.4.w1", "model.layers.23.block_sparse_moe.experts.5.w1", "model.layers.23.block_sparse_moe.experts.6.w1", "model.layers.23.block_sparse_moe.experts.7.w1", "model.layers.23.block_sparse_moe.experts.8.w1", "model.layers.23.block_sparse_moe.experts.9.w1", "model.layers.23.block_sparse_moe.experts.10.w1", "model.layers.23.block_sparse_moe.experts.11.w1", "model.layers.23.block_sparse_moe.experts.12.w1", "model.layers.23.block_sparse_moe.experts.13.w1", "model.layers.23.block_sparse_moe.experts.14.w1", "model.layers.23.block_sparse_moe.experts.15.w1", "model.layers.23.block_sparse_moe.experts.16.w1", "model.layers.23.block_sparse_moe.experts.17.w1", "model.layers.23.block_sparse_moe.experts.18.w1", "model.layers.23.block_sparse_moe.experts.19.w1", "model.layers.23.block_sparse_moe.experts.20.w1", "model.layers.23.block_sparse_moe.experts.21.w1", "model.layers.23.block_sparse_moe.experts.22.w1", "model.layers.23.block_sparse_moe.experts.23.w1", "model.layers.23.block_sparse_moe.experts.24.w1", "model.layers.23.block_sparse_moe.experts.25.w1", "model.layers.23.block_sparse_moe.experts.26.w1", "model.layers.23.block_sparse_moe.experts.27.w1", "model.layers.23.block_sparse_moe.experts.28.w1", "model.layers.23.block_sparse_moe.experts.29.w1", "model.layers.23.block_sparse_moe.experts.30.w1", "model.layers.23.block_sparse_moe.experts.31.w1", "model.layers.23.block_sparse_moe.experts.32.w1", "model.layers.23.block_sparse_moe.experts.33.w1", "model.layers.23.block_sparse_moe.experts.34.w1", "model.layers.23.block_sparse_moe.experts.35.w1", "model.layers.23.block_sparse_moe.experts.36.w1", "model.layers.23.block_sparse_moe.experts.37.w1", "model.layers.23.block_sparse_moe.experts.38.w1", "model.layers.23.block_sparse_moe.experts.39.w1", "model.layers.23.block_sparse_moe.experts.40.w1", "model.layers.23.block_sparse_moe.experts.41.w1", "model.layers.23.block_sparse_moe.experts.42.w1", "model.layers.23.block_sparse_moe.experts.43.w1", "model.layers.23.block_sparse_moe.experts.44.w1", "model.layers.23.block_sparse_moe.experts.45.w1", "model.layers.23.block_sparse_moe.experts.46.w1", "model.layers.23.block_sparse_moe.experts.47.w1", "model.layers.23.block_sparse_moe.experts.48.w1", "model.layers.23.block_sparse_moe.experts.49.w1", "model.layers.23.block_sparse_moe.experts.50.w1", "model.layers.23.block_sparse_moe.experts.51.w1", "model.layers.23.block_sparse_moe.experts.52.w1", "model.layers.23.block_sparse_moe.experts.53.w1", "model.layers.23.block_sparse_moe.experts.54.w1", "model.layers.23.block_sparse_moe.experts.55.w1", "model.layers.23.block_sparse_moe.experts.56.w1", "model.layers.23.block_sparse_moe.experts.57.w1", "model.layers.23.block_sparse_moe.experts.58.w1", "model.layers.23.block_sparse_moe.experts.59.w1", "model.layers.23.block_sparse_moe.experts.60.w1", "model.layers.23.block_sparse_moe.experts.61.w1", "model.layers.23.block_sparse_moe.experts.62.w1", "model.layers.23.block_sparse_moe.experts.63.w1", "model.layers.23.block_sparse_moe.experts.64.w1", "model.layers.23.block_sparse_moe.experts.65.w1", "model.layers.23.block_sparse_moe.experts.66.w1", "model.layers.23.block_sparse_moe.experts.67.w1", "model.layers.23.block_sparse_moe.experts.68.w1", "model.layers.23.block_sparse_moe.experts.69.w1", "model.layers.23.block_sparse_moe.experts.70.w1", "model.layers.23.block_sparse_moe.experts.71.w1", "model.layers.23.block_sparse_moe.experts.72.w1", "model.layers.23.block_sparse_moe.experts.73.w1", "model.layers.23.block_sparse_moe.experts.74.w1", "model.layers.23.block_sparse_moe.experts.75.w1", "model.layers.23.block_sparse_moe.experts.76.w1", "model.layers.23.block_sparse_moe.experts.77.w1", "model.layers.23.block_sparse_moe.experts.78.w1", "model.layers.23.block_sparse_moe.experts.79.w1", "model.layers.23.block_sparse_moe.experts.80.w1", "model.layers.23.block_sparse_moe.experts.81.w1", "model.layers.23.block_sparse_moe.experts.82.w1", "model.layers.23.block_sparse_moe.experts.83.w1", "model.layers.23.block_sparse_moe.experts.84.w1", "model.layers.23.block_sparse_moe.experts.85.w1", "model.layers.23.block_sparse_moe.experts.86.w1", "model.layers.23.block_sparse_moe.experts.87.w1", "model.layers.23.block_sparse_moe.experts.88.w1", "model.layers.23.block_sparse_moe.experts.89.w1", "model.layers.23.block_sparse_moe.experts.90.w1", "model.layers.23.block_sparse_moe.experts.91.w1", "model.layers.23.block_sparse_moe.experts.92.w1", "model.layers.23.block_sparse_moe.experts.93.w1", "model.layers.23.block_sparse_moe.experts.94.w1", "model.layers.23.block_sparse_moe.experts.95.w1", "model.layers.23.block_sparse_moe.experts.96.w1", "model.layers.23.block_sparse_moe.experts.97.w1", "model.layers.23.block_sparse_moe.experts.98.w1", "model.layers.23.block_sparse_moe.experts.99.w1", "model.layers.23.block_sparse_moe.experts.100.w1", "model.layers.23.block_sparse_moe.experts.101.w1", "model.layers.23.block_sparse_moe.experts.102.w1", "model.layers.23.block_sparse_moe.experts.103.w1", "model.layers.23.block_sparse_moe.experts.104.w1", "model.layers.23.block_sparse_moe.experts.105.w1", "model.layers.23.block_sparse_moe.experts.106.w1", "model.layers.23.block_sparse_moe.experts.107.w1", "model.layers.23.block_sparse_moe.experts.108.w1", "model.layers.23.block_sparse_moe.experts.109.w1", "model.layers.23.block_sparse_moe.experts.110.w1", "model.layers.23.block_sparse_moe.experts.111.w1", "model.layers.23.block_sparse_moe.experts.112.w1", "model.layers.23.block_sparse_moe.experts.113.w1", "model.layers.23.block_sparse_moe.experts.114.w1", "model.layers.23.block_sparse_moe.experts.115.w1", "model.layers.23.block_sparse_moe.experts.116.w1", "model.layers.23.block_sparse_moe.experts.117.w1", "model.layers.23.block_sparse_moe.experts.118.w1", "model.layers.23.block_sparse_moe.experts.119.w1", "model.layers.23.block_sparse_moe.experts.120.w1", "model.layers.23.block_sparse_moe.experts.121.w1", "model.layers.23.block_sparse_moe.experts.122.w1", "model.layers.23.block_sparse_moe.experts.123.w1", "model.layers.23.block_sparse_moe.experts.124.w1", "model.layers.23.block_sparse_moe.experts.125.w1", "model.layers.23.block_sparse_moe.experts.126.w1", "model.layers.23.block_sparse_moe.experts.127.w1", "model.layers.23.block_sparse_moe.experts.128.w1", "model.layers.23.block_sparse_moe.experts.129.w1", "model.layers.23.block_sparse_moe.experts.130.w1", "model.layers.23.block_sparse_moe.experts.131.w1", "model.layers.23.block_sparse_moe.experts.132.w1", "model.layers.23.block_sparse_moe.experts.133.w1", "model.layers.23.block_sparse_moe.experts.134.w1", "model.layers.23.block_sparse_moe.experts.135.w1", "model.layers.23.block_sparse_moe.experts.136.w1", "model.layers.23.block_sparse_moe.experts.137.w1", "model.layers.23.block_sparse_moe.experts.138.w1", "model.layers.23.block_sparse_moe.experts.139.w1", "model.layers.23.block_sparse_moe.experts.140.w1", "model.layers.23.block_sparse_moe.experts.141.w1", "model.layers.23.block_sparse_moe.experts.142.w1", "model.layers.23.block_sparse_moe.experts.143.w1", "model.layers.23.block_sparse_moe.experts.144.w1", "model.layers.23.block_sparse_moe.experts.145.w1", "model.layers.23.block_sparse_moe.experts.146.w1", "model.layers.23.block_sparse_moe.experts.147.w1", "model.layers.23.block_sparse_moe.experts.148.w1", "model.layers.23.block_sparse_moe.experts.149.w1", "model.layers.23.block_sparse_moe.experts.150.w1", "model.layers.23.block_sparse_moe.experts.151.w1", "model.layers.23.block_sparse_moe.experts.152.w1", "model.layers.23.block_sparse_moe.experts.153.w1", "model.layers.23.block_sparse_moe.experts.154.w1", "model.layers.23.block_sparse_moe.experts.155.w1", "model.layers.23.block_sparse_moe.experts.156.w1", "model.layers.23.block_sparse_moe.experts.157.w1", "model.layers.23.block_sparse_moe.experts.158.w1", "model.layers.23.block_sparse_moe.experts.159.w1", "model.layers.23.block_sparse_moe.experts.160.w1", "model.layers.23.block_sparse_moe.experts.161.w1", "model.layers.23.block_sparse_moe.experts.162.w1", "model.layers.23.block_sparse_moe.experts.163.w1", "model.layers.23.block_sparse_moe.experts.164.w1", "model.layers.23.block_sparse_moe.experts.165.w1", "model.layers.23.block_sparse_moe.experts.166.w1", "model.layers.23.block_sparse_moe.experts.167.w1", "model.layers.23.block_sparse_moe.experts.168.w1", "model.layers.23.block_sparse_moe.experts.169.w1", "model.layers.23.block_sparse_moe.experts.170.w1", "model.layers.23.block_sparse_moe.experts.171.w1", "model.layers.23.block_sparse_moe.experts.172.w1", "model.layers.23.block_sparse_moe.experts.173.w1", "model.layers.23.block_sparse_moe.experts.174.w1", "model.layers.23.block_sparse_moe.experts.175.w1", "model.layers.23.block_sparse_moe.experts.176.w1", "model.layers.23.block_sparse_moe.experts.177.w1", "model.layers.23.block_sparse_moe.experts.178.w1", "model.layers.23.block_sparse_moe.experts.179.w1", "model.layers.23.block_sparse_moe.experts.180.w1", "model.layers.23.block_sparse_moe.experts.181.w1", "model.layers.23.block_sparse_moe.experts.182.w1", "model.layers.23.block_sparse_moe.experts.183.w1", "model.layers.23.block_sparse_moe.experts.184.w1", "model.layers.23.block_sparse_moe.experts.185.w1", "model.layers.23.block_sparse_moe.experts.186.w1", "model.layers.23.block_sparse_moe.experts.187.w1", "model.layers.23.block_sparse_moe.experts.188.w1", "model.layers.23.block_sparse_moe.experts.189.w1", "model.layers.23.block_sparse_moe.experts.190.w1", "model.layers.23.block_sparse_moe.experts.191.w1", "model.layers.23.block_sparse_moe.experts.192.w1", "model.layers.23.block_sparse_moe.experts.193.w1", "model.layers.23.block_sparse_moe.experts.194.w1", "model.layers.23.block_sparse_moe.experts.195.w1", "model.layers.23.block_sparse_moe.experts.196.w1", "model.layers.23.block_sparse_moe.experts.197.w1", "model.layers.23.block_sparse_moe.experts.198.w1", "model.layers.23.block_sparse_moe.experts.199.w1", "model.layers.23.block_sparse_moe.experts.200.w1", "model.layers.23.block_sparse_moe.experts.201.w1", "model.layers.23.block_sparse_moe.experts.202.w1", "model.layers.23.block_sparse_moe.experts.203.w1", "model.layers.23.block_sparse_moe.experts.204.w1", "model.layers.23.block_sparse_moe.experts.205.w1", "model.layers.23.block_sparse_moe.experts.206.w1", "model.layers.23.block_sparse_moe.experts.207.w1", "model.layers.23.block_sparse_moe.experts.208.w1", "model.layers.23.block_sparse_moe.experts.209.w1", "model.layers.23.block_sparse_moe.experts.210.w1", "model.layers.23.block_sparse_moe.experts.211.w1", "model.layers.23.block_sparse_moe.experts.212.w1", "model.layers.23.block_sparse_moe.experts.213.w1", "model.layers.23.block_sparse_moe.experts.214.w1", "model.layers.23.block_sparse_moe.experts.215.w1", "model.layers.23.block_sparse_moe.experts.216.w1", "model.layers.23.block_sparse_moe.experts.217.w1", "model.layers.23.block_sparse_moe.experts.218.w1", "model.layers.23.block_sparse_moe.experts.219.w1", "model.layers.23.block_sparse_moe.experts.220.w1", "model.layers.23.block_sparse_moe.experts.221.w1", "model.layers.23.block_sparse_moe.experts.222.w1", "model.layers.23.block_sparse_moe.experts.223.w1", "model.layers.23.block_sparse_moe.experts.224.w1", "model.layers.23.block_sparse_moe.experts.225.w1", "model.layers.23.block_sparse_moe.experts.226.w1", "model.layers.23.block_sparse_moe.experts.227.w1", "model.layers.23.block_sparse_moe.experts.228.w1", "model.layers.23.block_sparse_moe.experts.229.w1", "model.layers.23.block_sparse_moe.experts.230.w1", "model.layers.23.block_sparse_moe.experts.231.w1", "model.layers.23.block_sparse_moe.experts.232.w1", "model.layers.23.block_sparse_moe.experts.233.w1", "model.layers.23.block_sparse_moe.experts.234.w1", "model.layers.23.block_sparse_moe.experts.235.w1", "model.layers.23.block_sparse_moe.experts.236.w1", "model.layers.23.block_sparse_moe.experts.237.w1", "model.layers.23.block_sparse_moe.experts.238.w1", "model.layers.23.block_sparse_moe.experts.239.w1", "model.layers.23.block_sparse_moe.experts.240.w1", "model.layers.23.block_sparse_moe.experts.241.w1", "model.layers.23.block_sparse_moe.experts.242.w1", "model.layers.23.block_sparse_moe.experts.243.w1", "model.layers.23.block_sparse_moe.experts.244.w1", "model.layers.23.block_sparse_moe.experts.245.w1", "model.layers.23.block_sparse_moe.experts.246.w1", "model.layers.23.block_sparse_moe.experts.247.w1", "model.layers.23.block_sparse_moe.experts.248.w1", "model.layers.23.block_sparse_moe.experts.249.w1", "model.layers.23.block_sparse_moe.experts.250.w1", "model.layers.23.block_sparse_moe.experts.251.w1", "model.layers.23.block_sparse_moe.experts.252.w1", "model.layers.23.block_sparse_moe.experts.253.w1", "model.layers.23.block_sparse_moe.experts.254.w1", "model.layers.23.block_sparse_moe.experts.255.w1", "model.layers.23.block_sparse_moe.experts.0.w3", "model.layers.23.block_sparse_moe.experts.1.w3", "model.layers.23.block_sparse_moe.experts.2.w3", "model.layers.23.block_sparse_moe.experts.3.w3", "model.layers.23.block_sparse_moe.experts.4.w3", "model.layers.23.block_sparse_moe.experts.5.w3", "model.layers.23.block_sparse_moe.experts.6.w3", "model.layers.23.block_sparse_moe.experts.7.w3", "model.layers.23.block_sparse_moe.experts.8.w3", "model.layers.23.block_sparse_moe.experts.9.w3", "model.layers.23.block_sparse_moe.experts.10.w3", "model.layers.23.block_sparse_moe.experts.11.w3", "model.layers.23.block_sparse_moe.experts.12.w3", "model.layers.23.block_sparse_moe.experts.13.w3", "model.layers.23.block_sparse_moe.experts.14.w3", "model.layers.23.block_sparse_moe.experts.15.w3", "model.layers.23.block_sparse_moe.experts.16.w3", "model.layers.23.block_sparse_moe.experts.17.w3", "model.layers.23.block_sparse_moe.experts.18.w3", "model.layers.23.block_sparse_moe.experts.19.w3", "model.layers.23.block_sparse_moe.experts.20.w3", "model.layers.23.block_sparse_moe.experts.21.w3", "model.layers.23.block_sparse_moe.experts.22.w3", "model.layers.23.block_sparse_moe.experts.23.w3", "model.layers.23.block_sparse_moe.experts.24.w3", "model.layers.23.block_sparse_moe.experts.25.w3", "model.layers.23.block_sparse_moe.experts.26.w3", "model.layers.23.block_sparse_moe.experts.27.w3", "model.layers.23.block_sparse_moe.experts.28.w3", "model.layers.23.block_sparse_moe.experts.29.w3", "model.layers.23.block_sparse_moe.experts.30.w3", "model.layers.23.block_sparse_moe.experts.31.w3", "model.layers.23.block_sparse_moe.experts.32.w3", "model.layers.23.block_sparse_moe.experts.33.w3", "model.layers.23.block_sparse_moe.experts.34.w3", "model.layers.23.block_sparse_moe.experts.35.w3", "model.layers.23.block_sparse_moe.experts.36.w3", "model.layers.23.block_sparse_moe.experts.37.w3", "model.layers.23.block_sparse_moe.experts.38.w3", "model.layers.23.block_sparse_moe.experts.39.w3", "model.layers.23.block_sparse_moe.experts.40.w3", "model.layers.23.block_sparse_moe.experts.41.w3", "model.layers.23.block_sparse_moe.experts.42.w3", "model.layers.23.block_sparse_moe.experts.43.w3", "model.layers.23.block_sparse_moe.experts.44.w3", "model.layers.23.block_sparse_moe.experts.45.w3", "model.layers.23.block_sparse_moe.experts.46.w3", "model.layers.23.block_sparse_moe.experts.47.w3", "model.layers.23.block_sparse_moe.experts.48.w3", "model.layers.23.block_sparse_moe.experts.49.w3", "model.layers.23.block_sparse_moe.experts.50.w3", "model.layers.23.block_sparse_moe.experts.51.w3", "model.layers.23.block_sparse_moe.experts.52.w3", "model.layers.23.block_sparse_moe.experts.53.w3", "model.layers.23.block_sparse_moe.experts.54.w3", "model.layers.23.block_sparse_moe.experts.55.w3", "model.layers.23.block_sparse_moe.experts.56.w3", "model.layers.23.block_sparse_moe.experts.57.w3", "model.layers.23.block_sparse_moe.experts.58.w3", "model.layers.23.block_sparse_moe.experts.59.w3", "model.layers.23.block_sparse_moe.experts.60.w3", "model.layers.23.block_sparse_moe.experts.61.w3", "model.layers.23.block_sparse_moe.experts.62.w3", "model.layers.23.block_sparse_moe.experts.63.w3", "model.layers.23.block_sparse_moe.experts.64.w3", "model.layers.23.block_sparse_moe.experts.65.w3", "model.layers.23.block_sparse_moe.experts.66.w3", "model.layers.23.block_sparse_moe.experts.67.w3", "model.layers.23.block_sparse_moe.experts.68.w3", "model.layers.23.block_sparse_moe.experts.69.w3", "model.layers.23.block_sparse_moe.experts.70.w3", "model.layers.23.block_sparse_moe.experts.71.w3", "model.layers.23.block_sparse_moe.experts.72.w3", "model.layers.23.block_sparse_moe.experts.73.w3", "model.layers.23.block_sparse_moe.experts.74.w3", "model.layers.23.block_sparse_moe.experts.75.w3", "model.layers.23.block_sparse_moe.experts.76.w3", "model.layers.23.block_sparse_moe.experts.77.w3", "model.layers.23.block_sparse_moe.experts.78.w3", "model.layers.23.block_sparse_moe.experts.79.w3", "model.layers.23.block_sparse_moe.experts.80.w3", "model.layers.23.block_sparse_moe.experts.81.w3", "model.layers.23.block_sparse_moe.experts.82.w3", "model.layers.23.block_sparse_moe.experts.83.w3", "model.layers.23.block_sparse_moe.experts.84.w3", "model.layers.23.block_sparse_moe.experts.85.w3", "model.layers.23.block_sparse_moe.experts.86.w3", "model.layers.23.block_sparse_moe.experts.87.w3", "model.layers.23.block_sparse_moe.experts.88.w3", "model.layers.23.block_sparse_moe.experts.89.w3", "model.layers.23.block_sparse_moe.experts.90.w3", "model.layers.23.block_sparse_moe.experts.91.w3", "model.layers.23.block_sparse_moe.experts.92.w3", "model.layers.23.block_sparse_moe.experts.93.w3", "model.layers.23.block_sparse_moe.experts.94.w3", "model.layers.23.block_sparse_moe.experts.95.w3", "model.layers.23.block_sparse_moe.experts.96.w3", "model.layers.23.block_sparse_moe.experts.97.w3", "model.layers.23.block_sparse_moe.experts.98.w3", "model.layers.23.block_sparse_moe.experts.99.w3", "model.layers.23.block_sparse_moe.experts.100.w3", "model.layers.23.block_sparse_moe.experts.101.w3", "model.layers.23.block_sparse_moe.experts.102.w3", "model.layers.23.block_sparse_moe.experts.103.w3", "model.layers.23.block_sparse_moe.experts.104.w3", "model.layers.23.block_sparse_moe.experts.105.w3", "model.layers.23.block_sparse_moe.experts.106.w3", "model.layers.23.block_sparse_moe.experts.107.w3", "model.layers.23.block_sparse_moe.experts.108.w3", "model.layers.23.block_sparse_moe.experts.109.w3", "model.layers.23.block_sparse_moe.experts.110.w3", "model.layers.23.block_sparse_moe.experts.111.w3", "model.layers.23.block_sparse_moe.experts.112.w3", "model.layers.23.block_sparse_moe.experts.113.w3", "model.layers.23.block_sparse_moe.experts.114.w3", "model.layers.23.block_sparse_moe.experts.115.w3", "model.layers.23.block_sparse_moe.experts.116.w3", "model.layers.23.block_sparse_moe.experts.117.w3", "model.layers.23.block_sparse_moe.experts.118.w3", "model.layers.23.block_sparse_moe.experts.119.w3", "model.layers.23.block_sparse_moe.experts.120.w3", "model.layers.23.block_sparse_moe.experts.121.w3", "model.layers.23.block_sparse_moe.experts.122.w3", "model.layers.23.block_sparse_moe.experts.123.w3", "model.layers.23.block_sparse_moe.experts.124.w3", "model.layers.23.block_sparse_moe.experts.125.w3", "model.layers.23.block_sparse_moe.experts.126.w3", "model.layers.23.block_sparse_moe.experts.127.w3", "model.layers.23.block_sparse_moe.experts.128.w3", "model.layers.23.block_sparse_moe.experts.129.w3", "model.layers.23.block_sparse_moe.experts.130.w3", "model.layers.23.block_sparse_moe.experts.131.w3", "model.layers.23.block_sparse_moe.experts.132.w3", "model.layers.23.block_sparse_moe.experts.133.w3", "model.layers.23.block_sparse_moe.experts.134.w3", "model.layers.23.block_sparse_moe.experts.135.w3", "model.layers.23.block_sparse_moe.experts.136.w3", "model.layers.23.block_sparse_moe.experts.137.w3", "model.layers.23.block_sparse_moe.experts.138.w3", "model.layers.23.block_sparse_moe.experts.139.w3", "model.layers.23.block_sparse_moe.experts.140.w3", "model.layers.23.block_sparse_moe.experts.141.w3", "model.layers.23.block_sparse_moe.experts.142.w3", "model.layers.23.block_sparse_moe.experts.143.w3", "model.layers.23.block_sparse_moe.experts.144.w3", "model.layers.23.block_sparse_moe.experts.145.w3", "model.layers.23.block_sparse_moe.experts.146.w3", "model.layers.23.block_sparse_moe.experts.147.w3", "model.layers.23.block_sparse_moe.experts.148.w3", "model.layers.23.block_sparse_moe.experts.149.w3", "model.layers.23.block_sparse_moe.experts.150.w3", "model.layers.23.block_sparse_moe.experts.151.w3", "model.layers.23.block_sparse_moe.experts.152.w3", "model.layers.23.block_sparse_moe.experts.153.w3", "model.layers.23.block_sparse_moe.experts.154.w3", "model.layers.23.block_sparse_moe.experts.155.w3", "model.layers.23.block_sparse_moe.experts.156.w3", "model.layers.23.block_sparse_moe.experts.157.w3", "model.layers.23.block_sparse_moe.experts.158.w3", "model.layers.23.block_sparse_moe.experts.159.w3", "model.layers.23.block_sparse_moe.experts.160.w3", "model.layers.23.block_sparse_moe.experts.161.w3", "model.layers.23.block_sparse_moe.experts.162.w3", "model.layers.23.block_sparse_moe.experts.163.w3", "model.layers.23.block_sparse_moe.experts.164.w3", "model.layers.23.block_sparse_moe.experts.165.w3", "model.layers.23.block_sparse_moe.experts.166.w3", "model.layers.23.block_sparse_moe.experts.167.w3", "model.layers.23.block_sparse_moe.experts.168.w3", "model.layers.23.block_sparse_moe.experts.169.w3", "model.layers.23.block_sparse_moe.experts.170.w3", "model.layers.23.block_sparse_moe.experts.171.w3", "model.layers.23.block_sparse_moe.experts.172.w3", "model.layers.23.block_sparse_moe.experts.173.w3", "model.layers.23.block_sparse_moe.experts.174.w3", "model.layers.23.block_sparse_moe.experts.175.w3", "model.layers.23.block_sparse_moe.experts.176.w3", "model.layers.23.block_sparse_moe.experts.177.w3", "model.layers.23.block_sparse_moe.experts.178.w3", "model.layers.23.block_sparse_moe.experts.179.w3", "model.layers.23.block_sparse_moe.experts.180.w3", "model.layers.23.block_sparse_moe.experts.181.w3", "model.layers.23.block_sparse_moe.experts.182.w3", "model.layers.23.block_sparse_moe.experts.183.w3", "model.layers.23.block_sparse_moe.experts.184.w3", "model.layers.23.block_sparse_moe.experts.185.w3", "model.layers.23.block_sparse_moe.experts.186.w3", "model.layers.23.block_sparse_moe.experts.187.w3", "model.layers.23.block_sparse_moe.experts.188.w3", "model.layers.23.block_sparse_moe.experts.189.w3", "model.layers.23.block_sparse_moe.experts.190.w3", "model.layers.23.block_sparse_moe.experts.191.w3", "model.layers.23.block_sparse_moe.experts.192.w3", "model.layers.23.block_sparse_moe.experts.193.w3", "model.layers.23.block_sparse_moe.experts.194.w3", "model.layers.23.block_sparse_moe.experts.195.w3", "model.layers.23.block_sparse_moe.experts.196.w3", "model.layers.23.block_sparse_moe.experts.197.w3", "model.layers.23.block_sparse_moe.experts.198.w3", "model.layers.23.block_sparse_moe.experts.199.w3", "model.layers.23.block_sparse_moe.experts.200.w3", "model.layers.23.block_sparse_moe.experts.201.w3", "model.layers.23.block_sparse_moe.experts.202.w3", "model.layers.23.block_sparse_moe.experts.203.w3", "model.layers.23.block_sparse_moe.experts.204.w3", "model.layers.23.block_sparse_moe.experts.205.w3", "model.layers.23.block_sparse_moe.experts.206.w3", "model.layers.23.block_sparse_moe.experts.207.w3", "model.layers.23.block_sparse_moe.experts.208.w3", "model.layers.23.block_sparse_moe.experts.209.w3", "model.layers.23.block_sparse_moe.experts.210.w3", "model.layers.23.block_sparse_moe.experts.211.w3", "model.layers.23.block_sparse_moe.experts.212.w3", "model.layers.23.block_sparse_moe.experts.213.w3", "model.layers.23.block_sparse_moe.experts.214.w3", "model.layers.23.block_sparse_moe.experts.215.w3", "model.layers.23.block_sparse_moe.experts.216.w3", "model.layers.23.block_sparse_moe.experts.217.w3", "model.layers.23.block_sparse_moe.experts.218.w3", "model.layers.23.block_sparse_moe.experts.219.w3", "model.layers.23.block_sparse_moe.experts.220.w3", "model.layers.23.block_sparse_moe.experts.221.w3", "model.layers.23.block_sparse_moe.experts.222.w3", "model.layers.23.block_sparse_moe.experts.223.w3", "model.layers.23.block_sparse_moe.experts.224.w3", "model.layers.23.block_sparse_moe.experts.225.w3", "model.layers.23.block_sparse_moe.experts.226.w3", "model.layers.23.block_sparse_moe.experts.227.w3", "model.layers.23.block_sparse_moe.experts.228.w3", "model.layers.23.block_sparse_moe.experts.229.w3", "model.layers.23.block_sparse_moe.experts.230.w3", "model.layers.23.block_sparse_moe.experts.231.w3", "model.layers.23.block_sparse_moe.experts.232.w3", "model.layers.23.block_sparse_moe.experts.233.w3", "model.layers.23.block_sparse_moe.experts.234.w3", "model.layers.23.block_sparse_moe.experts.235.w3", "model.layers.23.block_sparse_moe.experts.236.w3", "model.layers.23.block_sparse_moe.experts.237.w3", "model.layers.23.block_sparse_moe.experts.238.w3", "model.layers.23.block_sparse_moe.experts.239.w3", "model.layers.23.block_sparse_moe.experts.240.w3", "model.layers.23.block_sparse_moe.experts.241.w3", "model.layers.23.block_sparse_moe.experts.242.w3", "model.layers.23.block_sparse_moe.experts.243.w3", "model.layers.23.block_sparse_moe.experts.244.w3", "model.layers.23.block_sparse_moe.experts.245.w3", "model.layers.23.block_sparse_moe.experts.246.w3", "model.layers.23.block_sparse_moe.experts.247.w3", "model.layers.23.block_sparse_moe.experts.248.w3", "model.layers.23.block_sparse_moe.experts.249.w3", "model.layers.23.block_sparse_moe.experts.250.w3", "model.layers.23.block_sparse_moe.experts.251.w3", "model.layers.23.block_sparse_moe.experts.252.w3", "model.layers.23.block_sparse_moe.experts.253.w3", "model.layers.23.block_sparse_moe.experts.254.w3", "model.layers.23.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0012515176087617985, "dbits": 2415919104 } ] }, { "idx": 119, "layers": [ "model.layers.23.block_sparse_moe.experts.0.w2", "model.layers.23.block_sparse_moe.experts.1.w2", "model.layers.23.block_sparse_moe.experts.2.w2", "model.layers.23.block_sparse_moe.experts.3.w2", "model.layers.23.block_sparse_moe.experts.4.w2", "model.layers.23.block_sparse_moe.experts.5.w2", "model.layers.23.block_sparse_moe.experts.6.w2", "model.layers.23.block_sparse_moe.experts.7.w2", "model.layers.23.block_sparse_moe.experts.8.w2", "model.layers.23.block_sparse_moe.experts.9.w2", "model.layers.23.block_sparse_moe.experts.10.w2", "model.layers.23.block_sparse_moe.experts.11.w2", "model.layers.23.block_sparse_moe.experts.12.w2", "model.layers.23.block_sparse_moe.experts.13.w2", "model.layers.23.block_sparse_moe.experts.14.w2", "model.layers.23.block_sparse_moe.experts.15.w2", "model.layers.23.block_sparse_moe.experts.16.w2", "model.layers.23.block_sparse_moe.experts.17.w2", "model.layers.23.block_sparse_moe.experts.18.w2", "model.layers.23.block_sparse_moe.experts.19.w2", "model.layers.23.block_sparse_moe.experts.20.w2", "model.layers.23.block_sparse_moe.experts.21.w2", "model.layers.23.block_sparse_moe.experts.22.w2", "model.layers.23.block_sparse_moe.experts.23.w2", "model.layers.23.block_sparse_moe.experts.24.w2", "model.layers.23.block_sparse_moe.experts.25.w2", "model.layers.23.block_sparse_moe.experts.26.w2", "model.layers.23.block_sparse_moe.experts.27.w2", "model.layers.23.block_sparse_moe.experts.28.w2", "model.layers.23.block_sparse_moe.experts.29.w2", "model.layers.23.block_sparse_moe.experts.30.w2", "model.layers.23.block_sparse_moe.experts.31.w2", "model.layers.23.block_sparse_moe.experts.32.w2", "model.layers.23.block_sparse_moe.experts.33.w2", "model.layers.23.block_sparse_moe.experts.34.w2", "model.layers.23.block_sparse_moe.experts.35.w2", "model.layers.23.block_sparse_moe.experts.36.w2", "model.layers.23.block_sparse_moe.experts.37.w2", "model.layers.23.block_sparse_moe.experts.38.w2", "model.layers.23.block_sparse_moe.experts.39.w2", "model.layers.23.block_sparse_moe.experts.40.w2", "model.layers.23.block_sparse_moe.experts.41.w2", "model.layers.23.block_sparse_moe.experts.42.w2", "model.layers.23.block_sparse_moe.experts.43.w2", "model.layers.23.block_sparse_moe.experts.44.w2", "model.layers.23.block_sparse_moe.experts.45.w2", "model.layers.23.block_sparse_moe.experts.46.w2", "model.layers.23.block_sparse_moe.experts.47.w2", "model.layers.23.block_sparse_moe.experts.48.w2", "model.layers.23.block_sparse_moe.experts.49.w2", "model.layers.23.block_sparse_moe.experts.50.w2", "model.layers.23.block_sparse_moe.experts.51.w2", "model.layers.23.block_sparse_moe.experts.52.w2", "model.layers.23.block_sparse_moe.experts.53.w2", "model.layers.23.block_sparse_moe.experts.54.w2", "model.layers.23.block_sparse_moe.experts.55.w2", "model.layers.23.block_sparse_moe.experts.56.w2", "model.layers.23.block_sparse_moe.experts.57.w2", "model.layers.23.block_sparse_moe.experts.58.w2", "model.layers.23.block_sparse_moe.experts.59.w2", "model.layers.23.block_sparse_moe.experts.60.w2", "model.layers.23.block_sparse_moe.experts.61.w2", "model.layers.23.block_sparse_moe.experts.62.w2", "model.layers.23.block_sparse_moe.experts.63.w2", "model.layers.23.block_sparse_moe.experts.64.w2", "model.layers.23.block_sparse_moe.experts.65.w2", "model.layers.23.block_sparse_moe.experts.66.w2", "model.layers.23.block_sparse_moe.experts.67.w2", "model.layers.23.block_sparse_moe.experts.68.w2", "model.layers.23.block_sparse_moe.experts.69.w2", "model.layers.23.block_sparse_moe.experts.70.w2", "model.layers.23.block_sparse_moe.experts.71.w2", "model.layers.23.block_sparse_moe.experts.72.w2", "model.layers.23.block_sparse_moe.experts.73.w2", "model.layers.23.block_sparse_moe.experts.74.w2", "model.layers.23.block_sparse_moe.experts.75.w2", "model.layers.23.block_sparse_moe.experts.76.w2", "model.layers.23.block_sparse_moe.experts.77.w2", "model.layers.23.block_sparse_moe.experts.78.w2", "model.layers.23.block_sparse_moe.experts.79.w2", "model.layers.23.block_sparse_moe.experts.80.w2", "model.layers.23.block_sparse_moe.experts.81.w2", "model.layers.23.block_sparse_moe.experts.82.w2", "model.layers.23.block_sparse_moe.experts.83.w2", "model.layers.23.block_sparse_moe.experts.84.w2", "model.layers.23.block_sparse_moe.experts.85.w2", "model.layers.23.block_sparse_moe.experts.86.w2", "model.layers.23.block_sparse_moe.experts.87.w2", "model.layers.23.block_sparse_moe.experts.88.w2", "model.layers.23.block_sparse_moe.experts.89.w2", "model.layers.23.block_sparse_moe.experts.90.w2", "model.layers.23.block_sparse_moe.experts.91.w2", "model.layers.23.block_sparse_moe.experts.92.w2", "model.layers.23.block_sparse_moe.experts.93.w2", "model.layers.23.block_sparse_moe.experts.94.w2", "model.layers.23.block_sparse_moe.experts.95.w2", "model.layers.23.block_sparse_moe.experts.96.w2", "model.layers.23.block_sparse_moe.experts.97.w2", "model.layers.23.block_sparse_moe.experts.98.w2", "model.layers.23.block_sparse_moe.experts.99.w2", "model.layers.23.block_sparse_moe.experts.100.w2", "model.layers.23.block_sparse_moe.experts.101.w2", "model.layers.23.block_sparse_moe.experts.102.w2", "model.layers.23.block_sparse_moe.experts.103.w2", "model.layers.23.block_sparse_moe.experts.104.w2", "model.layers.23.block_sparse_moe.experts.105.w2", "model.layers.23.block_sparse_moe.experts.106.w2", "model.layers.23.block_sparse_moe.experts.107.w2", "model.layers.23.block_sparse_moe.experts.108.w2", "model.layers.23.block_sparse_moe.experts.109.w2", "model.layers.23.block_sparse_moe.experts.110.w2", "model.layers.23.block_sparse_moe.experts.111.w2", "model.layers.23.block_sparse_moe.experts.112.w2", "model.layers.23.block_sparse_moe.experts.113.w2", "model.layers.23.block_sparse_moe.experts.114.w2", "model.layers.23.block_sparse_moe.experts.115.w2", "model.layers.23.block_sparse_moe.experts.116.w2", "model.layers.23.block_sparse_moe.experts.117.w2", "model.layers.23.block_sparse_moe.experts.118.w2", "model.layers.23.block_sparse_moe.experts.119.w2", "model.layers.23.block_sparse_moe.experts.120.w2", "model.layers.23.block_sparse_moe.experts.121.w2", "model.layers.23.block_sparse_moe.experts.122.w2", "model.layers.23.block_sparse_moe.experts.123.w2", "model.layers.23.block_sparse_moe.experts.124.w2", "model.layers.23.block_sparse_moe.experts.125.w2", "model.layers.23.block_sparse_moe.experts.126.w2", "model.layers.23.block_sparse_moe.experts.127.w2", "model.layers.23.block_sparse_moe.experts.128.w2", "model.layers.23.block_sparse_moe.experts.129.w2", "model.layers.23.block_sparse_moe.experts.130.w2", "model.layers.23.block_sparse_moe.experts.131.w2", "model.layers.23.block_sparse_moe.experts.132.w2", "model.layers.23.block_sparse_moe.experts.133.w2", "model.layers.23.block_sparse_moe.experts.134.w2", "model.layers.23.block_sparse_moe.experts.135.w2", "model.layers.23.block_sparse_moe.experts.136.w2", "model.layers.23.block_sparse_moe.experts.137.w2", "model.layers.23.block_sparse_moe.experts.138.w2", "model.layers.23.block_sparse_moe.experts.139.w2", "model.layers.23.block_sparse_moe.experts.140.w2", "model.layers.23.block_sparse_moe.experts.141.w2", "model.layers.23.block_sparse_moe.experts.142.w2", "model.layers.23.block_sparse_moe.experts.143.w2", "model.layers.23.block_sparse_moe.experts.144.w2", "model.layers.23.block_sparse_moe.experts.145.w2", "model.layers.23.block_sparse_moe.experts.146.w2", "model.layers.23.block_sparse_moe.experts.147.w2", "model.layers.23.block_sparse_moe.experts.148.w2", "model.layers.23.block_sparse_moe.experts.149.w2", "model.layers.23.block_sparse_moe.experts.150.w2", "model.layers.23.block_sparse_moe.experts.151.w2", "model.layers.23.block_sparse_moe.experts.152.w2", "model.layers.23.block_sparse_moe.experts.153.w2", "model.layers.23.block_sparse_moe.experts.154.w2", "model.layers.23.block_sparse_moe.experts.155.w2", "model.layers.23.block_sparse_moe.experts.156.w2", "model.layers.23.block_sparse_moe.experts.157.w2", "model.layers.23.block_sparse_moe.experts.158.w2", "model.layers.23.block_sparse_moe.experts.159.w2", "model.layers.23.block_sparse_moe.experts.160.w2", "model.layers.23.block_sparse_moe.experts.161.w2", "model.layers.23.block_sparse_moe.experts.162.w2", "model.layers.23.block_sparse_moe.experts.163.w2", "model.layers.23.block_sparse_moe.experts.164.w2", "model.layers.23.block_sparse_moe.experts.165.w2", "model.layers.23.block_sparse_moe.experts.166.w2", "model.layers.23.block_sparse_moe.experts.167.w2", "model.layers.23.block_sparse_moe.experts.168.w2", "model.layers.23.block_sparse_moe.experts.169.w2", "model.layers.23.block_sparse_moe.experts.170.w2", "model.layers.23.block_sparse_moe.experts.171.w2", "model.layers.23.block_sparse_moe.experts.172.w2", "model.layers.23.block_sparse_moe.experts.173.w2", "model.layers.23.block_sparse_moe.experts.174.w2", "model.layers.23.block_sparse_moe.experts.175.w2", "model.layers.23.block_sparse_moe.experts.176.w2", "model.layers.23.block_sparse_moe.experts.177.w2", "model.layers.23.block_sparse_moe.experts.178.w2", "model.layers.23.block_sparse_moe.experts.179.w2", "model.layers.23.block_sparse_moe.experts.180.w2", "model.layers.23.block_sparse_moe.experts.181.w2", "model.layers.23.block_sparse_moe.experts.182.w2", "model.layers.23.block_sparse_moe.experts.183.w2", "model.layers.23.block_sparse_moe.experts.184.w2", "model.layers.23.block_sparse_moe.experts.185.w2", "model.layers.23.block_sparse_moe.experts.186.w2", "model.layers.23.block_sparse_moe.experts.187.w2", "model.layers.23.block_sparse_moe.experts.188.w2", "model.layers.23.block_sparse_moe.experts.189.w2", "model.layers.23.block_sparse_moe.experts.190.w2", "model.layers.23.block_sparse_moe.experts.191.w2", "model.layers.23.block_sparse_moe.experts.192.w2", "model.layers.23.block_sparse_moe.experts.193.w2", "model.layers.23.block_sparse_moe.experts.194.w2", "model.layers.23.block_sparse_moe.experts.195.w2", "model.layers.23.block_sparse_moe.experts.196.w2", "model.layers.23.block_sparse_moe.experts.197.w2", "model.layers.23.block_sparse_moe.experts.198.w2", "model.layers.23.block_sparse_moe.experts.199.w2", "model.layers.23.block_sparse_moe.experts.200.w2", "model.layers.23.block_sparse_moe.experts.201.w2", "model.layers.23.block_sparse_moe.experts.202.w2", "model.layers.23.block_sparse_moe.experts.203.w2", "model.layers.23.block_sparse_moe.experts.204.w2", "model.layers.23.block_sparse_moe.experts.205.w2", "model.layers.23.block_sparse_moe.experts.206.w2", "model.layers.23.block_sparse_moe.experts.207.w2", "model.layers.23.block_sparse_moe.experts.208.w2", "model.layers.23.block_sparse_moe.experts.209.w2", "model.layers.23.block_sparse_moe.experts.210.w2", "model.layers.23.block_sparse_moe.experts.211.w2", "model.layers.23.block_sparse_moe.experts.212.w2", "model.layers.23.block_sparse_moe.experts.213.w2", "model.layers.23.block_sparse_moe.experts.214.w2", "model.layers.23.block_sparse_moe.experts.215.w2", "model.layers.23.block_sparse_moe.experts.216.w2", "model.layers.23.block_sparse_moe.experts.217.w2", "model.layers.23.block_sparse_moe.experts.218.w2", "model.layers.23.block_sparse_moe.experts.219.w2", "model.layers.23.block_sparse_moe.experts.220.w2", "model.layers.23.block_sparse_moe.experts.221.w2", "model.layers.23.block_sparse_moe.experts.222.w2", "model.layers.23.block_sparse_moe.experts.223.w2", "model.layers.23.block_sparse_moe.experts.224.w2", "model.layers.23.block_sparse_moe.experts.225.w2", "model.layers.23.block_sparse_moe.experts.226.w2", "model.layers.23.block_sparse_moe.experts.227.w2", "model.layers.23.block_sparse_moe.experts.228.w2", "model.layers.23.block_sparse_moe.experts.229.w2", "model.layers.23.block_sparse_moe.experts.230.w2", "model.layers.23.block_sparse_moe.experts.231.w2", "model.layers.23.block_sparse_moe.experts.232.w2", "model.layers.23.block_sparse_moe.experts.233.w2", "model.layers.23.block_sparse_moe.experts.234.w2", "model.layers.23.block_sparse_moe.experts.235.w2", "model.layers.23.block_sparse_moe.experts.236.w2", "model.layers.23.block_sparse_moe.experts.237.w2", "model.layers.23.block_sparse_moe.experts.238.w2", "model.layers.23.block_sparse_moe.experts.239.w2", "model.layers.23.block_sparse_moe.experts.240.w2", "model.layers.23.block_sparse_moe.experts.241.w2", "model.layers.23.block_sparse_moe.experts.242.w2", "model.layers.23.block_sparse_moe.experts.243.w2", "model.layers.23.block_sparse_moe.experts.244.w2", "model.layers.23.block_sparse_moe.experts.245.w2", "model.layers.23.block_sparse_moe.experts.246.w2", "model.layers.23.block_sparse_moe.experts.247.w2", "model.layers.23.block_sparse_moe.experts.248.w2", "model.layers.23.block_sparse_moe.experts.249.w2", "model.layers.23.block_sparse_moe.experts.250.w2", "model.layers.23.block_sparse_moe.experts.251.w2", "model.layers.23.block_sparse_moe.experts.252.w2", "model.layers.23.block_sparse_moe.experts.253.w2", "model.layers.23.block_sparse_moe.experts.254.w2", "model.layers.23.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.0010048486292362213, "dbits": 1207959552 } ] }, { "idx": 120, "layers": [ "model.layers.24.self_attn.q_proj" ], "candidates": [ { "dkld": -0.002053121477365527, "dbits": 18874368 } ] }, { "idx": 121, "layers": [ "model.layers.24.self_attn.k_proj", "model.layers.24.self_attn.v_proj" ], "candidates": [ { "dkld": 0.002538493648171436, "dbits": 6291456 } ] }, { "idx": 122, "layers": [ "model.layers.24.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0074089601635933255, "dbits": 18874368 } ] }, { "idx": 123, "layers": [ "model.layers.24.block_sparse_moe.experts.0.w1", "model.layers.24.block_sparse_moe.experts.1.w1", "model.layers.24.block_sparse_moe.experts.2.w1", "model.layers.24.block_sparse_moe.experts.3.w1", "model.layers.24.block_sparse_moe.experts.4.w1", "model.layers.24.block_sparse_moe.experts.5.w1", "model.layers.24.block_sparse_moe.experts.6.w1", "model.layers.24.block_sparse_moe.experts.7.w1", "model.layers.24.block_sparse_moe.experts.8.w1", "model.layers.24.block_sparse_moe.experts.9.w1", "model.layers.24.block_sparse_moe.experts.10.w1", "model.layers.24.block_sparse_moe.experts.11.w1", "model.layers.24.block_sparse_moe.experts.12.w1", "model.layers.24.block_sparse_moe.experts.13.w1", "model.layers.24.block_sparse_moe.experts.14.w1", "model.layers.24.block_sparse_moe.experts.15.w1", "model.layers.24.block_sparse_moe.experts.16.w1", "model.layers.24.block_sparse_moe.experts.17.w1", "model.layers.24.block_sparse_moe.experts.18.w1", "model.layers.24.block_sparse_moe.experts.19.w1", "model.layers.24.block_sparse_moe.experts.20.w1", "model.layers.24.block_sparse_moe.experts.21.w1", "model.layers.24.block_sparse_moe.experts.22.w1", "model.layers.24.block_sparse_moe.experts.23.w1", "model.layers.24.block_sparse_moe.experts.24.w1", "model.layers.24.block_sparse_moe.experts.25.w1", "model.layers.24.block_sparse_moe.experts.26.w1", "model.layers.24.block_sparse_moe.experts.27.w1", "model.layers.24.block_sparse_moe.experts.28.w1", "model.layers.24.block_sparse_moe.experts.29.w1", "model.layers.24.block_sparse_moe.experts.30.w1", "model.layers.24.block_sparse_moe.experts.31.w1", "model.layers.24.block_sparse_moe.experts.32.w1", "model.layers.24.block_sparse_moe.experts.33.w1", "model.layers.24.block_sparse_moe.experts.34.w1", "model.layers.24.block_sparse_moe.experts.35.w1", "model.layers.24.block_sparse_moe.experts.36.w1", "model.layers.24.block_sparse_moe.experts.37.w1", "model.layers.24.block_sparse_moe.experts.38.w1", "model.layers.24.block_sparse_moe.experts.39.w1", "model.layers.24.block_sparse_moe.experts.40.w1", "model.layers.24.block_sparse_moe.experts.41.w1", "model.layers.24.block_sparse_moe.experts.42.w1", "model.layers.24.block_sparse_moe.experts.43.w1", "model.layers.24.block_sparse_moe.experts.44.w1", "model.layers.24.block_sparse_moe.experts.45.w1", "model.layers.24.block_sparse_moe.experts.46.w1", "model.layers.24.block_sparse_moe.experts.47.w1", "model.layers.24.block_sparse_moe.experts.48.w1", "model.layers.24.block_sparse_moe.experts.49.w1", "model.layers.24.block_sparse_moe.experts.50.w1", "model.layers.24.block_sparse_moe.experts.51.w1", "model.layers.24.block_sparse_moe.experts.52.w1", "model.layers.24.block_sparse_moe.experts.53.w1", "model.layers.24.block_sparse_moe.experts.54.w1", "model.layers.24.block_sparse_moe.experts.55.w1", "model.layers.24.block_sparse_moe.experts.56.w1", "model.layers.24.block_sparse_moe.experts.57.w1", "model.layers.24.block_sparse_moe.experts.58.w1", "model.layers.24.block_sparse_moe.experts.59.w1", "model.layers.24.block_sparse_moe.experts.60.w1", "model.layers.24.block_sparse_moe.experts.61.w1", "model.layers.24.block_sparse_moe.experts.62.w1", "model.layers.24.block_sparse_moe.experts.63.w1", "model.layers.24.block_sparse_moe.experts.64.w1", "model.layers.24.block_sparse_moe.experts.65.w1", "model.layers.24.block_sparse_moe.experts.66.w1", "model.layers.24.block_sparse_moe.experts.67.w1", "model.layers.24.block_sparse_moe.experts.68.w1", "model.layers.24.block_sparse_moe.experts.69.w1", "model.layers.24.block_sparse_moe.experts.70.w1", "model.layers.24.block_sparse_moe.experts.71.w1", "model.layers.24.block_sparse_moe.experts.72.w1", "model.layers.24.block_sparse_moe.experts.73.w1", "model.layers.24.block_sparse_moe.experts.74.w1", "model.layers.24.block_sparse_moe.experts.75.w1", "model.layers.24.block_sparse_moe.experts.76.w1", "model.layers.24.block_sparse_moe.experts.77.w1", "model.layers.24.block_sparse_moe.experts.78.w1", "model.layers.24.block_sparse_moe.experts.79.w1", "model.layers.24.block_sparse_moe.experts.80.w1", "model.layers.24.block_sparse_moe.experts.81.w1", "model.layers.24.block_sparse_moe.experts.82.w1", "model.layers.24.block_sparse_moe.experts.83.w1", "model.layers.24.block_sparse_moe.experts.84.w1", "model.layers.24.block_sparse_moe.experts.85.w1", "model.layers.24.block_sparse_moe.experts.86.w1", "model.layers.24.block_sparse_moe.experts.87.w1", "model.layers.24.block_sparse_moe.experts.88.w1", "model.layers.24.block_sparse_moe.experts.89.w1", "model.layers.24.block_sparse_moe.experts.90.w1", "model.layers.24.block_sparse_moe.experts.91.w1", "model.layers.24.block_sparse_moe.experts.92.w1", "model.layers.24.block_sparse_moe.experts.93.w1", "model.layers.24.block_sparse_moe.experts.94.w1", "model.layers.24.block_sparse_moe.experts.95.w1", "model.layers.24.block_sparse_moe.experts.96.w1", "model.layers.24.block_sparse_moe.experts.97.w1", "model.layers.24.block_sparse_moe.experts.98.w1", "model.layers.24.block_sparse_moe.experts.99.w1", "model.layers.24.block_sparse_moe.experts.100.w1", "model.layers.24.block_sparse_moe.experts.101.w1", "model.layers.24.block_sparse_moe.experts.102.w1", "model.layers.24.block_sparse_moe.experts.103.w1", "model.layers.24.block_sparse_moe.experts.104.w1", "model.layers.24.block_sparse_moe.experts.105.w1", "model.layers.24.block_sparse_moe.experts.106.w1", "model.layers.24.block_sparse_moe.experts.107.w1", "model.layers.24.block_sparse_moe.experts.108.w1", "model.layers.24.block_sparse_moe.experts.109.w1", "model.layers.24.block_sparse_moe.experts.110.w1", "model.layers.24.block_sparse_moe.experts.111.w1", "model.layers.24.block_sparse_moe.experts.112.w1", "model.layers.24.block_sparse_moe.experts.113.w1", "model.layers.24.block_sparse_moe.experts.114.w1", "model.layers.24.block_sparse_moe.experts.115.w1", "model.layers.24.block_sparse_moe.experts.116.w1", "model.layers.24.block_sparse_moe.experts.117.w1", "model.layers.24.block_sparse_moe.experts.118.w1", "model.layers.24.block_sparse_moe.experts.119.w1", "model.layers.24.block_sparse_moe.experts.120.w1", "model.layers.24.block_sparse_moe.experts.121.w1", "model.layers.24.block_sparse_moe.experts.122.w1", "model.layers.24.block_sparse_moe.experts.123.w1", "model.layers.24.block_sparse_moe.experts.124.w1", "model.layers.24.block_sparse_moe.experts.125.w1", "model.layers.24.block_sparse_moe.experts.126.w1", "model.layers.24.block_sparse_moe.experts.127.w1", "model.layers.24.block_sparse_moe.experts.128.w1", "model.layers.24.block_sparse_moe.experts.129.w1", "model.layers.24.block_sparse_moe.experts.130.w1", "model.layers.24.block_sparse_moe.experts.131.w1", "model.layers.24.block_sparse_moe.experts.132.w1", "model.layers.24.block_sparse_moe.experts.133.w1", "model.layers.24.block_sparse_moe.experts.134.w1", "model.layers.24.block_sparse_moe.experts.135.w1", "model.layers.24.block_sparse_moe.experts.136.w1", "model.layers.24.block_sparse_moe.experts.137.w1", "model.layers.24.block_sparse_moe.experts.138.w1", "model.layers.24.block_sparse_moe.experts.139.w1", "model.layers.24.block_sparse_moe.experts.140.w1", "model.layers.24.block_sparse_moe.experts.141.w1", "model.layers.24.block_sparse_moe.experts.142.w1", "model.layers.24.block_sparse_moe.experts.143.w1", "model.layers.24.block_sparse_moe.experts.144.w1", "model.layers.24.block_sparse_moe.experts.145.w1", "model.layers.24.block_sparse_moe.experts.146.w1", "model.layers.24.block_sparse_moe.experts.147.w1", "model.layers.24.block_sparse_moe.experts.148.w1", "model.layers.24.block_sparse_moe.experts.149.w1", "model.layers.24.block_sparse_moe.experts.150.w1", "model.layers.24.block_sparse_moe.experts.151.w1", "model.layers.24.block_sparse_moe.experts.152.w1", "model.layers.24.block_sparse_moe.experts.153.w1", "model.layers.24.block_sparse_moe.experts.154.w1", "model.layers.24.block_sparse_moe.experts.155.w1", "model.layers.24.block_sparse_moe.experts.156.w1", "model.layers.24.block_sparse_moe.experts.157.w1", "model.layers.24.block_sparse_moe.experts.158.w1", "model.layers.24.block_sparse_moe.experts.159.w1", "model.layers.24.block_sparse_moe.experts.160.w1", "model.layers.24.block_sparse_moe.experts.161.w1", "model.layers.24.block_sparse_moe.experts.162.w1", "model.layers.24.block_sparse_moe.experts.163.w1", "model.layers.24.block_sparse_moe.experts.164.w1", "model.layers.24.block_sparse_moe.experts.165.w1", "model.layers.24.block_sparse_moe.experts.166.w1", "model.layers.24.block_sparse_moe.experts.167.w1", "model.layers.24.block_sparse_moe.experts.168.w1", "model.layers.24.block_sparse_moe.experts.169.w1", "model.layers.24.block_sparse_moe.experts.170.w1", "model.layers.24.block_sparse_moe.experts.171.w1", "model.layers.24.block_sparse_moe.experts.172.w1", "model.layers.24.block_sparse_moe.experts.173.w1", "model.layers.24.block_sparse_moe.experts.174.w1", "model.layers.24.block_sparse_moe.experts.175.w1", "model.layers.24.block_sparse_moe.experts.176.w1", "model.layers.24.block_sparse_moe.experts.177.w1", "model.layers.24.block_sparse_moe.experts.178.w1", "model.layers.24.block_sparse_moe.experts.179.w1", "model.layers.24.block_sparse_moe.experts.180.w1", "model.layers.24.block_sparse_moe.experts.181.w1", "model.layers.24.block_sparse_moe.experts.182.w1", "model.layers.24.block_sparse_moe.experts.183.w1", "model.layers.24.block_sparse_moe.experts.184.w1", "model.layers.24.block_sparse_moe.experts.185.w1", "model.layers.24.block_sparse_moe.experts.186.w1", "model.layers.24.block_sparse_moe.experts.187.w1", "model.layers.24.block_sparse_moe.experts.188.w1", "model.layers.24.block_sparse_moe.experts.189.w1", "model.layers.24.block_sparse_moe.experts.190.w1", "model.layers.24.block_sparse_moe.experts.191.w1", "model.layers.24.block_sparse_moe.experts.192.w1", "model.layers.24.block_sparse_moe.experts.193.w1", "model.layers.24.block_sparse_moe.experts.194.w1", "model.layers.24.block_sparse_moe.experts.195.w1", "model.layers.24.block_sparse_moe.experts.196.w1", "model.layers.24.block_sparse_moe.experts.197.w1", "model.layers.24.block_sparse_moe.experts.198.w1", "model.layers.24.block_sparse_moe.experts.199.w1", "model.layers.24.block_sparse_moe.experts.200.w1", "model.layers.24.block_sparse_moe.experts.201.w1", "model.layers.24.block_sparse_moe.experts.202.w1", "model.layers.24.block_sparse_moe.experts.203.w1", "model.layers.24.block_sparse_moe.experts.204.w1", "model.layers.24.block_sparse_moe.experts.205.w1", "model.layers.24.block_sparse_moe.experts.206.w1", "model.layers.24.block_sparse_moe.experts.207.w1", "model.layers.24.block_sparse_moe.experts.208.w1", "model.layers.24.block_sparse_moe.experts.209.w1", "model.layers.24.block_sparse_moe.experts.210.w1", "model.layers.24.block_sparse_moe.experts.211.w1", "model.layers.24.block_sparse_moe.experts.212.w1", "model.layers.24.block_sparse_moe.experts.213.w1", "model.layers.24.block_sparse_moe.experts.214.w1", "model.layers.24.block_sparse_moe.experts.215.w1", "model.layers.24.block_sparse_moe.experts.216.w1", "model.layers.24.block_sparse_moe.experts.217.w1", "model.layers.24.block_sparse_moe.experts.218.w1", "model.layers.24.block_sparse_moe.experts.219.w1", "model.layers.24.block_sparse_moe.experts.220.w1", "model.layers.24.block_sparse_moe.experts.221.w1", "model.layers.24.block_sparse_moe.experts.222.w1", "model.layers.24.block_sparse_moe.experts.223.w1", "model.layers.24.block_sparse_moe.experts.224.w1", "model.layers.24.block_sparse_moe.experts.225.w1", "model.layers.24.block_sparse_moe.experts.226.w1", "model.layers.24.block_sparse_moe.experts.227.w1", "model.layers.24.block_sparse_moe.experts.228.w1", "model.layers.24.block_sparse_moe.experts.229.w1", "model.layers.24.block_sparse_moe.experts.230.w1", "model.layers.24.block_sparse_moe.experts.231.w1", "model.layers.24.block_sparse_moe.experts.232.w1", "model.layers.24.block_sparse_moe.experts.233.w1", "model.layers.24.block_sparse_moe.experts.234.w1", "model.layers.24.block_sparse_moe.experts.235.w1", "model.layers.24.block_sparse_moe.experts.236.w1", "model.layers.24.block_sparse_moe.experts.237.w1", "model.layers.24.block_sparse_moe.experts.238.w1", "model.layers.24.block_sparse_moe.experts.239.w1", "model.layers.24.block_sparse_moe.experts.240.w1", "model.layers.24.block_sparse_moe.experts.241.w1", "model.layers.24.block_sparse_moe.experts.242.w1", "model.layers.24.block_sparse_moe.experts.243.w1", "model.layers.24.block_sparse_moe.experts.244.w1", "model.layers.24.block_sparse_moe.experts.245.w1", "model.layers.24.block_sparse_moe.experts.246.w1", "model.layers.24.block_sparse_moe.experts.247.w1", "model.layers.24.block_sparse_moe.experts.248.w1", "model.layers.24.block_sparse_moe.experts.249.w1", "model.layers.24.block_sparse_moe.experts.250.w1", "model.layers.24.block_sparse_moe.experts.251.w1", "model.layers.24.block_sparse_moe.experts.252.w1", "model.layers.24.block_sparse_moe.experts.253.w1", "model.layers.24.block_sparse_moe.experts.254.w1", "model.layers.24.block_sparse_moe.experts.255.w1", "model.layers.24.block_sparse_moe.experts.0.w3", "model.layers.24.block_sparse_moe.experts.1.w3", "model.layers.24.block_sparse_moe.experts.2.w3", "model.layers.24.block_sparse_moe.experts.3.w3", "model.layers.24.block_sparse_moe.experts.4.w3", "model.layers.24.block_sparse_moe.experts.5.w3", "model.layers.24.block_sparse_moe.experts.6.w3", "model.layers.24.block_sparse_moe.experts.7.w3", "model.layers.24.block_sparse_moe.experts.8.w3", "model.layers.24.block_sparse_moe.experts.9.w3", "model.layers.24.block_sparse_moe.experts.10.w3", "model.layers.24.block_sparse_moe.experts.11.w3", "model.layers.24.block_sparse_moe.experts.12.w3", "model.layers.24.block_sparse_moe.experts.13.w3", "model.layers.24.block_sparse_moe.experts.14.w3", "model.layers.24.block_sparse_moe.experts.15.w3", "model.layers.24.block_sparse_moe.experts.16.w3", "model.layers.24.block_sparse_moe.experts.17.w3", "model.layers.24.block_sparse_moe.experts.18.w3", "model.layers.24.block_sparse_moe.experts.19.w3", "model.layers.24.block_sparse_moe.experts.20.w3", "model.layers.24.block_sparse_moe.experts.21.w3", "model.layers.24.block_sparse_moe.experts.22.w3", "model.layers.24.block_sparse_moe.experts.23.w3", "model.layers.24.block_sparse_moe.experts.24.w3", "model.layers.24.block_sparse_moe.experts.25.w3", "model.layers.24.block_sparse_moe.experts.26.w3", "model.layers.24.block_sparse_moe.experts.27.w3", "model.layers.24.block_sparse_moe.experts.28.w3", "model.layers.24.block_sparse_moe.experts.29.w3", "model.layers.24.block_sparse_moe.experts.30.w3", "model.layers.24.block_sparse_moe.experts.31.w3", "model.layers.24.block_sparse_moe.experts.32.w3", "model.layers.24.block_sparse_moe.experts.33.w3", "model.layers.24.block_sparse_moe.experts.34.w3", "model.layers.24.block_sparse_moe.experts.35.w3", "model.layers.24.block_sparse_moe.experts.36.w3", "model.layers.24.block_sparse_moe.experts.37.w3", "model.layers.24.block_sparse_moe.experts.38.w3", "model.layers.24.block_sparse_moe.experts.39.w3", "model.layers.24.block_sparse_moe.experts.40.w3", "model.layers.24.block_sparse_moe.experts.41.w3", "model.layers.24.block_sparse_moe.experts.42.w3", "model.layers.24.block_sparse_moe.experts.43.w3", "model.layers.24.block_sparse_moe.experts.44.w3", "model.layers.24.block_sparse_moe.experts.45.w3", "model.layers.24.block_sparse_moe.experts.46.w3", "model.layers.24.block_sparse_moe.experts.47.w3", "model.layers.24.block_sparse_moe.experts.48.w3", "model.layers.24.block_sparse_moe.experts.49.w3", "model.layers.24.block_sparse_moe.experts.50.w3", "model.layers.24.block_sparse_moe.experts.51.w3", "model.layers.24.block_sparse_moe.experts.52.w3", "model.layers.24.block_sparse_moe.experts.53.w3", "model.layers.24.block_sparse_moe.experts.54.w3", "model.layers.24.block_sparse_moe.experts.55.w3", "model.layers.24.block_sparse_moe.experts.56.w3", "model.layers.24.block_sparse_moe.experts.57.w3", "model.layers.24.block_sparse_moe.experts.58.w3", "model.layers.24.block_sparse_moe.experts.59.w3", "model.layers.24.block_sparse_moe.experts.60.w3", "model.layers.24.block_sparse_moe.experts.61.w3", "model.layers.24.block_sparse_moe.experts.62.w3", "model.layers.24.block_sparse_moe.experts.63.w3", "model.layers.24.block_sparse_moe.experts.64.w3", "model.layers.24.block_sparse_moe.experts.65.w3", "model.layers.24.block_sparse_moe.experts.66.w3", "model.layers.24.block_sparse_moe.experts.67.w3", "model.layers.24.block_sparse_moe.experts.68.w3", "model.layers.24.block_sparse_moe.experts.69.w3", "model.layers.24.block_sparse_moe.experts.70.w3", "model.layers.24.block_sparse_moe.experts.71.w3", "model.layers.24.block_sparse_moe.experts.72.w3", "model.layers.24.block_sparse_moe.experts.73.w3", "model.layers.24.block_sparse_moe.experts.74.w3", "model.layers.24.block_sparse_moe.experts.75.w3", "model.layers.24.block_sparse_moe.experts.76.w3", "model.layers.24.block_sparse_moe.experts.77.w3", "model.layers.24.block_sparse_moe.experts.78.w3", "model.layers.24.block_sparse_moe.experts.79.w3", "model.layers.24.block_sparse_moe.experts.80.w3", "model.layers.24.block_sparse_moe.experts.81.w3", "model.layers.24.block_sparse_moe.experts.82.w3", "model.layers.24.block_sparse_moe.experts.83.w3", "model.layers.24.block_sparse_moe.experts.84.w3", "model.layers.24.block_sparse_moe.experts.85.w3", "model.layers.24.block_sparse_moe.experts.86.w3", "model.layers.24.block_sparse_moe.experts.87.w3", "model.layers.24.block_sparse_moe.experts.88.w3", "model.layers.24.block_sparse_moe.experts.89.w3", "model.layers.24.block_sparse_moe.experts.90.w3", "model.layers.24.block_sparse_moe.experts.91.w3", "model.layers.24.block_sparse_moe.experts.92.w3", "model.layers.24.block_sparse_moe.experts.93.w3", "model.layers.24.block_sparse_moe.experts.94.w3", "model.layers.24.block_sparse_moe.experts.95.w3", "model.layers.24.block_sparse_moe.experts.96.w3", "model.layers.24.block_sparse_moe.experts.97.w3", "model.layers.24.block_sparse_moe.experts.98.w3", "model.layers.24.block_sparse_moe.experts.99.w3", "model.layers.24.block_sparse_moe.experts.100.w3", "model.layers.24.block_sparse_moe.experts.101.w3", "model.layers.24.block_sparse_moe.experts.102.w3", "model.layers.24.block_sparse_moe.experts.103.w3", "model.layers.24.block_sparse_moe.experts.104.w3", "model.layers.24.block_sparse_moe.experts.105.w3", "model.layers.24.block_sparse_moe.experts.106.w3", "model.layers.24.block_sparse_moe.experts.107.w3", "model.layers.24.block_sparse_moe.experts.108.w3", "model.layers.24.block_sparse_moe.experts.109.w3", "model.layers.24.block_sparse_moe.experts.110.w3", "model.layers.24.block_sparse_moe.experts.111.w3", "model.layers.24.block_sparse_moe.experts.112.w3", "model.layers.24.block_sparse_moe.experts.113.w3", "model.layers.24.block_sparse_moe.experts.114.w3", "model.layers.24.block_sparse_moe.experts.115.w3", "model.layers.24.block_sparse_moe.experts.116.w3", "model.layers.24.block_sparse_moe.experts.117.w3", "model.layers.24.block_sparse_moe.experts.118.w3", "model.layers.24.block_sparse_moe.experts.119.w3", "model.layers.24.block_sparse_moe.experts.120.w3", "model.layers.24.block_sparse_moe.experts.121.w3", "model.layers.24.block_sparse_moe.experts.122.w3", "model.layers.24.block_sparse_moe.experts.123.w3", "model.layers.24.block_sparse_moe.experts.124.w3", "model.layers.24.block_sparse_moe.experts.125.w3", "model.layers.24.block_sparse_moe.experts.126.w3", "model.layers.24.block_sparse_moe.experts.127.w3", "model.layers.24.block_sparse_moe.experts.128.w3", "model.layers.24.block_sparse_moe.experts.129.w3", "model.layers.24.block_sparse_moe.experts.130.w3", "model.layers.24.block_sparse_moe.experts.131.w3", "model.layers.24.block_sparse_moe.experts.132.w3", "model.layers.24.block_sparse_moe.experts.133.w3", "model.layers.24.block_sparse_moe.experts.134.w3", "model.layers.24.block_sparse_moe.experts.135.w3", "model.layers.24.block_sparse_moe.experts.136.w3", "model.layers.24.block_sparse_moe.experts.137.w3", "model.layers.24.block_sparse_moe.experts.138.w3", "model.layers.24.block_sparse_moe.experts.139.w3", "model.layers.24.block_sparse_moe.experts.140.w3", "model.layers.24.block_sparse_moe.experts.141.w3", "model.layers.24.block_sparse_moe.experts.142.w3", "model.layers.24.block_sparse_moe.experts.143.w3", "model.layers.24.block_sparse_moe.experts.144.w3", "model.layers.24.block_sparse_moe.experts.145.w3", "model.layers.24.block_sparse_moe.experts.146.w3", "model.layers.24.block_sparse_moe.experts.147.w3", "model.layers.24.block_sparse_moe.experts.148.w3", "model.layers.24.block_sparse_moe.experts.149.w3", "model.layers.24.block_sparse_moe.experts.150.w3", "model.layers.24.block_sparse_moe.experts.151.w3", "model.layers.24.block_sparse_moe.experts.152.w3", "model.layers.24.block_sparse_moe.experts.153.w3", "model.layers.24.block_sparse_moe.experts.154.w3", "model.layers.24.block_sparse_moe.experts.155.w3", "model.layers.24.block_sparse_moe.experts.156.w3", "model.layers.24.block_sparse_moe.experts.157.w3", "model.layers.24.block_sparse_moe.experts.158.w3", "model.layers.24.block_sparse_moe.experts.159.w3", "model.layers.24.block_sparse_moe.experts.160.w3", "model.layers.24.block_sparse_moe.experts.161.w3", "model.layers.24.block_sparse_moe.experts.162.w3", "model.layers.24.block_sparse_moe.experts.163.w3", "model.layers.24.block_sparse_moe.experts.164.w3", "model.layers.24.block_sparse_moe.experts.165.w3", "model.layers.24.block_sparse_moe.experts.166.w3", "model.layers.24.block_sparse_moe.experts.167.w3", "model.layers.24.block_sparse_moe.experts.168.w3", "model.layers.24.block_sparse_moe.experts.169.w3", "model.layers.24.block_sparse_moe.experts.170.w3", "model.layers.24.block_sparse_moe.experts.171.w3", "model.layers.24.block_sparse_moe.experts.172.w3", "model.layers.24.block_sparse_moe.experts.173.w3", "model.layers.24.block_sparse_moe.experts.174.w3", "model.layers.24.block_sparse_moe.experts.175.w3", "model.layers.24.block_sparse_moe.experts.176.w3", "model.layers.24.block_sparse_moe.experts.177.w3", "model.layers.24.block_sparse_moe.experts.178.w3", "model.layers.24.block_sparse_moe.experts.179.w3", "model.layers.24.block_sparse_moe.experts.180.w3", "model.layers.24.block_sparse_moe.experts.181.w3", "model.layers.24.block_sparse_moe.experts.182.w3", "model.layers.24.block_sparse_moe.experts.183.w3", "model.layers.24.block_sparse_moe.experts.184.w3", "model.layers.24.block_sparse_moe.experts.185.w3", "model.layers.24.block_sparse_moe.experts.186.w3", "model.layers.24.block_sparse_moe.experts.187.w3", "model.layers.24.block_sparse_moe.experts.188.w3", "model.layers.24.block_sparse_moe.experts.189.w3", "model.layers.24.block_sparse_moe.experts.190.w3", "model.layers.24.block_sparse_moe.experts.191.w3", "model.layers.24.block_sparse_moe.experts.192.w3", "model.layers.24.block_sparse_moe.experts.193.w3", "model.layers.24.block_sparse_moe.experts.194.w3", "model.layers.24.block_sparse_moe.experts.195.w3", "model.layers.24.block_sparse_moe.experts.196.w3", "model.layers.24.block_sparse_moe.experts.197.w3", "model.layers.24.block_sparse_moe.experts.198.w3", "model.layers.24.block_sparse_moe.experts.199.w3", "model.layers.24.block_sparse_moe.experts.200.w3", "model.layers.24.block_sparse_moe.experts.201.w3", "model.layers.24.block_sparse_moe.experts.202.w3", "model.layers.24.block_sparse_moe.experts.203.w3", "model.layers.24.block_sparse_moe.experts.204.w3", "model.layers.24.block_sparse_moe.experts.205.w3", "model.layers.24.block_sparse_moe.experts.206.w3", "model.layers.24.block_sparse_moe.experts.207.w3", "model.layers.24.block_sparse_moe.experts.208.w3", "model.layers.24.block_sparse_moe.experts.209.w3", "model.layers.24.block_sparse_moe.experts.210.w3", "model.layers.24.block_sparse_moe.experts.211.w3", "model.layers.24.block_sparse_moe.experts.212.w3", "model.layers.24.block_sparse_moe.experts.213.w3", "model.layers.24.block_sparse_moe.experts.214.w3", "model.layers.24.block_sparse_moe.experts.215.w3", "model.layers.24.block_sparse_moe.experts.216.w3", "model.layers.24.block_sparse_moe.experts.217.w3", "model.layers.24.block_sparse_moe.experts.218.w3", "model.layers.24.block_sparse_moe.experts.219.w3", "model.layers.24.block_sparse_moe.experts.220.w3", "model.layers.24.block_sparse_moe.experts.221.w3", "model.layers.24.block_sparse_moe.experts.222.w3", "model.layers.24.block_sparse_moe.experts.223.w3", "model.layers.24.block_sparse_moe.experts.224.w3", "model.layers.24.block_sparse_moe.experts.225.w3", "model.layers.24.block_sparse_moe.experts.226.w3", "model.layers.24.block_sparse_moe.experts.227.w3", "model.layers.24.block_sparse_moe.experts.228.w3", "model.layers.24.block_sparse_moe.experts.229.w3", "model.layers.24.block_sparse_moe.experts.230.w3", "model.layers.24.block_sparse_moe.experts.231.w3", "model.layers.24.block_sparse_moe.experts.232.w3", "model.layers.24.block_sparse_moe.experts.233.w3", "model.layers.24.block_sparse_moe.experts.234.w3", "model.layers.24.block_sparse_moe.experts.235.w3", "model.layers.24.block_sparse_moe.experts.236.w3", "model.layers.24.block_sparse_moe.experts.237.w3", "model.layers.24.block_sparse_moe.experts.238.w3", "model.layers.24.block_sparse_moe.experts.239.w3", "model.layers.24.block_sparse_moe.experts.240.w3", "model.layers.24.block_sparse_moe.experts.241.w3", "model.layers.24.block_sparse_moe.experts.242.w3", "model.layers.24.block_sparse_moe.experts.243.w3", "model.layers.24.block_sparse_moe.experts.244.w3", "model.layers.24.block_sparse_moe.experts.245.w3", "model.layers.24.block_sparse_moe.experts.246.w3", "model.layers.24.block_sparse_moe.experts.247.w3", "model.layers.24.block_sparse_moe.experts.248.w3", "model.layers.24.block_sparse_moe.experts.249.w3", "model.layers.24.block_sparse_moe.experts.250.w3", "model.layers.24.block_sparse_moe.experts.251.w3", "model.layers.24.block_sparse_moe.experts.252.w3", "model.layers.24.block_sparse_moe.experts.253.w3", "model.layers.24.block_sparse_moe.experts.254.w3", "model.layers.24.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0013961300253867992, "dbits": 2415919104 } ] }, { "idx": 124, "layers": [ "model.layers.24.block_sparse_moe.experts.0.w2", "model.layers.24.block_sparse_moe.experts.1.w2", "model.layers.24.block_sparse_moe.experts.2.w2", "model.layers.24.block_sparse_moe.experts.3.w2", "model.layers.24.block_sparse_moe.experts.4.w2", "model.layers.24.block_sparse_moe.experts.5.w2", "model.layers.24.block_sparse_moe.experts.6.w2", "model.layers.24.block_sparse_moe.experts.7.w2", "model.layers.24.block_sparse_moe.experts.8.w2", "model.layers.24.block_sparse_moe.experts.9.w2", "model.layers.24.block_sparse_moe.experts.10.w2", "model.layers.24.block_sparse_moe.experts.11.w2", "model.layers.24.block_sparse_moe.experts.12.w2", "model.layers.24.block_sparse_moe.experts.13.w2", "model.layers.24.block_sparse_moe.experts.14.w2", "model.layers.24.block_sparse_moe.experts.15.w2", "model.layers.24.block_sparse_moe.experts.16.w2", "model.layers.24.block_sparse_moe.experts.17.w2", "model.layers.24.block_sparse_moe.experts.18.w2", "model.layers.24.block_sparse_moe.experts.19.w2", "model.layers.24.block_sparse_moe.experts.20.w2", "model.layers.24.block_sparse_moe.experts.21.w2", "model.layers.24.block_sparse_moe.experts.22.w2", "model.layers.24.block_sparse_moe.experts.23.w2", "model.layers.24.block_sparse_moe.experts.24.w2", "model.layers.24.block_sparse_moe.experts.25.w2", "model.layers.24.block_sparse_moe.experts.26.w2", "model.layers.24.block_sparse_moe.experts.27.w2", "model.layers.24.block_sparse_moe.experts.28.w2", "model.layers.24.block_sparse_moe.experts.29.w2", "model.layers.24.block_sparse_moe.experts.30.w2", "model.layers.24.block_sparse_moe.experts.31.w2", "model.layers.24.block_sparse_moe.experts.32.w2", "model.layers.24.block_sparse_moe.experts.33.w2", "model.layers.24.block_sparse_moe.experts.34.w2", "model.layers.24.block_sparse_moe.experts.35.w2", "model.layers.24.block_sparse_moe.experts.36.w2", "model.layers.24.block_sparse_moe.experts.37.w2", "model.layers.24.block_sparse_moe.experts.38.w2", "model.layers.24.block_sparse_moe.experts.39.w2", "model.layers.24.block_sparse_moe.experts.40.w2", "model.layers.24.block_sparse_moe.experts.41.w2", "model.layers.24.block_sparse_moe.experts.42.w2", "model.layers.24.block_sparse_moe.experts.43.w2", "model.layers.24.block_sparse_moe.experts.44.w2", "model.layers.24.block_sparse_moe.experts.45.w2", "model.layers.24.block_sparse_moe.experts.46.w2", "model.layers.24.block_sparse_moe.experts.47.w2", "model.layers.24.block_sparse_moe.experts.48.w2", "model.layers.24.block_sparse_moe.experts.49.w2", "model.layers.24.block_sparse_moe.experts.50.w2", "model.layers.24.block_sparse_moe.experts.51.w2", "model.layers.24.block_sparse_moe.experts.52.w2", "model.layers.24.block_sparse_moe.experts.53.w2", "model.layers.24.block_sparse_moe.experts.54.w2", "model.layers.24.block_sparse_moe.experts.55.w2", "model.layers.24.block_sparse_moe.experts.56.w2", "model.layers.24.block_sparse_moe.experts.57.w2", "model.layers.24.block_sparse_moe.experts.58.w2", "model.layers.24.block_sparse_moe.experts.59.w2", "model.layers.24.block_sparse_moe.experts.60.w2", "model.layers.24.block_sparse_moe.experts.61.w2", "model.layers.24.block_sparse_moe.experts.62.w2", "model.layers.24.block_sparse_moe.experts.63.w2", "model.layers.24.block_sparse_moe.experts.64.w2", "model.layers.24.block_sparse_moe.experts.65.w2", "model.layers.24.block_sparse_moe.experts.66.w2", "model.layers.24.block_sparse_moe.experts.67.w2", "model.layers.24.block_sparse_moe.experts.68.w2", "model.layers.24.block_sparse_moe.experts.69.w2", "model.layers.24.block_sparse_moe.experts.70.w2", "model.layers.24.block_sparse_moe.experts.71.w2", "model.layers.24.block_sparse_moe.experts.72.w2", "model.layers.24.block_sparse_moe.experts.73.w2", "model.layers.24.block_sparse_moe.experts.74.w2", "model.layers.24.block_sparse_moe.experts.75.w2", "model.layers.24.block_sparse_moe.experts.76.w2", "model.layers.24.block_sparse_moe.experts.77.w2", "model.layers.24.block_sparse_moe.experts.78.w2", "model.layers.24.block_sparse_moe.experts.79.w2", "model.layers.24.block_sparse_moe.experts.80.w2", "model.layers.24.block_sparse_moe.experts.81.w2", "model.layers.24.block_sparse_moe.experts.82.w2", "model.layers.24.block_sparse_moe.experts.83.w2", "model.layers.24.block_sparse_moe.experts.84.w2", "model.layers.24.block_sparse_moe.experts.85.w2", "model.layers.24.block_sparse_moe.experts.86.w2", "model.layers.24.block_sparse_moe.experts.87.w2", "model.layers.24.block_sparse_moe.experts.88.w2", "model.layers.24.block_sparse_moe.experts.89.w2", "model.layers.24.block_sparse_moe.experts.90.w2", "model.layers.24.block_sparse_moe.experts.91.w2", "model.layers.24.block_sparse_moe.experts.92.w2", "model.layers.24.block_sparse_moe.experts.93.w2", "model.layers.24.block_sparse_moe.experts.94.w2", "model.layers.24.block_sparse_moe.experts.95.w2", "model.layers.24.block_sparse_moe.experts.96.w2", "model.layers.24.block_sparse_moe.experts.97.w2", "model.layers.24.block_sparse_moe.experts.98.w2", "model.layers.24.block_sparse_moe.experts.99.w2", "model.layers.24.block_sparse_moe.experts.100.w2", "model.layers.24.block_sparse_moe.experts.101.w2", "model.layers.24.block_sparse_moe.experts.102.w2", "model.layers.24.block_sparse_moe.experts.103.w2", "model.layers.24.block_sparse_moe.experts.104.w2", "model.layers.24.block_sparse_moe.experts.105.w2", "model.layers.24.block_sparse_moe.experts.106.w2", "model.layers.24.block_sparse_moe.experts.107.w2", "model.layers.24.block_sparse_moe.experts.108.w2", "model.layers.24.block_sparse_moe.experts.109.w2", "model.layers.24.block_sparse_moe.experts.110.w2", "model.layers.24.block_sparse_moe.experts.111.w2", "model.layers.24.block_sparse_moe.experts.112.w2", "model.layers.24.block_sparse_moe.experts.113.w2", "model.layers.24.block_sparse_moe.experts.114.w2", "model.layers.24.block_sparse_moe.experts.115.w2", "model.layers.24.block_sparse_moe.experts.116.w2", "model.layers.24.block_sparse_moe.experts.117.w2", "model.layers.24.block_sparse_moe.experts.118.w2", "model.layers.24.block_sparse_moe.experts.119.w2", "model.layers.24.block_sparse_moe.experts.120.w2", "model.layers.24.block_sparse_moe.experts.121.w2", "model.layers.24.block_sparse_moe.experts.122.w2", "model.layers.24.block_sparse_moe.experts.123.w2", "model.layers.24.block_sparse_moe.experts.124.w2", "model.layers.24.block_sparse_moe.experts.125.w2", "model.layers.24.block_sparse_moe.experts.126.w2", "model.layers.24.block_sparse_moe.experts.127.w2", "model.layers.24.block_sparse_moe.experts.128.w2", "model.layers.24.block_sparse_moe.experts.129.w2", "model.layers.24.block_sparse_moe.experts.130.w2", "model.layers.24.block_sparse_moe.experts.131.w2", "model.layers.24.block_sparse_moe.experts.132.w2", "model.layers.24.block_sparse_moe.experts.133.w2", "model.layers.24.block_sparse_moe.experts.134.w2", "model.layers.24.block_sparse_moe.experts.135.w2", "model.layers.24.block_sparse_moe.experts.136.w2", "model.layers.24.block_sparse_moe.experts.137.w2", "model.layers.24.block_sparse_moe.experts.138.w2", "model.layers.24.block_sparse_moe.experts.139.w2", "model.layers.24.block_sparse_moe.experts.140.w2", "model.layers.24.block_sparse_moe.experts.141.w2", "model.layers.24.block_sparse_moe.experts.142.w2", "model.layers.24.block_sparse_moe.experts.143.w2", "model.layers.24.block_sparse_moe.experts.144.w2", "model.layers.24.block_sparse_moe.experts.145.w2", "model.layers.24.block_sparse_moe.experts.146.w2", "model.layers.24.block_sparse_moe.experts.147.w2", "model.layers.24.block_sparse_moe.experts.148.w2", "model.layers.24.block_sparse_moe.experts.149.w2", "model.layers.24.block_sparse_moe.experts.150.w2", "model.layers.24.block_sparse_moe.experts.151.w2", "model.layers.24.block_sparse_moe.experts.152.w2", "model.layers.24.block_sparse_moe.experts.153.w2", "model.layers.24.block_sparse_moe.experts.154.w2", "model.layers.24.block_sparse_moe.experts.155.w2", "model.layers.24.block_sparse_moe.experts.156.w2", "model.layers.24.block_sparse_moe.experts.157.w2", "model.layers.24.block_sparse_moe.experts.158.w2", "model.layers.24.block_sparse_moe.experts.159.w2", "model.layers.24.block_sparse_moe.experts.160.w2", "model.layers.24.block_sparse_moe.experts.161.w2", "model.layers.24.block_sparse_moe.experts.162.w2", "model.layers.24.block_sparse_moe.experts.163.w2", "model.layers.24.block_sparse_moe.experts.164.w2", "model.layers.24.block_sparse_moe.experts.165.w2", "model.layers.24.block_sparse_moe.experts.166.w2", "model.layers.24.block_sparse_moe.experts.167.w2", "model.layers.24.block_sparse_moe.experts.168.w2", "model.layers.24.block_sparse_moe.experts.169.w2", "model.layers.24.block_sparse_moe.experts.170.w2", "model.layers.24.block_sparse_moe.experts.171.w2", "model.layers.24.block_sparse_moe.experts.172.w2", "model.layers.24.block_sparse_moe.experts.173.w2", "model.layers.24.block_sparse_moe.experts.174.w2", "model.layers.24.block_sparse_moe.experts.175.w2", "model.layers.24.block_sparse_moe.experts.176.w2", "model.layers.24.block_sparse_moe.experts.177.w2", "model.layers.24.block_sparse_moe.experts.178.w2", "model.layers.24.block_sparse_moe.experts.179.w2", "model.layers.24.block_sparse_moe.experts.180.w2", "model.layers.24.block_sparse_moe.experts.181.w2", "model.layers.24.block_sparse_moe.experts.182.w2", "model.layers.24.block_sparse_moe.experts.183.w2", "model.layers.24.block_sparse_moe.experts.184.w2", "model.layers.24.block_sparse_moe.experts.185.w2", "model.layers.24.block_sparse_moe.experts.186.w2", "model.layers.24.block_sparse_moe.experts.187.w2", "model.layers.24.block_sparse_moe.experts.188.w2", "model.layers.24.block_sparse_moe.experts.189.w2", "model.layers.24.block_sparse_moe.experts.190.w2", "model.layers.24.block_sparse_moe.experts.191.w2", "model.layers.24.block_sparse_moe.experts.192.w2", "model.layers.24.block_sparse_moe.experts.193.w2", "model.layers.24.block_sparse_moe.experts.194.w2", "model.layers.24.block_sparse_moe.experts.195.w2", "model.layers.24.block_sparse_moe.experts.196.w2", "model.layers.24.block_sparse_moe.experts.197.w2", "model.layers.24.block_sparse_moe.experts.198.w2", "model.layers.24.block_sparse_moe.experts.199.w2", "model.layers.24.block_sparse_moe.experts.200.w2", "model.layers.24.block_sparse_moe.experts.201.w2", "model.layers.24.block_sparse_moe.experts.202.w2", "model.layers.24.block_sparse_moe.experts.203.w2", "model.layers.24.block_sparse_moe.experts.204.w2", "model.layers.24.block_sparse_moe.experts.205.w2", "model.layers.24.block_sparse_moe.experts.206.w2", "model.layers.24.block_sparse_moe.experts.207.w2", "model.layers.24.block_sparse_moe.experts.208.w2", "model.layers.24.block_sparse_moe.experts.209.w2", "model.layers.24.block_sparse_moe.experts.210.w2", "model.layers.24.block_sparse_moe.experts.211.w2", "model.layers.24.block_sparse_moe.experts.212.w2", "model.layers.24.block_sparse_moe.experts.213.w2", "model.layers.24.block_sparse_moe.experts.214.w2", "model.layers.24.block_sparse_moe.experts.215.w2", "model.layers.24.block_sparse_moe.experts.216.w2", "model.layers.24.block_sparse_moe.experts.217.w2", "model.layers.24.block_sparse_moe.experts.218.w2", "model.layers.24.block_sparse_moe.experts.219.w2", "model.layers.24.block_sparse_moe.experts.220.w2", "model.layers.24.block_sparse_moe.experts.221.w2", "model.layers.24.block_sparse_moe.experts.222.w2", "model.layers.24.block_sparse_moe.experts.223.w2", "model.layers.24.block_sparse_moe.experts.224.w2", "model.layers.24.block_sparse_moe.experts.225.w2", "model.layers.24.block_sparse_moe.experts.226.w2", "model.layers.24.block_sparse_moe.experts.227.w2", "model.layers.24.block_sparse_moe.experts.228.w2", "model.layers.24.block_sparse_moe.experts.229.w2", "model.layers.24.block_sparse_moe.experts.230.w2", "model.layers.24.block_sparse_moe.experts.231.w2", "model.layers.24.block_sparse_moe.experts.232.w2", "model.layers.24.block_sparse_moe.experts.233.w2", "model.layers.24.block_sparse_moe.experts.234.w2", "model.layers.24.block_sparse_moe.experts.235.w2", "model.layers.24.block_sparse_moe.experts.236.w2", "model.layers.24.block_sparse_moe.experts.237.w2", "model.layers.24.block_sparse_moe.experts.238.w2", "model.layers.24.block_sparse_moe.experts.239.w2", "model.layers.24.block_sparse_moe.experts.240.w2", "model.layers.24.block_sparse_moe.experts.241.w2", "model.layers.24.block_sparse_moe.experts.242.w2", "model.layers.24.block_sparse_moe.experts.243.w2", "model.layers.24.block_sparse_moe.experts.244.w2", "model.layers.24.block_sparse_moe.experts.245.w2", "model.layers.24.block_sparse_moe.experts.246.w2", "model.layers.24.block_sparse_moe.experts.247.w2", "model.layers.24.block_sparse_moe.experts.248.w2", "model.layers.24.block_sparse_moe.experts.249.w2", "model.layers.24.block_sparse_moe.experts.250.w2", "model.layers.24.block_sparse_moe.experts.251.w2", "model.layers.24.block_sparse_moe.experts.252.w2", "model.layers.24.block_sparse_moe.experts.253.w2", "model.layers.24.block_sparse_moe.experts.254.w2", "model.layers.24.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.00036087967455389336, "dbits": 1207959552 } ] }, { "idx": 125, "layers": [ "model.layers.25.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0017759628593921883, "dbits": 18874368 } ] }, { "idx": 126, "layers": [ "model.layers.25.self_attn.k_proj", "model.layers.25.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0002579718828201183, "dbits": 6291456 } ] }, { "idx": 127, "layers": [ "model.layers.25.self_attn.o_proj" ], "candidates": [ { "dkld": -0.003744507580995593, "dbits": 18874368 } ] }, { "idx": 128, "layers": [ "model.layers.25.block_sparse_moe.experts.0.w1", "model.layers.25.block_sparse_moe.experts.1.w1", "model.layers.25.block_sparse_moe.experts.2.w1", "model.layers.25.block_sparse_moe.experts.3.w1", "model.layers.25.block_sparse_moe.experts.4.w1", "model.layers.25.block_sparse_moe.experts.5.w1", "model.layers.25.block_sparse_moe.experts.6.w1", "model.layers.25.block_sparse_moe.experts.7.w1", "model.layers.25.block_sparse_moe.experts.8.w1", "model.layers.25.block_sparse_moe.experts.9.w1", "model.layers.25.block_sparse_moe.experts.10.w1", "model.layers.25.block_sparse_moe.experts.11.w1", "model.layers.25.block_sparse_moe.experts.12.w1", "model.layers.25.block_sparse_moe.experts.13.w1", "model.layers.25.block_sparse_moe.experts.14.w1", "model.layers.25.block_sparse_moe.experts.15.w1", "model.layers.25.block_sparse_moe.experts.16.w1", "model.layers.25.block_sparse_moe.experts.17.w1", "model.layers.25.block_sparse_moe.experts.18.w1", "model.layers.25.block_sparse_moe.experts.19.w1", "model.layers.25.block_sparse_moe.experts.20.w1", "model.layers.25.block_sparse_moe.experts.21.w1", "model.layers.25.block_sparse_moe.experts.22.w1", "model.layers.25.block_sparse_moe.experts.23.w1", "model.layers.25.block_sparse_moe.experts.24.w1", "model.layers.25.block_sparse_moe.experts.25.w1", "model.layers.25.block_sparse_moe.experts.26.w1", "model.layers.25.block_sparse_moe.experts.27.w1", "model.layers.25.block_sparse_moe.experts.28.w1", "model.layers.25.block_sparse_moe.experts.29.w1", "model.layers.25.block_sparse_moe.experts.30.w1", "model.layers.25.block_sparse_moe.experts.31.w1", "model.layers.25.block_sparse_moe.experts.32.w1", "model.layers.25.block_sparse_moe.experts.33.w1", "model.layers.25.block_sparse_moe.experts.34.w1", "model.layers.25.block_sparse_moe.experts.35.w1", "model.layers.25.block_sparse_moe.experts.36.w1", "model.layers.25.block_sparse_moe.experts.37.w1", "model.layers.25.block_sparse_moe.experts.38.w1", "model.layers.25.block_sparse_moe.experts.39.w1", "model.layers.25.block_sparse_moe.experts.40.w1", "model.layers.25.block_sparse_moe.experts.41.w1", "model.layers.25.block_sparse_moe.experts.42.w1", "model.layers.25.block_sparse_moe.experts.43.w1", "model.layers.25.block_sparse_moe.experts.44.w1", "model.layers.25.block_sparse_moe.experts.45.w1", "model.layers.25.block_sparse_moe.experts.46.w1", "model.layers.25.block_sparse_moe.experts.47.w1", "model.layers.25.block_sparse_moe.experts.48.w1", "model.layers.25.block_sparse_moe.experts.49.w1", "model.layers.25.block_sparse_moe.experts.50.w1", "model.layers.25.block_sparse_moe.experts.51.w1", "model.layers.25.block_sparse_moe.experts.52.w1", "model.layers.25.block_sparse_moe.experts.53.w1", "model.layers.25.block_sparse_moe.experts.54.w1", "model.layers.25.block_sparse_moe.experts.55.w1", "model.layers.25.block_sparse_moe.experts.56.w1", "model.layers.25.block_sparse_moe.experts.57.w1", "model.layers.25.block_sparse_moe.experts.58.w1", "model.layers.25.block_sparse_moe.experts.59.w1", "model.layers.25.block_sparse_moe.experts.60.w1", "model.layers.25.block_sparse_moe.experts.61.w1", "model.layers.25.block_sparse_moe.experts.62.w1", "model.layers.25.block_sparse_moe.experts.63.w1", "model.layers.25.block_sparse_moe.experts.64.w1", "model.layers.25.block_sparse_moe.experts.65.w1", "model.layers.25.block_sparse_moe.experts.66.w1", "model.layers.25.block_sparse_moe.experts.67.w1", "model.layers.25.block_sparse_moe.experts.68.w1", "model.layers.25.block_sparse_moe.experts.69.w1", "model.layers.25.block_sparse_moe.experts.70.w1", "model.layers.25.block_sparse_moe.experts.71.w1", "model.layers.25.block_sparse_moe.experts.72.w1", "model.layers.25.block_sparse_moe.experts.73.w1", "model.layers.25.block_sparse_moe.experts.74.w1", "model.layers.25.block_sparse_moe.experts.75.w1", "model.layers.25.block_sparse_moe.experts.76.w1", "model.layers.25.block_sparse_moe.experts.77.w1", "model.layers.25.block_sparse_moe.experts.78.w1", "model.layers.25.block_sparse_moe.experts.79.w1", "model.layers.25.block_sparse_moe.experts.80.w1", "model.layers.25.block_sparse_moe.experts.81.w1", "model.layers.25.block_sparse_moe.experts.82.w1", "model.layers.25.block_sparse_moe.experts.83.w1", "model.layers.25.block_sparse_moe.experts.84.w1", "model.layers.25.block_sparse_moe.experts.85.w1", "model.layers.25.block_sparse_moe.experts.86.w1", "model.layers.25.block_sparse_moe.experts.87.w1", "model.layers.25.block_sparse_moe.experts.88.w1", "model.layers.25.block_sparse_moe.experts.89.w1", "model.layers.25.block_sparse_moe.experts.90.w1", "model.layers.25.block_sparse_moe.experts.91.w1", "model.layers.25.block_sparse_moe.experts.92.w1", "model.layers.25.block_sparse_moe.experts.93.w1", "model.layers.25.block_sparse_moe.experts.94.w1", "model.layers.25.block_sparse_moe.experts.95.w1", "model.layers.25.block_sparse_moe.experts.96.w1", "model.layers.25.block_sparse_moe.experts.97.w1", "model.layers.25.block_sparse_moe.experts.98.w1", "model.layers.25.block_sparse_moe.experts.99.w1", "model.layers.25.block_sparse_moe.experts.100.w1", "model.layers.25.block_sparse_moe.experts.101.w1", "model.layers.25.block_sparse_moe.experts.102.w1", "model.layers.25.block_sparse_moe.experts.103.w1", "model.layers.25.block_sparse_moe.experts.104.w1", "model.layers.25.block_sparse_moe.experts.105.w1", "model.layers.25.block_sparse_moe.experts.106.w1", "model.layers.25.block_sparse_moe.experts.107.w1", "model.layers.25.block_sparse_moe.experts.108.w1", "model.layers.25.block_sparse_moe.experts.109.w1", "model.layers.25.block_sparse_moe.experts.110.w1", "model.layers.25.block_sparse_moe.experts.111.w1", "model.layers.25.block_sparse_moe.experts.112.w1", "model.layers.25.block_sparse_moe.experts.113.w1", "model.layers.25.block_sparse_moe.experts.114.w1", "model.layers.25.block_sparse_moe.experts.115.w1", "model.layers.25.block_sparse_moe.experts.116.w1", "model.layers.25.block_sparse_moe.experts.117.w1", "model.layers.25.block_sparse_moe.experts.118.w1", "model.layers.25.block_sparse_moe.experts.119.w1", "model.layers.25.block_sparse_moe.experts.120.w1", "model.layers.25.block_sparse_moe.experts.121.w1", "model.layers.25.block_sparse_moe.experts.122.w1", "model.layers.25.block_sparse_moe.experts.123.w1", "model.layers.25.block_sparse_moe.experts.124.w1", "model.layers.25.block_sparse_moe.experts.125.w1", "model.layers.25.block_sparse_moe.experts.126.w1", "model.layers.25.block_sparse_moe.experts.127.w1", "model.layers.25.block_sparse_moe.experts.128.w1", "model.layers.25.block_sparse_moe.experts.129.w1", "model.layers.25.block_sparse_moe.experts.130.w1", "model.layers.25.block_sparse_moe.experts.131.w1", "model.layers.25.block_sparse_moe.experts.132.w1", "model.layers.25.block_sparse_moe.experts.133.w1", "model.layers.25.block_sparse_moe.experts.134.w1", "model.layers.25.block_sparse_moe.experts.135.w1", "model.layers.25.block_sparse_moe.experts.136.w1", "model.layers.25.block_sparse_moe.experts.137.w1", "model.layers.25.block_sparse_moe.experts.138.w1", "model.layers.25.block_sparse_moe.experts.139.w1", "model.layers.25.block_sparse_moe.experts.140.w1", "model.layers.25.block_sparse_moe.experts.141.w1", "model.layers.25.block_sparse_moe.experts.142.w1", "model.layers.25.block_sparse_moe.experts.143.w1", "model.layers.25.block_sparse_moe.experts.144.w1", "model.layers.25.block_sparse_moe.experts.145.w1", "model.layers.25.block_sparse_moe.experts.146.w1", "model.layers.25.block_sparse_moe.experts.147.w1", "model.layers.25.block_sparse_moe.experts.148.w1", "model.layers.25.block_sparse_moe.experts.149.w1", "model.layers.25.block_sparse_moe.experts.150.w1", "model.layers.25.block_sparse_moe.experts.151.w1", "model.layers.25.block_sparse_moe.experts.152.w1", "model.layers.25.block_sparse_moe.experts.153.w1", "model.layers.25.block_sparse_moe.experts.154.w1", "model.layers.25.block_sparse_moe.experts.155.w1", "model.layers.25.block_sparse_moe.experts.156.w1", "model.layers.25.block_sparse_moe.experts.157.w1", "model.layers.25.block_sparse_moe.experts.158.w1", "model.layers.25.block_sparse_moe.experts.159.w1", "model.layers.25.block_sparse_moe.experts.160.w1", "model.layers.25.block_sparse_moe.experts.161.w1", "model.layers.25.block_sparse_moe.experts.162.w1", "model.layers.25.block_sparse_moe.experts.163.w1", "model.layers.25.block_sparse_moe.experts.164.w1", "model.layers.25.block_sparse_moe.experts.165.w1", "model.layers.25.block_sparse_moe.experts.166.w1", "model.layers.25.block_sparse_moe.experts.167.w1", "model.layers.25.block_sparse_moe.experts.168.w1", "model.layers.25.block_sparse_moe.experts.169.w1", "model.layers.25.block_sparse_moe.experts.170.w1", "model.layers.25.block_sparse_moe.experts.171.w1", "model.layers.25.block_sparse_moe.experts.172.w1", "model.layers.25.block_sparse_moe.experts.173.w1", "model.layers.25.block_sparse_moe.experts.174.w1", "model.layers.25.block_sparse_moe.experts.175.w1", "model.layers.25.block_sparse_moe.experts.176.w1", "model.layers.25.block_sparse_moe.experts.177.w1", "model.layers.25.block_sparse_moe.experts.178.w1", "model.layers.25.block_sparse_moe.experts.179.w1", "model.layers.25.block_sparse_moe.experts.180.w1", "model.layers.25.block_sparse_moe.experts.181.w1", "model.layers.25.block_sparse_moe.experts.182.w1", "model.layers.25.block_sparse_moe.experts.183.w1", "model.layers.25.block_sparse_moe.experts.184.w1", "model.layers.25.block_sparse_moe.experts.185.w1", "model.layers.25.block_sparse_moe.experts.186.w1", "model.layers.25.block_sparse_moe.experts.187.w1", "model.layers.25.block_sparse_moe.experts.188.w1", "model.layers.25.block_sparse_moe.experts.189.w1", "model.layers.25.block_sparse_moe.experts.190.w1", "model.layers.25.block_sparse_moe.experts.191.w1", "model.layers.25.block_sparse_moe.experts.192.w1", "model.layers.25.block_sparse_moe.experts.193.w1", "model.layers.25.block_sparse_moe.experts.194.w1", "model.layers.25.block_sparse_moe.experts.195.w1", "model.layers.25.block_sparse_moe.experts.196.w1", "model.layers.25.block_sparse_moe.experts.197.w1", "model.layers.25.block_sparse_moe.experts.198.w1", "model.layers.25.block_sparse_moe.experts.199.w1", "model.layers.25.block_sparse_moe.experts.200.w1", "model.layers.25.block_sparse_moe.experts.201.w1", "model.layers.25.block_sparse_moe.experts.202.w1", "model.layers.25.block_sparse_moe.experts.203.w1", "model.layers.25.block_sparse_moe.experts.204.w1", "model.layers.25.block_sparse_moe.experts.205.w1", "model.layers.25.block_sparse_moe.experts.206.w1", "model.layers.25.block_sparse_moe.experts.207.w1", "model.layers.25.block_sparse_moe.experts.208.w1", "model.layers.25.block_sparse_moe.experts.209.w1", "model.layers.25.block_sparse_moe.experts.210.w1", "model.layers.25.block_sparse_moe.experts.211.w1", "model.layers.25.block_sparse_moe.experts.212.w1", "model.layers.25.block_sparse_moe.experts.213.w1", "model.layers.25.block_sparse_moe.experts.214.w1", "model.layers.25.block_sparse_moe.experts.215.w1", "model.layers.25.block_sparse_moe.experts.216.w1", "model.layers.25.block_sparse_moe.experts.217.w1", "model.layers.25.block_sparse_moe.experts.218.w1", "model.layers.25.block_sparse_moe.experts.219.w1", "model.layers.25.block_sparse_moe.experts.220.w1", "model.layers.25.block_sparse_moe.experts.221.w1", "model.layers.25.block_sparse_moe.experts.222.w1", "model.layers.25.block_sparse_moe.experts.223.w1", "model.layers.25.block_sparse_moe.experts.224.w1", "model.layers.25.block_sparse_moe.experts.225.w1", "model.layers.25.block_sparse_moe.experts.226.w1", "model.layers.25.block_sparse_moe.experts.227.w1", "model.layers.25.block_sparse_moe.experts.228.w1", "model.layers.25.block_sparse_moe.experts.229.w1", "model.layers.25.block_sparse_moe.experts.230.w1", "model.layers.25.block_sparse_moe.experts.231.w1", "model.layers.25.block_sparse_moe.experts.232.w1", "model.layers.25.block_sparse_moe.experts.233.w1", "model.layers.25.block_sparse_moe.experts.234.w1", "model.layers.25.block_sparse_moe.experts.235.w1", "model.layers.25.block_sparse_moe.experts.236.w1", "model.layers.25.block_sparse_moe.experts.237.w1", "model.layers.25.block_sparse_moe.experts.238.w1", "model.layers.25.block_sparse_moe.experts.239.w1", "model.layers.25.block_sparse_moe.experts.240.w1", "model.layers.25.block_sparse_moe.experts.241.w1", "model.layers.25.block_sparse_moe.experts.242.w1", "model.layers.25.block_sparse_moe.experts.243.w1", "model.layers.25.block_sparse_moe.experts.244.w1", "model.layers.25.block_sparse_moe.experts.245.w1", "model.layers.25.block_sparse_moe.experts.246.w1", "model.layers.25.block_sparse_moe.experts.247.w1", "model.layers.25.block_sparse_moe.experts.248.w1", "model.layers.25.block_sparse_moe.experts.249.w1", "model.layers.25.block_sparse_moe.experts.250.w1", "model.layers.25.block_sparse_moe.experts.251.w1", "model.layers.25.block_sparse_moe.experts.252.w1", "model.layers.25.block_sparse_moe.experts.253.w1", "model.layers.25.block_sparse_moe.experts.254.w1", "model.layers.25.block_sparse_moe.experts.255.w1", "model.layers.25.block_sparse_moe.experts.0.w3", "model.layers.25.block_sparse_moe.experts.1.w3", "model.layers.25.block_sparse_moe.experts.2.w3", "model.layers.25.block_sparse_moe.experts.3.w3", "model.layers.25.block_sparse_moe.experts.4.w3", "model.layers.25.block_sparse_moe.experts.5.w3", "model.layers.25.block_sparse_moe.experts.6.w3", "model.layers.25.block_sparse_moe.experts.7.w3", "model.layers.25.block_sparse_moe.experts.8.w3", "model.layers.25.block_sparse_moe.experts.9.w3", "model.layers.25.block_sparse_moe.experts.10.w3", "model.layers.25.block_sparse_moe.experts.11.w3", "model.layers.25.block_sparse_moe.experts.12.w3", "model.layers.25.block_sparse_moe.experts.13.w3", "model.layers.25.block_sparse_moe.experts.14.w3", "model.layers.25.block_sparse_moe.experts.15.w3", "model.layers.25.block_sparse_moe.experts.16.w3", "model.layers.25.block_sparse_moe.experts.17.w3", "model.layers.25.block_sparse_moe.experts.18.w3", "model.layers.25.block_sparse_moe.experts.19.w3", "model.layers.25.block_sparse_moe.experts.20.w3", "model.layers.25.block_sparse_moe.experts.21.w3", "model.layers.25.block_sparse_moe.experts.22.w3", "model.layers.25.block_sparse_moe.experts.23.w3", "model.layers.25.block_sparse_moe.experts.24.w3", "model.layers.25.block_sparse_moe.experts.25.w3", "model.layers.25.block_sparse_moe.experts.26.w3", "model.layers.25.block_sparse_moe.experts.27.w3", "model.layers.25.block_sparse_moe.experts.28.w3", "model.layers.25.block_sparse_moe.experts.29.w3", "model.layers.25.block_sparse_moe.experts.30.w3", "model.layers.25.block_sparse_moe.experts.31.w3", "model.layers.25.block_sparse_moe.experts.32.w3", "model.layers.25.block_sparse_moe.experts.33.w3", "model.layers.25.block_sparse_moe.experts.34.w3", "model.layers.25.block_sparse_moe.experts.35.w3", "model.layers.25.block_sparse_moe.experts.36.w3", "model.layers.25.block_sparse_moe.experts.37.w3", "model.layers.25.block_sparse_moe.experts.38.w3", "model.layers.25.block_sparse_moe.experts.39.w3", "model.layers.25.block_sparse_moe.experts.40.w3", "model.layers.25.block_sparse_moe.experts.41.w3", "model.layers.25.block_sparse_moe.experts.42.w3", "model.layers.25.block_sparse_moe.experts.43.w3", "model.layers.25.block_sparse_moe.experts.44.w3", "model.layers.25.block_sparse_moe.experts.45.w3", "model.layers.25.block_sparse_moe.experts.46.w3", "model.layers.25.block_sparse_moe.experts.47.w3", "model.layers.25.block_sparse_moe.experts.48.w3", "model.layers.25.block_sparse_moe.experts.49.w3", "model.layers.25.block_sparse_moe.experts.50.w3", "model.layers.25.block_sparse_moe.experts.51.w3", "model.layers.25.block_sparse_moe.experts.52.w3", "model.layers.25.block_sparse_moe.experts.53.w3", "model.layers.25.block_sparse_moe.experts.54.w3", "model.layers.25.block_sparse_moe.experts.55.w3", "model.layers.25.block_sparse_moe.experts.56.w3", "model.layers.25.block_sparse_moe.experts.57.w3", "model.layers.25.block_sparse_moe.experts.58.w3", "model.layers.25.block_sparse_moe.experts.59.w3", "model.layers.25.block_sparse_moe.experts.60.w3", "model.layers.25.block_sparse_moe.experts.61.w3", "model.layers.25.block_sparse_moe.experts.62.w3", "model.layers.25.block_sparse_moe.experts.63.w3", "model.layers.25.block_sparse_moe.experts.64.w3", "model.layers.25.block_sparse_moe.experts.65.w3", "model.layers.25.block_sparse_moe.experts.66.w3", "model.layers.25.block_sparse_moe.experts.67.w3", "model.layers.25.block_sparse_moe.experts.68.w3", "model.layers.25.block_sparse_moe.experts.69.w3", "model.layers.25.block_sparse_moe.experts.70.w3", "model.layers.25.block_sparse_moe.experts.71.w3", "model.layers.25.block_sparse_moe.experts.72.w3", "model.layers.25.block_sparse_moe.experts.73.w3", "model.layers.25.block_sparse_moe.experts.74.w3", "model.layers.25.block_sparse_moe.experts.75.w3", "model.layers.25.block_sparse_moe.experts.76.w3", "model.layers.25.block_sparse_moe.experts.77.w3", "model.layers.25.block_sparse_moe.experts.78.w3", "model.layers.25.block_sparse_moe.experts.79.w3", "model.layers.25.block_sparse_moe.experts.80.w3", "model.layers.25.block_sparse_moe.experts.81.w3", "model.layers.25.block_sparse_moe.experts.82.w3", "model.layers.25.block_sparse_moe.experts.83.w3", "model.layers.25.block_sparse_moe.experts.84.w3", "model.layers.25.block_sparse_moe.experts.85.w3", "model.layers.25.block_sparse_moe.experts.86.w3", "model.layers.25.block_sparse_moe.experts.87.w3", "model.layers.25.block_sparse_moe.experts.88.w3", "model.layers.25.block_sparse_moe.experts.89.w3", "model.layers.25.block_sparse_moe.experts.90.w3", "model.layers.25.block_sparse_moe.experts.91.w3", "model.layers.25.block_sparse_moe.experts.92.w3", "model.layers.25.block_sparse_moe.experts.93.w3", "model.layers.25.block_sparse_moe.experts.94.w3", "model.layers.25.block_sparse_moe.experts.95.w3", "model.layers.25.block_sparse_moe.experts.96.w3", "model.layers.25.block_sparse_moe.experts.97.w3", "model.layers.25.block_sparse_moe.experts.98.w3", "model.layers.25.block_sparse_moe.experts.99.w3", "model.layers.25.block_sparse_moe.experts.100.w3", "model.layers.25.block_sparse_moe.experts.101.w3", "model.layers.25.block_sparse_moe.experts.102.w3", "model.layers.25.block_sparse_moe.experts.103.w3", "model.layers.25.block_sparse_moe.experts.104.w3", "model.layers.25.block_sparse_moe.experts.105.w3", "model.layers.25.block_sparse_moe.experts.106.w3", "model.layers.25.block_sparse_moe.experts.107.w3", "model.layers.25.block_sparse_moe.experts.108.w3", "model.layers.25.block_sparse_moe.experts.109.w3", "model.layers.25.block_sparse_moe.experts.110.w3", "model.layers.25.block_sparse_moe.experts.111.w3", "model.layers.25.block_sparse_moe.experts.112.w3", "model.layers.25.block_sparse_moe.experts.113.w3", "model.layers.25.block_sparse_moe.experts.114.w3", "model.layers.25.block_sparse_moe.experts.115.w3", "model.layers.25.block_sparse_moe.experts.116.w3", "model.layers.25.block_sparse_moe.experts.117.w3", "model.layers.25.block_sparse_moe.experts.118.w3", "model.layers.25.block_sparse_moe.experts.119.w3", "model.layers.25.block_sparse_moe.experts.120.w3", "model.layers.25.block_sparse_moe.experts.121.w3", "model.layers.25.block_sparse_moe.experts.122.w3", "model.layers.25.block_sparse_moe.experts.123.w3", "model.layers.25.block_sparse_moe.experts.124.w3", "model.layers.25.block_sparse_moe.experts.125.w3", "model.layers.25.block_sparse_moe.experts.126.w3", "model.layers.25.block_sparse_moe.experts.127.w3", "model.layers.25.block_sparse_moe.experts.128.w3", "model.layers.25.block_sparse_moe.experts.129.w3", "model.layers.25.block_sparse_moe.experts.130.w3", "model.layers.25.block_sparse_moe.experts.131.w3", "model.layers.25.block_sparse_moe.experts.132.w3", "model.layers.25.block_sparse_moe.experts.133.w3", "model.layers.25.block_sparse_moe.experts.134.w3", "model.layers.25.block_sparse_moe.experts.135.w3", "model.layers.25.block_sparse_moe.experts.136.w3", "model.layers.25.block_sparse_moe.experts.137.w3", "model.layers.25.block_sparse_moe.experts.138.w3", "model.layers.25.block_sparse_moe.experts.139.w3", "model.layers.25.block_sparse_moe.experts.140.w3", "model.layers.25.block_sparse_moe.experts.141.w3", "model.layers.25.block_sparse_moe.experts.142.w3", "model.layers.25.block_sparse_moe.experts.143.w3", "model.layers.25.block_sparse_moe.experts.144.w3", "model.layers.25.block_sparse_moe.experts.145.w3", "model.layers.25.block_sparse_moe.experts.146.w3", "model.layers.25.block_sparse_moe.experts.147.w3", "model.layers.25.block_sparse_moe.experts.148.w3", "model.layers.25.block_sparse_moe.experts.149.w3", "model.layers.25.block_sparse_moe.experts.150.w3", "model.layers.25.block_sparse_moe.experts.151.w3", "model.layers.25.block_sparse_moe.experts.152.w3", "model.layers.25.block_sparse_moe.experts.153.w3", "model.layers.25.block_sparse_moe.experts.154.w3", "model.layers.25.block_sparse_moe.experts.155.w3", "model.layers.25.block_sparse_moe.experts.156.w3", "model.layers.25.block_sparse_moe.experts.157.w3", "model.layers.25.block_sparse_moe.experts.158.w3", "model.layers.25.block_sparse_moe.experts.159.w3", "model.layers.25.block_sparse_moe.experts.160.w3", "model.layers.25.block_sparse_moe.experts.161.w3", "model.layers.25.block_sparse_moe.experts.162.w3", "model.layers.25.block_sparse_moe.experts.163.w3", "model.layers.25.block_sparse_moe.experts.164.w3", "model.layers.25.block_sparse_moe.experts.165.w3", "model.layers.25.block_sparse_moe.experts.166.w3", "model.layers.25.block_sparse_moe.experts.167.w3", "model.layers.25.block_sparse_moe.experts.168.w3", "model.layers.25.block_sparse_moe.experts.169.w3", "model.layers.25.block_sparse_moe.experts.170.w3", "model.layers.25.block_sparse_moe.experts.171.w3", "model.layers.25.block_sparse_moe.experts.172.w3", "model.layers.25.block_sparse_moe.experts.173.w3", "model.layers.25.block_sparse_moe.experts.174.w3", "model.layers.25.block_sparse_moe.experts.175.w3", "model.layers.25.block_sparse_moe.experts.176.w3", "model.layers.25.block_sparse_moe.experts.177.w3", "model.layers.25.block_sparse_moe.experts.178.w3", "model.layers.25.block_sparse_moe.experts.179.w3", "model.layers.25.block_sparse_moe.experts.180.w3", "model.layers.25.block_sparse_moe.experts.181.w3", "model.layers.25.block_sparse_moe.experts.182.w3", "model.layers.25.block_sparse_moe.experts.183.w3", "model.layers.25.block_sparse_moe.experts.184.w3", "model.layers.25.block_sparse_moe.experts.185.w3", "model.layers.25.block_sparse_moe.experts.186.w3", "model.layers.25.block_sparse_moe.experts.187.w3", "model.layers.25.block_sparse_moe.experts.188.w3", "model.layers.25.block_sparse_moe.experts.189.w3", "model.layers.25.block_sparse_moe.experts.190.w3", "model.layers.25.block_sparse_moe.experts.191.w3", "model.layers.25.block_sparse_moe.experts.192.w3", "model.layers.25.block_sparse_moe.experts.193.w3", "model.layers.25.block_sparse_moe.experts.194.w3", "model.layers.25.block_sparse_moe.experts.195.w3", "model.layers.25.block_sparse_moe.experts.196.w3", "model.layers.25.block_sparse_moe.experts.197.w3", "model.layers.25.block_sparse_moe.experts.198.w3", "model.layers.25.block_sparse_moe.experts.199.w3", "model.layers.25.block_sparse_moe.experts.200.w3", "model.layers.25.block_sparse_moe.experts.201.w3", "model.layers.25.block_sparse_moe.experts.202.w3", "model.layers.25.block_sparse_moe.experts.203.w3", "model.layers.25.block_sparse_moe.experts.204.w3", "model.layers.25.block_sparse_moe.experts.205.w3", "model.layers.25.block_sparse_moe.experts.206.w3", "model.layers.25.block_sparse_moe.experts.207.w3", "model.layers.25.block_sparse_moe.experts.208.w3", "model.layers.25.block_sparse_moe.experts.209.w3", "model.layers.25.block_sparse_moe.experts.210.w3", "model.layers.25.block_sparse_moe.experts.211.w3", "model.layers.25.block_sparse_moe.experts.212.w3", "model.layers.25.block_sparse_moe.experts.213.w3", "model.layers.25.block_sparse_moe.experts.214.w3", "model.layers.25.block_sparse_moe.experts.215.w3", "model.layers.25.block_sparse_moe.experts.216.w3", "model.layers.25.block_sparse_moe.experts.217.w3", "model.layers.25.block_sparse_moe.experts.218.w3", "model.layers.25.block_sparse_moe.experts.219.w3", "model.layers.25.block_sparse_moe.experts.220.w3", "model.layers.25.block_sparse_moe.experts.221.w3", "model.layers.25.block_sparse_moe.experts.222.w3", "model.layers.25.block_sparse_moe.experts.223.w3", "model.layers.25.block_sparse_moe.experts.224.w3", "model.layers.25.block_sparse_moe.experts.225.w3", "model.layers.25.block_sparse_moe.experts.226.w3", "model.layers.25.block_sparse_moe.experts.227.w3", "model.layers.25.block_sparse_moe.experts.228.w3", "model.layers.25.block_sparse_moe.experts.229.w3", "model.layers.25.block_sparse_moe.experts.230.w3", "model.layers.25.block_sparse_moe.experts.231.w3", "model.layers.25.block_sparse_moe.experts.232.w3", "model.layers.25.block_sparse_moe.experts.233.w3", "model.layers.25.block_sparse_moe.experts.234.w3", "model.layers.25.block_sparse_moe.experts.235.w3", "model.layers.25.block_sparse_moe.experts.236.w3", "model.layers.25.block_sparse_moe.experts.237.w3", "model.layers.25.block_sparse_moe.experts.238.w3", "model.layers.25.block_sparse_moe.experts.239.w3", "model.layers.25.block_sparse_moe.experts.240.w3", "model.layers.25.block_sparse_moe.experts.241.w3", "model.layers.25.block_sparse_moe.experts.242.w3", "model.layers.25.block_sparse_moe.experts.243.w3", "model.layers.25.block_sparse_moe.experts.244.w3", "model.layers.25.block_sparse_moe.experts.245.w3", "model.layers.25.block_sparse_moe.experts.246.w3", "model.layers.25.block_sparse_moe.experts.247.w3", "model.layers.25.block_sparse_moe.experts.248.w3", "model.layers.25.block_sparse_moe.experts.249.w3", "model.layers.25.block_sparse_moe.experts.250.w3", "model.layers.25.block_sparse_moe.experts.251.w3", "model.layers.25.block_sparse_moe.experts.252.w3", "model.layers.25.block_sparse_moe.experts.253.w3", "model.layers.25.block_sparse_moe.experts.254.w3", "model.layers.25.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.00014212094247340046, "dbits": 2415919104 } ] }, { "idx": 129, "layers": [ "model.layers.25.block_sparse_moe.experts.0.w2", "model.layers.25.block_sparse_moe.experts.1.w2", "model.layers.25.block_sparse_moe.experts.2.w2", "model.layers.25.block_sparse_moe.experts.3.w2", "model.layers.25.block_sparse_moe.experts.4.w2", "model.layers.25.block_sparse_moe.experts.5.w2", "model.layers.25.block_sparse_moe.experts.6.w2", "model.layers.25.block_sparse_moe.experts.7.w2", "model.layers.25.block_sparse_moe.experts.8.w2", "model.layers.25.block_sparse_moe.experts.9.w2", "model.layers.25.block_sparse_moe.experts.10.w2", "model.layers.25.block_sparse_moe.experts.11.w2", "model.layers.25.block_sparse_moe.experts.12.w2", "model.layers.25.block_sparse_moe.experts.13.w2", "model.layers.25.block_sparse_moe.experts.14.w2", "model.layers.25.block_sparse_moe.experts.15.w2", "model.layers.25.block_sparse_moe.experts.16.w2", "model.layers.25.block_sparse_moe.experts.17.w2", "model.layers.25.block_sparse_moe.experts.18.w2", "model.layers.25.block_sparse_moe.experts.19.w2", "model.layers.25.block_sparse_moe.experts.20.w2", "model.layers.25.block_sparse_moe.experts.21.w2", "model.layers.25.block_sparse_moe.experts.22.w2", "model.layers.25.block_sparse_moe.experts.23.w2", "model.layers.25.block_sparse_moe.experts.24.w2", "model.layers.25.block_sparse_moe.experts.25.w2", "model.layers.25.block_sparse_moe.experts.26.w2", "model.layers.25.block_sparse_moe.experts.27.w2", "model.layers.25.block_sparse_moe.experts.28.w2", "model.layers.25.block_sparse_moe.experts.29.w2", "model.layers.25.block_sparse_moe.experts.30.w2", "model.layers.25.block_sparse_moe.experts.31.w2", "model.layers.25.block_sparse_moe.experts.32.w2", "model.layers.25.block_sparse_moe.experts.33.w2", "model.layers.25.block_sparse_moe.experts.34.w2", "model.layers.25.block_sparse_moe.experts.35.w2", "model.layers.25.block_sparse_moe.experts.36.w2", "model.layers.25.block_sparse_moe.experts.37.w2", "model.layers.25.block_sparse_moe.experts.38.w2", "model.layers.25.block_sparse_moe.experts.39.w2", "model.layers.25.block_sparse_moe.experts.40.w2", "model.layers.25.block_sparse_moe.experts.41.w2", "model.layers.25.block_sparse_moe.experts.42.w2", "model.layers.25.block_sparse_moe.experts.43.w2", "model.layers.25.block_sparse_moe.experts.44.w2", "model.layers.25.block_sparse_moe.experts.45.w2", "model.layers.25.block_sparse_moe.experts.46.w2", "model.layers.25.block_sparse_moe.experts.47.w2", "model.layers.25.block_sparse_moe.experts.48.w2", "model.layers.25.block_sparse_moe.experts.49.w2", "model.layers.25.block_sparse_moe.experts.50.w2", "model.layers.25.block_sparse_moe.experts.51.w2", "model.layers.25.block_sparse_moe.experts.52.w2", "model.layers.25.block_sparse_moe.experts.53.w2", "model.layers.25.block_sparse_moe.experts.54.w2", "model.layers.25.block_sparse_moe.experts.55.w2", "model.layers.25.block_sparse_moe.experts.56.w2", "model.layers.25.block_sparse_moe.experts.57.w2", "model.layers.25.block_sparse_moe.experts.58.w2", "model.layers.25.block_sparse_moe.experts.59.w2", "model.layers.25.block_sparse_moe.experts.60.w2", "model.layers.25.block_sparse_moe.experts.61.w2", "model.layers.25.block_sparse_moe.experts.62.w2", "model.layers.25.block_sparse_moe.experts.63.w2", "model.layers.25.block_sparse_moe.experts.64.w2", "model.layers.25.block_sparse_moe.experts.65.w2", "model.layers.25.block_sparse_moe.experts.66.w2", "model.layers.25.block_sparse_moe.experts.67.w2", "model.layers.25.block_sparse_moe.experts.68.w2", "model.layers.25.block_sparse_moe.experts.69.w2", "model.layers.25.block_sparse_moe.experts.70.w2", "model.layers.25.block_sparse_moe.experts.71.w2", "model.layers.25.block_sparse_moe.experts.72.w2", "model.layers.25.block_sparse_moe.experts.73.w2", "model.layers.25.block_sparse_moe.experts.74.w2", "model.layers.25.block_sparse_moe.experts.75.w2", "model.layers.25.block_sparse_moe.experts.76.w2", "model.layers.25.block_sparse_moe.experts.77.w2", "model.layers.25.block_sparse_moe.experts.78.w2", "model.layers.25.block_sparse_moe.experts.79.w2", "model.layers.25.block_sparse_moe.experts.80.w2", "model.layers.25.block_sparse_moe.experts.81.w2", "model.layers.25.block_sparse_moe.experts.82.w2", "model.layers.25.block_sparse_moe.experts.83.w2", "model.layers.25.block_sparse_moe.experts.84.w2", "model.layers.25.block_sparse_moe.experts.85.w2", "model.layers.25.block_sparse_moe.experts.86.w2", "model.layers.25.block_sparse_moe.experts.87.w2", "model.layers.25.block_sparse_moe.experts.88.w2", "model.layers.25.block_sparse_moe.experts.89.w2", "model.layers.25.block_sparse_moe.experts.90.w2", "model.layers.25.block_sparse_moe.experts.91.w2", "model.layers.25.block_sparse_moe.experts.92.w2", "model.layers.25.block_sparse_moe.experts.93.w2", "model.layers.25.block_sparse_moe.experts.94.w2", "model.layers.25.block_sparse_moe.experts.95.w2", "model.layers.25.block_sparse_moe.experts.96.w2", "model.layers.25.block_sparse_moe.experts.97.w2", "model.layers.25.block_sparse_moe.experts.98.w2", "model.layers.25.block_sparse_moe.experts.99.w2", "model.layers.25.block_sparse_moe.experts.100.w2", "model.layers.25.block_sparse_moe.experts.101.w2", "model.layers.25.block_sparse_moe.experts.102.w2", "model.layers.25.block_sparse_moe.experts.103.w2", "model.layers.25.block_sparse_moe.experts.104.w2", "model.layers.25.block_sparse_moe.experts.105.w2", "model.layers.25.block_sparse_moe.experts.106.w2", "model.layers.25.block_sparse_moe.experts.107.w2", "model.layers.25.block_sparse_moe.experts.108.w2", "model.layers.25.block_sparse_moe.experts.109.w2", "model.layers.25.block_sparse_moe.experts.110.w2", "model.layers.25.block_sparse_moe.experts.111.w2", "model.layers.25.block_sparse_moe.experts.112.w2", "model.layers.25.block_sparse_moe.experts.113.w2", "model.layers.25.block_sparse_moe.experts.114.w2", "model.layers.25.block_sparse_moe.experts.115.w2", "model.layers.25.block_sparse_moe.experts.116.w2", "model.layers.25.block_sparse_moe.experts.117.w2", "model.layers.25.block_sparse_moe.experts.118.w2", "model.layers.25.block_sparse_moe.experts.119.w2", "model.layers.25.block_sparse_moe.experts.120.w2", "model.layers.25.block_sparse_moe.experts.121.w2", "model.layers.25.block_sparse_moe.experts.122.w2", "model.layers.25.block_sparse_moe.experts.123.w2", "model.layers.25.block_sparse_moe.experts.124.w2", "model.layers.25.block_sparse_moe.experts.125.w2", "model.layers.25.block_sparse_moe.experts.126.w2", "model.layers.25.block_sparse_moe.experts.127.w2", "model.layers.25.block_sparse_moe.experts.128.w2", "model.layers.25.block_sparse_moe.experts.129.w2", "model.layers.25.block_sparse_moe.experts.130.w2", "model.layers.25.block_sparse_moe.experts.131.w2", "model.layers.25.block_sparse_moe.experts.132.w2", "model.layers.25.block_sparse_moe.experts.133.w2", "model.layers.25.block_sparse_moe.experts.134.w2", "model.layers.25.block_sparse_moe.experts.135.w2", "model.layers.25.block_sparse_moe.experts.136.w2", "model.layers.25.block_sparse_moe.experts.137.w2", "model.layers.25.block_sparse_moe.experts.138.w2", "model.layers.25.block_sparse_moe.experts.139.w2", "model.layers.25.block_sparse_moe.experts.140.w2", "model.layers.25.block_sparse_moe.experts.141.w2", "model.layers.25.block_sparse_moe.experts.142.w2", "model.layers.25.block_sparse_moe.experts.143.w2", "model.layers.25.block_sparse_moe.experts.144.w2", "model.layers.25.block_sparse_moe.experts.145.w2", "model.layers.25.block_sparse_moe.experts.146.w2", "model.layers.25.block_sparse_moe.experts.147.w2", "model.layers.25.block_sparse_moe.experts.148.w2", "model.layers.25.block_sparse_moe.experts.149.w2", "model.layers.25.block_sparse_moe.experts.150.w2", "model.layers.25.block_sparse_moe.experts.151.w2", "model.layers.25.block_sparse_moe.experts.152.w2", "model.layers.25.block_sparse_moe.experts.153.w2", "model.layers.25.block_sparse_moe.experts.154.w2", "model.layers.25.block_sparse_moe.experts.155.w2", "model.layers.25.block_sparse_moe.experts.156.w2", "model.layers.25.block_sparse_moe.experts.157.w2", "model.layers.25.block_sparse_moe.experts.158.w2", "model.layers.25.block_sparse_moe.experts.159.w2", "model.layers.25.block_sparse_moe.experts.160.w2", "model.layers.25.block_sparse_moe.experts.161.w2", "model.layers.25.block_sparse_moe.experts.162.w2", "model.layers.25.block_sparse_moe.experts.163.w2", "model.layers.25.block_sparse_moe.experts.164.w2", "model.layers.25.block_sparse_moe.experts.165.w2", "model.layers.25.block_sparse_moe.experts.166.w2", "model.layers.25.block_sparse_moe.experts.167.w2", "model.layers.25.block_sparse_moe.experts.168.w2", "model.layers.25.block_sparse_moe.experts.169.w2", "model.layers.25.block_sparse_moe.experts.170.w2", "model.layers.25.block_sparse_moe.experts.171.w2", "model.layers.25.block_sparse_moe.experts.172.w2", "model.layers.25.block_sparse_moe.experts.173.w2", "model.layers.25.block_sparse_moe.experts.174.w2", "model.layers.25.block_sparse_moe.experts.175.w2", "model.layers.25.block_sparse_moe.experts.176.w2", "model.layers.25.block_sparse_moe.experts.177.w2", "model.layers.25.block_sparse_moe.experts.178.w2", "model.layers.25.block_sparse_moe.experts.179.w2", "model.layers.25.block_sparse_moe.experts.180.w2", "model.layers.25.block_sparse_moe.experts.181.w2", "model.layers.25.block_sparse_moe.experts.182.w2", "model.layers.25.block_sparse_moe.experts.183.w2", "model.layers.25.block_sparse_moe.experts.184.w2", "model.layers.25.block_sparse_moe.experts.185.w2", "model.layers.25.block_sparse_moe.experts.186.w2", "model.layers.25.block_sparse_moe.experts.187.w2", "model.layers.25.block_sparse_moe.experts.188.w2", "model.layers.25.block_sparse_moe.experts.189.w2", "model.layers.25.block_sparse_moe.experts.190.w2", "model.layers.25.block_sparse_moe.experts.191.w2", "model.layers.25.block_sparse_moe.experts.192.w2", "model.layers.25.block_sparse_moe.experts.193.w2", "model.layers.25.block_sparse_moe.experts.194.w2", "model.layers.25.block_sparse_moe.experts.195.w2", "model.layers.25.block_sparse_moe.experts.196.w2", "model.layers.25.block_sparse_moe.experts.197.w2", "model.layers.25.block_sparse_moe.experts.198.w2", "model.layers.25.block_sparse_moe.experts.199.w2", "model.layers.25.block_sparse_moe.experts.200.w2", "model.layers.25.block_sparse_moe.experts.201.w2", "model.layers.25.block_sparse_moe.experts.202.w2", "model.layers.25.block_sparse_moe.experts.203.w2", "model.layers.25.block_sparse_moe.experts.204.w2", "model.layers.25.block_sparse_moe.experts.205.w2", "model.layers.25.block_sparse_moe.experts.206.w2", "model.layers.25.block_sparse_moe.experts.207.w2", "model.layers.25.block_sparse_moe.experts.208.w2", "model.layers.25.block_sparse_moe.experts.209.w2", "model.layers.25.block_sparse_moe.experts.210.w2", "model.layers.25.block_sparse_moe.experts.211.w2", "model.layers.25.block_sparse_moe.experts.212.w2", "model.layers.25.block_sparse_moe.experts.213.w2", "model.layers.25.block_sparse_moe.experts.214.w2", "model.layers.25.block_sparse_moe.experts.215.w2", "model.layers.25.block_sparse_moe.experts.216.w2", "model.layers.25.block_sparse_moe.experts.217.w2", "model.layers.25.block_sparse_moe.experts.218.w2", "model.layers.25.block_sparse_moe.experts.219.w2", "model.layers.25.block_sparse_moe.experts.220.w2", "model.layers.25.block_sparse_moe.experts.221.w2", "model.layers.25.block_sparse_moe.experts.222.w2", "model.layers.25.block_sparse_moe.experts.223.w2", "model.layers.25.block_sparse_moe.experts.224.w2", "model.layers.25.block_sparse_moe.experts.225.w2", "model.layers.25.block_sparse_moe.experts.226.w2", "model.layers.25.block_sparse_moe.experts.227.w2", "model.layers.25.block_sparse_moe.experts.228.w2", "model.layers.25.block_sparse_moe.experts.229.w2", "model.layers.25.block_sparse_moe.experts.230.w2", "model.layers.25.block_sparse_moe.experts.231.w2", "model.layers.25.block_sparse_moe.experts.232.w2", "model.layers.25.block_sparse_moe.experts.233.w2", "model.layers.25.block_sparse_moe.experts.234.w2", "model.layers.25.block_sparse_moe.experts.235.w2", "model.layers.25.block_sparse_moe.experts.236.w2", "model.layers.25.block_sparse_moe.experts.237.w2", "model.layers.25.block_sparse_moe.experts.238.w2", "model.layers.25.block_sparse_moe.experts.239.w2", "model.layers.25.block_sparse_moe.experts.240.w2", "model.layers.25.block_sparse_moe.experts.241.w2", "model.layers.25.block_sparse_moe.experts.242.w2", "model.layers.25.block_sparse_moe.experts.243.w2", "model.layers.25.block_sparse_moe.experts.244.w2", "model.layers.25.block_sparse_moe.experts.245.w2", "model.layers.25.block_sparse_moe.experts.246.w2", "model.layers.25.block_sparse_moe.experts.247.w2", "model.layers.25.block_sparse_moe.experts.248.w2", "model.layers.25.block_sparse_moe.experts.249.w2", "model.layers.25.block_sparse_moe.experts.250.w2", "model.layers.25.block_sparse_moe.experts.251.w2", "model.layers.25.block_sparse_moe.experts.252.w2", "model.layers.25.block_sparse_moe.experts.253.w2", "model.layers.25.block_sparse_moe.experts.254.w2", "model.layers.25.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0016154840588569752, "dbits": 1207959552 } ] }, { "idx": 130, "layers": [ "model.layers.26.self_attn.q_proj" ], "candidates": [ { "dkld": -0.00019099563360214233, "dbits": 18874368 } ] }, { "idx": 131, "layers": [ "model.layers.26.self_attn.k_proj", "model.layers.26.self_attn.v_proj" ], "candidates": [ { "dkld": 0.0034829478710889927, "dbits": 6291456 } ] }, { "idx": 132, "layers": [ "model.layers.26.self_attn.o_proj" ], "candidates": [ { "dkld": 0.002006999775767293, "dbits": 18874368 } ] }, { "idx": 133, "layers": [ "model.layers.26.block_sparse_moe.experts.0.w1", "model.layers.26.block_sparse_moe.experts.1.w1", "model.layers.26.block_sparse_moe.experts.2.w1", "model.layers.26.block_sparse_moe.experts.3.w1", "model.layers.26.block_sparse_moe.experts.4.w1", "model.layers.26.block_sparse_moe.experts.5.w1", "model.layers.26.block_sparse_moe.experts.6.w1", "model.layers.26.block_sparse_moe.experts.7.w1", "model.layers.26.block_sparse_moe.experts.8.w1", "model.layers.26.block_sparse_moe.experts.9.w1", "model.layers.26.block_sparse_moe.experts.10.w1", "model.layers.26.block_sparse_moe.experts.11.w1", "model.layers.26.block_sparse_moe.experts.12.w1", "model.layers.26.block_sparse_moe.experts.13.w1", "model.layers.26.block_sparse_moe.experts.14.w1", "model.layers.26.block_sparse_moe.experts.15.w1", "model.layers.26.block_sparse_moe.experts.16.w1", "model.layers.26.block_sparse_moe.experts.17.w1", "model.layers.26.block_sparse_moe.experts.18.w1", "model.layers.26.block_sparse_moe.experts.19.w1", "model.layers.26.block_sparse_moe.experts.20.w1", "model.layers.26.block_sparse_moe.experts.21.w1", "model.layers.26.block_sparse_moe.experts.22.w1", "model.layers.26.block_sparse_moe.experts.23.w1", "model.layers.26.block_sparse_moe.experts.24.w1", "model.layers.26.block_sparse_moe.experts.25.w1", "model.layers.26.block_sparse_moe.experts.26.w1", "model.layers.26.block_sparse_moe.experts.27.w1", "model.layers.26.block_sparse_moe.experts.28.w1", "model.layers.26.block_sparse_moe.experts.29.w1", "model.layers.26.block_sparse_moe.experts.30.w1", "model.layers.26.block_sparse_moe.experts.31.w1", "model.layers.26.block_sparse_moe.experts.32.w1", "model.layers.26.block_sparse_moe.experts.33.w1", "model.layers.26.block_sparse_moe.experts.34.w1", "model.layers.26.block_sparse_moe.experts.35.w1", "model.layers.26.block_sparse_moe.experts.36.w1", "model.layers.26.block_sparse_moe.experts.37.w1", "model.layers.26.block_sparse_moe.experts.38.w1", "model.layers.26.block_sparse_moe.experts.39.w1", "model.layers.26.block_sparse_moe.experts.40.w1", "model.layers.26.block_sparse_moe.experts.41.w1", "model.layers.26.block_sparse_moe.experts.42.w1", "model.layers.26.block_sparse_moe.experts.43.w1", "model.layers.26.block_sparse_moe.experts.44.w1", "model.layers.26.block_sparse_moe.experts.45.w1", "model.layers.26.block_sparse_moe.experts.46.w1", "model.layers.26.block_sparse_moe.experts.47.w1", "model.layers.26.block_sparse_moe.experts.48.w1", "model.layers.26.block_sparse_moe.experts.49.w1", "model.layers.26.block_sparse_moe.experts.50.w1", "model.layers.26.block_sparse_moe.experts.51.w1", "model.layers.26.block_sparse_moe.experts.52.w1", "model.layers.26.block_sparse_moe.experts.53.w1", "model.layers.26.block_sparse_moe.experts.54.w1", "model.layers.26.block_sparse_moe.experts.55.w1", "model.layers.26.block_sparse_moe.experts.56.w1", "model.layers.26.block_sparse_moe.experts.57.w1", "model.layers.26.block_sparse_moe.experts.58.w1", "model.layers.26.block_sparse_moe.experts.59.w1", "model.layers.26.block_sparse_moe.experts.60.w1", "model.layers.26.block_sparse_moe.experts.61.w1", "model.layers.26.block_sparse_moe.experts.62.w1", "model.layers.26.block_sparse_moe.experts.63.w1", "model.layers.26.block_sparse_moe.experts.64.w1", "model.layers.26.block_sparse_moe.experts.65.w1", "model.layers.26.block_sparse_moe.experts.66.w1", "model.layers.26.block_sparse_moe.experts.67.w1", "model.layers.26.block_sparse_moe.experts.68.w1", "model.layers.26.block_sparse_moe.experts.69.w1", "model.layers.26.block_sparse_moe.experts.70.w1", "model.layers.26.block_sparse_moe.experts.71.w1", "model.layers.26.block_sparse_moe.experts.72.w1", "model.layers.26.block_sparse_moe.experts.73.w1", "model.layers.26.block_sparse_moe.experts.74.w1", "model.layers.26.block_sparse_moe.experts.75.w1", "model.layers.26.block_sparse_moe.experts.76.w1", "model.layers.26.block_sparse_moe.experts.77.w1", "model.layers.26.block_sparse_moe.experts.78.w1", "model.layers.26.block_sparse_moe.experts.79.w1", "model.layers.26.block_sparse_moe.experts.80.w1", "model.layers.26.block_sparse_moe.experts.81.w1", "model.layers.26.block_sparse_moe.experts.82.w1", "model.layers.26.block_sparse_moe.experts.83.w1", "model.layers.26.block_sparse_moe.experts.84.w1", "model.layers.26.block_sparse_moe.experts.85.w1", "model.layers.26.block_sparse_moe.experts.86.w1", "model.layers.26.block_sparse_moe.experts.87.w1", "model.layers.26.block_sparse_moe.experts.88.w1", "model.layers.26.block_sparse_moe.experts.89.w1", "model.layers.26.block_sparse_moe.experts.90.w1", "model.layers.26.block_sparse_moe.experts.91.w1", "model.layers.26.block_sparse_moe.experts.92.w1", "model.layers.26.block_sparse_moe.experts.93.w1", "model.layers.26.block_sparse_moe.experts.94.w1", "model.layers.26.block_sparse_moe.experts.95.w1", "model.layers.26.block_sparse_moe.experts.96.w1", "model.layers.26.block_sparse_moe.experts.97.w1", "model.layers.26.block_sparse_moe.experts.98.w1", "model.layers.26.block_sparse_moe.experts.99.w1", "model.layers.26.block_sparse_moe.experts.100.w1", "model.layers.26.block_sparse_moe.experts.101.w1", "model.layers.26.block_sparse_moe.experts.102.w1", "model.layers.26.block_sparse_moe.experts.103.w1", "model.layers.26.block_sparse_moe.experts.104.w1", "model.layers.26.block_sparse_moe.experts.105.w1", "model.layers.26.block_sparse_moe.experts.106.w1", "model.layers.26.block_sparse_moe.experts.107.w1", "model.layers.26.block_sparse_moe.experts.108.w1", "model.layers.26.block_sparse_moe.experts.109.w1", "model.layers.26.block_sparse_moe.experts.110.w1", "model.layers.26.block_sparse_moe.experts.111.w1", "model.layers.26.block_sparse_moe.experts.112.w1", "model.layers.26.block_sparse_moe.experts.113.w1", "model.layers.26.block_sparse_moe.experts.114.w1", "model.layers.26.block_sparse_moe.experts.115.w1", "model.layers.26.block_sparse_moe.experts.116.w1", "model.layers.26.block_sparse_moe.experts.117.w1", "model.layers.26.block_sparse_moe.experts.118.w1", "model.layers.26.block_sparse_moe.experts.119.w1", "model.layers.26.block_sparse_moe.experts.120.w1", "model.layers.26.block_sparse_moe.experts.121.w1", "model.layers.26.block_sparse_moe.experts.122.w1", "model.layers.26.block_sparse_moe.experts.123.w1", "model.layers.26.block_sparse_moe.experts.124.w1", "model.layers.26.block_sparse_moe.experts.125.w1", "model.layers.26.block_sparse_moe.experts.126.w1", "model.layers.26.block_sparse_moe.experts.127.w1", "model.layers.26.block_sparse_moe.experts.128.w1", "model.layers.26.block_sparse_moe.experts.129.w1", "model.layers.26.block_sparse_moe.experts.130.w1", "model.layers.26.block_sparse_moe.experts.131.w1", "model.layers.26.block_sparse_moe.experts.132.w1", "model.layers.26.block_sparse_moe.experts.133.w1", "model.layers.26.block_sparse_moe.experts.134.w1", "model.layers.26.block_sparse_moe.experts.135.w1", "model.layers.26.block_sparse_moe.experts.136.w1", "model.layers.26.block_sparse_moe.experts.137.w1", "model.layers.26.block_sparse_moe.experts.138.w1", "model.layers.26.block_sparse_moe.experts.139.w1", "model.layers.26.block_sparse_moe.experts.140.w1", "model.layers.26.block_sparse_moe.experts.141.w1", "model.layers.26.block_sparse_moe.experts.142.w1", "model.layers.26.block_sparse_moe.experts.143.w1", "model.layers.26.block_sparse_moe.experts.144.w1", "model.layers.26.block_sparse_moe.experts.145.w1", "model.layers.26.block_sparse_moe.experts.146.w1", "model.layers.26.block_sparse_moe.experts.147.w1", "model.layers.26.block_sparse_moe.experts.148.w1", "model.layers.26.block_sparse_moe.experts.149.w1", "model.layers.26.block_sparse_moe.experts.150.w1", "model.layers.26.block_sparse_moe.experts.151.w1", "model.layers.26.block_sparse_moe.experts.152.w1", "model.layers.26.block_sparse_moe.experts.153.w1", "model.layers.26.block_sparse_moe.experts.154.w1", "model.layers.26.block_sparse_moe.experts.155.w1", "model.layers.26.block_sparse_moe.experts.156.w1", "model.layers.26.block_sparse_moe.experts.157.w1", "model.layers.26.block_sparse_moe.experts.158.w1", "model.layers.26.block_sparse_moe.experts.159.w1", "model.layers.26.block_sparse_moe.experts.160.w1", "model.layers.26.block_sparse_moe.experts.161.w1", "model.layers.26.block_sparse_moe.experts.162.w1", "model.layers.26.block_sparse_moe.experts.163.w1", "model.layers.26.block_sparse_moe.experts.164.w1", "model.layers.26.block_sparse_moe.experts.165.w1", "model.layers.26.block_sparse_moe.experts.166.w1", "model.layers.26.block_sparse_moe.experts.167.w1", "model.layers.26.block_sparse_moe.experts.168.w1", "model.layers.26.block_sparse_moe.experts.169.w1", "model.layers.26.block_sparse_moe.experts.170.w1", "model.layers.26.block_sparse_moe.experts.171.w1", "model.layers.26.block_sparse_moe.experts.172.w1", "model.layers.26.block_sparse_moe.experts.173.w1", "model.layers.26.block_sparse_moe.experts.174.w1", "model.layers.26.block_sparse_moe.experts.175.w1", "model.layers.26.block_sparse_moe.experts.176.w1", "model.layers.26.block_sparse_moe.experts.177.w1", "model.layers.26.block_sparse_moe.experts.178.w1", "model.layers.26.block_sparse_moe.experts.179.w1", "model.layers.26.block_sparse_moe.experts.180.w1", "model.layers.26.block_sparse_moe.experts.181.w1", "model.layers.26.block_sparse_moe.experts.182.w1", "model.layers.26.block_sparse_moe.experts.183.w1", "model.layers.26.block_sparse_moe.experts.184.w1", "model.layers.26.block_sparse_moe.experts.185.w1", "model.layers.26.block_sparse_moe.experts.186.w1", "model.layers.26.block_sparse_moe.experts.187.w1", "model.layers.26.block_sparse_moe.experts.188.w1", "model.layers.26.block_sparse_moe.experts.189.w1", "model.layers.26.block_sparse_moe.experts.190.w1", "model.layers.26.block_sparse_moe.experts.191.w1", "model.layers.26.block_sparse_moe.experts.192.w1", "model.layers.26.block_sparse_moe.experts.193.w1", "model.layers.26.block_sparse_moe.experts.194.w1", "model.layers.26.block_sparse_moe.experts.195.w1", "model.layers.26.block_sparse_moe.experts.196.w1", "model.layers.26.block_sparse_moe.experts.197.w1", "model.layers.26.block_sparse_moe.experts.198.w1", "model.layers.26.block_sparse_moe.experts.199.w1", "model.layers.26.block_sparse_moe.experts.200.w1", "model.layers.26.block_sparse_moe.experts.201.w1", "model.layers.26.block_sparse_moe.experts.202.w1", "model.layers.26.block_sparse_moe.experts.203.w1", "model.layers.26.block_sparse_moe.experts.204.w1", "model.layers.26.block_sparse_moe.experts.205.w1", "model.layers.26.block_sparse_moe.experts.206.w1", "model.layers.26.block_sparse_moe.experts.207.w1", "model.layers.26.block_sparse_moe.experts.208.w1", "model.layers.26.block_sparse_moe.experts.209.w1", "model.layers.26.block_sparse_moe.experts.210.w1", "model.layers.26.block_sparse_moe.experts.211.w1", "model.layers.26.block_sparse_moe.experts.212.w1", "model.layers.26.block_sparse_moe.experts.213.w1", "model.layers.26.block_sparse_moe.experts.214.w1", "model.layers.26.block_sparse_moe.experts.215.w1", "model.layers.26.block_sparse_moe.experts.216.w1", "model.layers.26.block_sparse_moe.experts.217.w1", "model.layers.26.block_sparse_moe.experts.218.w1", "model.layers.26.block_sparse_moe.experts.219.w1", "model.layers.26.block_sparse_moe.experts.220.w1", "model.layers.26.block_sparse_moe.experts.221.w1", "model.layers.26.block_sparse_moe.experts.222.w1", "model.layers.26.block_sparse_moe.experts.223.w1", "model.layers.26.block_sparse_moe.experts.224.w1", "model.layers.26.block_sparse_moe.experts.225.w1", "model.layers.26.block_sparse_moe.experts.226.w1", "model.layers.26.block_sparse_moe.experts.227.w1", "model.layers.26.block_sparse_moe.experts.228.w1", "model.layers.26.block_sparse_moe.experts.229.w1", "model.layers.26.block_sparse_moe.experts.230.w1", "model.layers.26.block_sparse_moe.experts.231.w1", "model.layers.26.block_sparse_moe.experts.232.w1", "model.layers.26.block_sparse_moe.experts.233.w1", "model.layers.26.block_sparse_moe.experts.234.w1", "model.layers.26.block_sparse_moe.experts.235.w1", "model.layers.26.block_sparse_moe.experts.236.w1", "model.layers.26.block_sparse_moe.experts.237.w1", "model.layers.26.block_sparse_moe.experts.238.w1", "model.layers.26.block_sparse_moe.experts.239.w1", "model.layers.26.block_sparse_moe.experts.240.w1", "model.layers.26.block_sparse_moe.experts.241.w1", "model.layers.26.block_sparse_moe.experts.242.w1", "model.layers.26.block_sparse_moe.experts.243.w1", "model.layers.26.block_sparse_moe.experts.244.w1", "model.layers.26.block_sparse_moe.experts.245.w1", "model.layers.26.block_sparse_moe.experts.246.w1", "model.layers.26.block_sparse_moe.experts.247.w1", "model.layers.26.block_sparse_moe.experts.248.w1", "model.layers.26.block_sparse_moe.experts.249.w1", "model.layers.26.block_sparse_moe.experts.250.w1", "model.layers.26.block_sparse_moe.experts.251.w1", "model.layers.26.block_sparse_moe.experts.252.w1", "model.layers.26.block_sparse_moe.experts.253.w1", "model.layers.26.block_sparse_moe.experts.254.w1", "model.layers.26.block_sparse_moe.experts.255.w1", "model.layers.26.block_sparse_moe.experts.0.w3", "model.layers.26.block_sparse_moe.experts.1.w3", "model.layers.26.block_sparse_moe.experts.2.w3", "model.layers.26.block_sparse_moe.experts.3.w3", "model.layers.26.block_sparse_moe.experts.4.w3", "model.layers.26.block_sparse_moe.experts.5.w3", "model.layers.26.block_sparse_moe.experts.6.w3", "model.layers.26.block_sparse_moe.experts.7.w3", "model.layers.26.block_sparse_moe.experts.8.w3", "model.layers.26.block_sparse_moe.experts.9.w3", "model.layers.26.block_sparse_moe.experts.10.w3", "model.layers.26.block_sparse_moe.experts.11.w3", "model.layers.26.block_sparse_moe.experts.12.w3", "model.layers.26.block_sparse_moe.experts.13.w3", "model.layers.26.block_sparse_moe.experts.14.w3", "model.layers.26.block_sparse_moe.experts.15.w3", "model.layers.26.block_sparse_moe.experts.16.w3", "model.layers.26.block_sparse_moe.experts.17.w3", "model.layers.26.block_sparse_moe.experts.18.w3", "model.layers.26.block_sparse_moe.experts.19.w3", "model.layers.26.block_sparse_moe.experts.20.w3", "model.layers.26.block_sparse_moe.experts.21.w3", "model.layers.26.block_sparse_moe.experts.22.w3", "model.layers.26.block_sparse_moe.experts.23.w3", "model.layers.26.block_sparse_moe.experts.24.w3", "model.layers.26.block_sparse_moe.experts.25.w3", "model.layers.26.block_sparse_moe.experts.26.w3", "model.layers.26.block_sparse_moe.experts.27.w3", "model.layers.26.block_sparse_moe.experts.28.w3", "model.layers.26.block_sparse_moe.experts.29.w3", "model.layers.26.block_sparse_moe.experts.30.w3", "model.layers.26.block_sparse_moe.experts.31.w3", "model.layers.26.block_sparse_moe.experts.32.w3", "model.layers.26.block_sparse_moe.experts.33.w3", "model.layers.26.block_sparse_moe.experts.34.w3", "model.layers.26.block_sparse_moe.experts.35.w3", "model.layers.26.block_sparse_moe.experts.36.w3", "model.layers.26.block_sparse_moe.experts.37.w3", "model.layers.26.block_sparse_moe.experts.38.w3", "model.layers.26.block_sparse_moe.experts.39.w3", "model.layers.26.block_sparse_moe.experts.40.w3", "model.layers.26.block_sparse_moe.experts.41.w3", "model.layers.26.block_sparse_moe.experts.42.w3", "model.layers.26.block_sparse_moe.experts.43.w3", "model.layers.26.block_sparse_moe.experts.44.w3", "model.layers.26.block_sparse_moe.experts.45.w3", "model.layers.26.block_sparse_moe.experts.46.w3", "model.layers.26.block_sparse_moe.experts.47.w3", "model.layers.26.block_sparse_moe.experts.48.w3", "model.layers.26.block_sparse_moe.experts.49.w3", "model.layers.26.block_sparse_moe.experts.50.w3", "model.layers.26.block_sparse_moe.experts.51.w3", "model.layers.26.block_sparse_moe.experts.52.w3", "model.layers.26.block_sparse_moe.experts.53.w3", "model.layers.26.block_sparse_moe.experts.54.w3", "model.layers.26.block_sparse_moe.experts.55.w3", "model.layers.26.block_sparse_moe.experts.56.w3", "model.layers.26.block_sparse_moe.experts.57.w3", "model.layers.26.block_sparse_moe.experts.58.w3", "model.layers.26.block_sparse_moe.experts.59.w3", "model.layers.26.block_sparse_moe.experts.60.w3", "model.layers.26.block_sparse_moe.experts.61.w3", "model.layers.26.block_sparse_moe.experts.62.w3", "model.layers.26.block_sparse_moe.experts.63.w3", "model.layers.26.block_sparse_moe.experts.64.w3", "model.layers.26.block_sparse_moe.experts.65.w3", "model.layers.26.block_sparse_moe.experts.66.w3", "model.layers.26.block_sparse_moe.experts.67.w3", "model.layers.26.block_sparse_moe.experts.68.w3", "model.layers.26.block_sparse_moe.experts.69.w3", "model.layers.26.block_sparse_moe.experts.70.w3", "model.layers.26.block_sparse_moe.experts.71.w3", "model.layers.26.block_sparse_moe.experts.72.w3", "model.layers.26.block_sparse_moe.experts.73.w3", "model.layers.26.block_sparse_moe.experts.74.w3", "model.layers.26.block_sparse_moe.experts.75.w3", "model.layers.26.block_sparse_moe.experts.76.w3", "model.layers.26.block_sparse_moe.experts.77.w3", "model.layers.26.block_sparse_moe.experts.78.w3", "model.layers.26.block_sparse_moe.experts.79.w3", "model.layers.26.block_sparse_moe.experts.80.w3", "model.layers.26.block_sparse_moe.experts.81.w3", "model.layers.26.block_sparse_moe.experts.82.w3", "model.layers.26.block_sparse_moe.experts.83.w3", "model.layers.26.block_sparse_moe.experts.84.w3", "model.layers.26.block_sparse_moe.experts.85.w3", "model.layers.26.block_sparse_moe.experts.86.w3", "model.layers.26.block_sparse_moe.experts.87.w3", "model.layers.26.block_sparse_moe.experts.88.w3", "model.layers.26.block_sparse_moe.experts.89.w3", "model.layers.26.block_sparse_moe.experts.90.w3", "model.layers.26.block_sparse_moe.experts.91.w3", "model.layers.26.block_sparse_moe.experts.92.w3", "model.layers.26.block_sparse_moe.experts.93.w3", "model.layers.26.block_sparse_moe.experts.94.w3", "model.layers.26.block_sparse_moe.experts.95.w3", "model.layers.26.block_sparse_moe.experts.96.w3", "model.layers.26.block_sparse_moe.experts.97.w3", "model.layers.26.block_sparse_moe.experts.98.w3", "model.layers.26.block_sparse_moe.experts.99.w3", "model.layers.26.block_sparse_moe.experts.100.w3", "model.layers.26.block_sparse_moe.experts.101.w3", "model.layers.26.block_sparse_moe.experts.102.w3", "model.layers.26.block_sparse_moe.experts.103.w3", "model.layers.26.block_sparse_moe.experts.104.w3", "model.layers.26.block_sparse_moe.experts.105.w3", "model.layers.26.block_sparse_moe.experts.106.w3", "model.layers.26.block_sparse_moe.experts.107.w3", "model.layers.26.block_sparse_moe.experts.108.w3", "model.layers.26.block_sparse_moe.experts.109.w3", "model.layers.26.block_sparse_moe.experts.110.w3", "model.layers.26.block_sparse_moe.experts.111.w3", "model.layers.26.block_sparse_moe.experts.112.w3", "model.layers.26.block_sparse_moe.experts.113.w3", "model.layers.26.block_sparse_moe.experts.114.w3", "model.layers.26.block_sparse_moe.experts.115.w3", "model.layers.26.block_sparse_moe.experts.116.w3", "model.layers.26.block_sparse_moe.experts.117.w3", "model.layers.26.block_sparse_moe.experts.118.w3", "model.layers.26.block_sparse_moe.experts.119.w3", "model.layers.26.block_sparse_moe.experts.120.w3", "model.layers.26.block_sparse_moe.experts.121.w3", "model.layers.26.block_sparse_moe.experts.122.w3", "model.layers.26.block_sparse_moe.experts.123.w3", "model.layers.26.block_sparse_moe.experts.124.w3", "model.layers.26.block_sparse_moe.experts.125.w3", "model.layers.26.block_sparse_moe.experts.126.w3", "model.layers.26.block_sparse_moe.experts.127.w3", "model.layers.26.block_sparse_moe.experts.128.w3", "model.layers.26.block_sparse_moe.experts.129.w3", "model.layers.26.block_sparse_moe.experts.130.w3", "model.layers.26.block_sparse_moe.experts.131.w3", "model.layers.26.block_sparse_moe.experts.132.w3", "model.layers.26.block_sparse_moe.experts.133.w3", "model.layers.26.block_sparse_moe.experts.134.w3", "model.layers.26.block_sparse_moe.experts.135.w3", "model.layers.26.block_sparse_moe.experts.136.w3", "model.layers.26.block_sparse_moe.experts.137.w3", "model.layers.26.block_sparse_moe.experts.138.w3", "model.layers.26.block_sparse_moe.experts.139.w3", "model.layers.26.block_sparse_moe.experts.140.w3", "model.layers.26.block_sparse_moe.experts.141.w3", "model.layers.26.block_sparse_moe.experts.142.w3", "model.layers.26.block_sparse_moe.experts.143.w3", "model.layers.26.block_sparse_moe.experts.144.w3", "model.layers.26.block_sparse_moe.experts.145.w3", "model.layers.26.block_sparse_moe.experts.146.w3", "model.layers.26.block_sparse_moe.experts.147.w3", "model.layers.26.block_sparse_moe.experts.148.w3", "model.layers.26.block_sparse_moe.experts.149.w3", "model.layers.26.block_sparse_moe.experts.150.w3", "model.layers.26.block_sparse_moe.experts.151.w3", "model.layers.26.block_sparse_moe.experts.152.w3", "model.layers.26.block_sparse_moe.experts.153.w3", "model.layers.26.block_sparse_moe.experts.154.w3", "model.layers.26.block_sparse_moe.experts.155.w3", "model.layers.26.block_sparse_moe.experts.156.w3", "model.layers.26.block_sparse_moe.experts.157.w3", "model.layers.26.block_sparse_moe.experts.158.w3", "model.layers.26.block_sparse_moe.experts.159.w3", "model.layers.26.block_sparse_moe.experts.160.w3", "model.layers.26.block_sparse_moe.experts.161.w3", "model.layers.26.block_sparse_moe.experts.162.w3", "model.layers.26.block_sparse_moe.experts.163.w3", "model.layers.26.block_sparse_moe.experts.164.w3", "model.layers.26.block_sparse_moe.experts.165.w3", "model.layers.26.block_sparse_moe.experts.166.w3", "model.layers.26.block_sparse_moe.experts.167.w3", "model.layers.26.block_sparse_moe.experts.168.w3", "model.layers.26.block_sparse_moe.experts.169.w3", "model.layers.26.block_sparse_moe.experts.170.w3", "model.layers.26.block_sparse_moe.experts.171.w3", "model.layers.26.block_sparse_moe.experts.172.w3", "model.layers.26.block_sparse_moe.experts.173.w3", "model.layers.26.block_sparse_moe.experts.174.w3", "model.layers.26.block_sparse_moe.experts.175.w3", "model.layers.26.block_sparse_moe.experts.176.w3", "model.layers.26.block_sparse_moe.experts.177.w3", "model.layers.26.block_sparse_moe.experts.178.w3", "model.layers.26.block_sparse_moe.experts.179.w3", "model.layers.26.block_sparse_moe.experts.180.w3", "model.layers.26.block_sparse_moe.experts.181.w3", "model.layers.26.block_sparse_moe.experts.182.w3", "model.layers.26.block_sparse_moe.experts.183.w3", "model.layers.26.block_sparse_moe.experts.184.w3", "model.layers.26.block_sparse_moe.experts.185.w3", "model.layers.26.block_sparse_moe.experts.186.w3", "model.layers.26.block_sparse_moe.experts.187.w3", "model.layers.26.block_sparse_moe.experts.188.w3", "model.layers.26.block_sparse_moe.experts.189.w3", "model.layers.26.block_sparse_moe.experts.190.w3", "model.layers.26.block_sparse_moe.experts.191.w3", "model.layers.26.block_sparse_moe.experts.192.w3", "model.layers.26.block_sparse_moe.experts.193.w3", "model.layers.26.block_sparse_moe.experts.194.w3", "model.layers.26.block_sparse_moe.experts.195.w3", "model.layers.26.block_sparse_moe.experts.196.w3", "model.layers.26.block_sparse_moe.experts.197.w3", "model.layers.26.block_sparse_moe.experts.198.w3", "model.layers.26.block_sparse_moe.experts.199.w3", "model.layers.26.block_sparse_moe.experts.200.w3", "model.layers.26.block_sparse_moe.experts.201.w3", "model.layers.26.block_sparse_moe.experts.202.w3", "model.layers.26.block_sparse_moe.experts.203.w3", "model.layers.26.block_sparse_moe.experts.204.w3", "model.layers.26.block_sparse_moe.experts.205.w3", "model.layers.26.block_sparse_moe.experts.206.w3", "model.layers.26.block_sparse_moe.experts.207.w3", "model.layers.26.block_sparse_moe.experts.208.w3", "model.layers.26.block_sparse_moe.experts.209.w3", "model.layers.26.block_sparse_moe.experts.210.w3", "model.layers.26.block_sparse_moe.experts.211.w3", "model.layers.26.block_sparse_moe.experts.212.w3", "model.layers.26.block_sparse_moe.experts.213.w3", "model.layers.26.block_sparse_moe.experts.214.w3", "model.layers.26.block_sparse_moe.experts.215.w3", "model.layers.26.block_sparse_moe.experts.216.w3", "model.layers.26.block_sparse_moe.experts.217.w3", "model.layers.26.block_sparse_moe.experts.218.w3", "model.layers.26.block_sparse_moe.experts.219.w3", "model.layers.26.block_sparse_moe.experts.220.w3", "model.layers.26.block_sparse_moe.experts.221.w3", "model.layers.26.block_sparse_moe.experts.222.w3", "model.layers.26.block_sparse_moe.experts.223.w3", "model.layers.26.block_sparse_moe.experts.224.w3", "model.layers.26.block_sparse_moe.experts.225.w3", "model.layers.26.block_sparse_moe.experts.226.w3", "model.layers.26.block_sparse_moe.experts.227.w3", "model.layers.26.block_sparse_moe.experts.228.w3", "model.layers.26.block_sparse_moe.experts.229.w3", "model.layers.26.block_sparse_moe.experts.230.w3", "model.layers.26.block_sparse_moe.experts.231.w3", "model.layers.26.block_sparse_moe.experts.232.w3", "model.layers.26.block_sparse_moe.experts.233.w3", "model.layers.26.block_sparse_moe.experts.234.w3", "model.layers.26.block_sparse_moe.experts.235.w3", "model.layers.26.block_sparse_moe.experts.236.w3", "model.layers.26.block_sparse_moe.experts.237.w3", "model.layers.26.block_sparse_moe.experts.238.w3", "model.layers.26.block_sparse_moe.experts.239.w3", "model.layers.26.block_sparse_moe.experts.240.w3", "model.layers.26.block_sparse_moe.experts.241.w3", "model.layers.26.block_sparse_moe.experts.242.w3", "model.layers.26.block_sparse_moe.experts.243.w3", "model.layers.26.block_sparse_moe.experts.244.w3", "model.layers.26.block_sparse_moe.experts.245.w3", "model.layers.26.block_sparse_moe.experts.246.w3", "model.layers.26.block_sparse_moe.experts.247.w3", "model.layers.26.block_sparse_moe.experts.248.w3", "model.layers.26.block_sparse_moe.experts.249.w3", "model.layers.26.block_sparse_moe.experts.250.w3", "model.layers.26.block_sparse_moe.experts.251.w3", "model.layers.26.block_sparse_moe.experts.252.w3", "model.layers.26.block_sparse_moe.experts.253.w3", "model.layers.26.block_sparse_moe.experts.254.w3", "model.layers.26.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.0008041176944971196, "dbits": 2415919104 } ] }, { "idx": 134, "layers": [ "model.layers.26.block_sparse_moe.experts.0.w2", "model.layers.26.block_sparse_moe.experts.1.w2", "model.layers.26.block_sparse_moe.experts.2.w2", "model.layers.26.block_sparse_moe.experts.3.w2", "model.layers.26.block_sparse_moe.experts.4.w2", "model.layers.26.block_sparse_moe.experts.5.w2", "model.layers.26.block_sparse_moe.experts.6.w2", "model.layers.26.block_sparse_moe.experts.7.w2", "model.layers.26.block_sparse_moe.experts.8.w2", "model.layers.26.block_sparse_moe.experts.9.w2", "model.layers.26.block_sparse_moe.experts.10.w2", "model.layers.26.block_sparse_moe.experts.11.w2", "model.layers.26.block_sparse_moe.experts.12.w2", "model.layers.26.block_sparse_moe.experts.13.w2", "model.layers.26.block_sparse_moe.experts.14.w2", "model.layers.26.block_sparse_moe.experts.15.w2", "model.layers.26.block_sparse_moe.experts.16.w2", "model.layers.26.block_sparse_moe.experts.17.w2", "model.layers.26.block_sparse_moe.experts.18.w2", "model.layers.26.block_sparse_moe.experts.19.w2", "model.layers.26.block_sparse_moe.experts.20.w2", "model.layers.26.block_sparse_moe.experts.21.w2", "model.layers.26.block_sparse_moe.experts.22.w2", "model.layers.26.block_sparse_moe.experts.23.w2", "model.layers.26.block_sparse_moe.experts.24.w2", "model.layers.26.block_sparse_moe.experts.25.w2", "model.layers.26.block_sparse_moe.experts.26.w2", "model.layers.26.block_sparse_moe.experts.27.w2", "model.layers.26.block_sparse_moe.experts.28.w2", "model.layers.26.block_sparse_moe.experts.29.w2", "model.layers.26.block_sparse_moe.experts.30.w2", "model.layers.26.block_sparse_moe.experts.31.w2", "model.layers.26.block_sparse_moe.experts.32.w2", "model.layers.26.block_sparse_moe.experts.33.w2", "model.layers.26.block_sparse_moe.experts.34.w2", "model.layers.26.block_sparse_moe.experts.35.w2", "model.layers.26.block_sparse_moe.experts.36.w2", "model.layers.26.block_sparse_moe.experts.37.w2", "model.layers.26.block_sparse_moe.experts.38.w2", "model.layers.26.block_sparse_moe.experts.39.w2", "model.layers.26.block_sparse_moe.experts.40.w2", "model.layers.26.block_sparse_moe.experts.41.w2", "model.layers.26.block_sparse_moe.experts.42.w2", "model.layers.26.block_sparse_moe.experts.43.w2", "model.layers.26.block_sparse_moe.experts.44.w2", "model.layers.26.block_sparse_moe.experts.45.w2", "model.layers.26.block_sparse_moe.experts.46.w2", "model.layers.26.block_sparse_moe.experts.47.w2", "model.layers.26.block_sparse_moe.experts.48.w2", "model.layers.26.block_sparse_moe.experts.49.w2", "model.layers.26.block_sparse_moe.experts.50.w2", "model.layers.26.block_sparse_moe.experts.51.w2", "model.layers.26.block_sparse_moe.experts.52.w2", "model.layers.26.block_sparse_moe.experts.53.w2", "model.layers.26.block_sparse_moe.experts.54.w2", "model.layers.26.block_sparse_moe.experts.55.w2", "model.layers.26.block_sparse_moe.experts.56.w2", "model.layers.26.block_sparse_moe.experts.57.w2", "model.layers.26.block_sparse_moe.experts.58.w2", "model.layers.26.block_sparse_moe.experts.59.w2", "model.layers.26.block_sparse_moe.experts.60.w2", "model.layers.26.block_sparse_moe.experts.61.w2", "model.layers.26.block_sparse_moe.experts.62.w2", "model.layers.26.block_sparse_moe.experts.63.w2", "model.layers.26.block_sparse_moe.experts.64.w2", "model.layers.26.block_sparse_moe.experts.65.w2", "model.layers.26.block_sparse_moe.experts.66.w2", "model.layers.26.block_sparse_moe.experts.67.w2", "model.layers.26.block_sparse_moe.experts.68.w2", "model.layers.26.block_sparse_moe.experts.69.w2", "model.layers.26.block_sparse_moe.experts.70.w2", "model.layers.26.block_sparse_moe.experts.71.w2", "model.layers.26.block_sparse_moe.experts.72.w2", "model.layers.26.block_sparse_moe.experts.73.w2", "model.layers.26.block_sparse_moe.experts.74.w2", "model.layers.26.block_sparse_moe.experts.75.w2", "model.layers.26.block_sparse_moe.experts.76.w2", "model.layers.26.block_sparse_moe.experts.77.w2", "model.layers.26.block_sparse_moe.experts.78.w2", "model.layers.26.block_sparse_moe.experts.79.w2", "model.layers.26.block_sparse_moe.experts.80.w2", "model.layers.26.block_sparse_moe.experts.81.w2", "model.layers.26.block_sparse_moe.experts.82.w2", "model.layers.26.block_sparse_moe.experts.83.w2", "model.layers.26.block_sparse_moe.experts.84.w2", "model.layers.26.block_sparse_moe.experts.85.w2", "model.layers.26.block_sparse_moe.experts.86.w2", "model.layers.26.block_sparse_moe.experts.87.w2", "model.layers.26.block_sparse_moe.experts.88.w2", "model.layers.26.block_sparse_moe.experts.89.w2", "model.layers.26.block_sparse_moe.experts.90.w2", "model.layers.26.block_sparse_moe.experts.91.w2", "model.layers.26.block_sparse_moe.experts.92.w2", "model.layers.26.block_sparse_moe.experts.93.w2", "model.layers.26.block_sparse_moe.experts.94.w2", "model.layers.26.block_sparse_moe.experts.95.w2", "model.layers.26.block_sparse_moe.experts.96.w2", "model.layers.26.block_sparse_moe.experts.97.w2", "model.layers.26.block_sparse_moe.experts.98.w2", "model.layers.26.block_sparse_moe.experts.99.w2", "model.layers.26.block_sparse_moe.experts.100.w2", "model.layers.26.block_sparse_moe.experts.101.w2", "model.layers.26.block_sparse_moe.experts.102.w2", "model.layers.26.block_sparse_moe.experts.103.w2", "model.layers.26.block_sparse_moe.experts.104.w2", "model.layers.26.block_sparse_moe.experts.105.w2", "model.layers.26.block_sparse_moe.experts.106.w2", "model.layers.26.block_sparse_moe.experts.107.w2", "model.layers.26.block_sparse_moe.experts.108.w2", "model.layers.26.block_sparse_moe.experts.109.w2", "model.layers.26.block_sparse_moe.experts.110.w2", "model.layers.26.block_sparse_moe.experts.111.w2", "model.layers.26.block_sparse_moe.experts.112.w2", "model.layers.26.block_sparse_moe.experts.113.w2", "model.layers.26.block_sparse_moe.experts.114.w2", "model.layers.26.block_sparse_moe.experts.115.w2", "model.layers.26.block_sparse_moe.experts.116.w2", "model.layers.26.block_sparse_moe.experts.117.w2", "model.layers.26.block_sparse_moe.experts.118.w2", "model.layers.26.block_sparse_moe.experts.119.w2", "model.layers.26.block_sparse_moe.experts.120.w2", "model.layers.26.block_sparse_moe.experts.121.w2", "model.layers.26.block_sparse_moe.experts.122.w2", "model.layers.26.block_sparse_moe.experts.123.w2", "model.layers.26.block_sparse_moe.experts.124.w2", "model.layers.26.block_sparse_moe.experts.125.w2", "model.layers.26.block_sparse_moe.experts.126.w2", "model.layers.26.block_sparse_moe.experts.127.w2", "model.layers.26.block_sparse_moe.experts.128.w2", "model.layers.26.block_sparse_moe.experts.129.w2", "model.layers.26.block_sparse_moe.experts.130.w2", "model.layers.26.block_sparse_moe.experts.131.w2", "model.layers.26.block_sparse_moe.experts.132.w2", "model.layers.26.block_sparse_moe.experts.133.w2", "model.layers.26.block_sparse_moe.experts.134.w2", "model.layers.26.block_sparse_moe.experts.135.w2", "model.layers.26.block_sparse_moe.experts.136.w2", "model.layers.26.block_sparse_moe.experts.137.w2", "model.layers.26.block_sparse_moe.experts.138.w2", "model.layers.26.block_sparse_moe.experts.139.w2", "model.layers.26.block_sparse_moe.experts.140.w2", "model.layers.26.block_sparse_moe.experts.141.w2", "model.layers.26.block_sparse_moe.experts.142.w2", "model.layers.26.block_sparse_moe.experts.143.w2", "model.layers.26.block_sparse_moe.experts.144.w2", "model.layers.26.block_sparse_moe.experts.145.w2", "model.layers.26.block_sparse_moe.experts.146.w2", "model.layers.26.block_sparse_moe.experts.147.w2", "model.layers.26.block_sparse_moe.experts.148.w2", "model.layers.26.block_sparse_moe.experts.149.w2", "model.layers.26.block_sparse_moe.experts.150.w2", "model.layers.26.block_sparse_moe.experts.151.w2", "model.layers.26.block_sparse_moe.experts.152.w2", "model.layers.26.block_sparse_moe.experts.153.w2", "model.layers.26.block_sparse_moe.experts.154.w2", "model.layers.26.block_sparse_moe.experts.155.w2", "model.layers.26.block_sparse_moe.experts.156.w2", "model.layers.26.block_sparse_moe.experts.157.w2", "model.layers.26.block_sparse_moe.experts.158.w2", "model.layers.26.block_sparse_moe.experts.159.w2", "model.layers.26.block_sparse_moe.experts.160.w2", "model.layers.26.block_sparse_moe.experts.161.w2", "model.layers.26.block_sparse_moe.experts.162.w2", "model.layers.26.block_sparse_moe.experts.163.w2", "model.layers.26.block_sparse_moe.experts.164.w2", "model.layers.26.block_sparse_moe.experts.165.w2", "model.layers.26.block_sparse_moe.experts.166.w2", "model.layers.26.block_sparse_moe.experts.167.w2", "model.layers.26.block_sparse_moe.experts.168.w2", "model.layers.26.block_sparse_moe.experts.169.w2", "model.layers.26.block_sparse_moe.experts.170.w2", "model.layers.26.block_sparse_moe.experts.171.w2", "model.layers.26.block_sparse_moe.experts.172.w2", "model.layers.26.block_sparse_moe.experts.173.w2", "model.layers.26.block_sparse_moe.experts.174.w2", "model.layers.26.block_sparse_moe.experts.175.w2", "model.layers.26.block_sparse_moe.experts.176.w2", "model.layers.26.block_sparse_moe.experts.177.w2", "model.layers.26.block_sparse_moe.experts.178.w2", "model.layers.26.block_sparse_moe.experts.179.w2", "model.layers.26.block_sparse_moe.experts.180.w2", "model.layers.26.block_sparse_moe.experts.181.w2", "model.layers.26.block_sparse_moe.experts.182.w2", "model.layers.26.block_sparse_moe.experts.183.w2", "model.layers.26.block_sparse_moe.experts.184.w2", "model.layers.26.block_sparse_moe.experts.185.w2", "model.layers.26.block_sparse_moe.experts.186.w2", "model.layers.26.block_sparse_moe.experts.187.w2", "model.layers.26.block_sparse_moe.experts.188.w2", "model.layers.26.block_sparse_moe.experts.189.w2", "model.layers.26.block_sparse_moe.experts.190.w2", "model.layers.26.block_sparse_moe.experts.191.w2", "model.layers.26.block_sparse_moe.experts.192.w2", "model.layers.26.block_sparse_moe.experts.193.w2", "model.layers.26.block_sparse_moe.experts.194.w2", "model.layers.26.block_sparse_moe.experts.195.w2", "model.layers.26.block_sparse_moe.experts.196.w2", "model.layers.26.block_sparse_moe.experts.197.w2", "model.layers.26.block_sparse_moe.experts.198.w2", "model.layers.26.block_sparse_moe.experts.199.w2", "model.layers.26.block_sparse_moe.experts.200.w2", "model.layers.26.block_sparse_moe.experts.201.w2", "model.layers.26.block_sparse_moe.experts.202.w2", "model.layers.26.block_sparse_moe.experts.203.w2", "model.layers.26.block_sparse_moe.experts.204.w2", "model.layers.26.block_sparse_moe.experts.205.w2", "model.layers.26.block_sparse_moe.experts.206.w2", "model.layers.26.block_sparse_moe.experts.207.w2", "model.layers.26.block_sparse_moe.experts.208.w2", "model.layers.26.block_sparse_moe.experts.209.w2", "model.layers.26.block_sparse_moe.experts.210.w2", "model.layers.26.block_sparse_moe.experts.211.w2", "model.layers.26.block_sparse_moe.experts.212.w2", "model.layers.26.block_sparse_moe.experts.213.w2", "model.layers.26.block_sparse_moe.experts.214.w2", "model.layers.26.block_sparse_moe.experts.215.w2", "model.layers.26.block_sparse_moe.experts.216.w2", "model.layers.26.block_sparse_moe.experts.217.w2", "model.layers.26.block_sparse_moe.experts.218.w2", "model.layers.26.block_sparse_moe.experts.219.w2", "model.layers.26.block_sparse_moe.experts.220.w2", "model.layers.26.block_sparse_moe.experts.221.w2", "model.layers.26.block_sparse_moe.experts.222.w2", "model.layers.26.block_sparse_moe.experts.223.w2", "model.layers.26.block_sparse_moe.experts.224.w2", "model.layers.26.block_sparse_moe.experts.225.w2", "model.layers.26.block_sparse_moe.experts.226.w2", "model.layers.26.block_sparse_moe.experts.227.w2", "model.layers.26.block_sparse_moe.experts.228.w2", "model.layers.26.block_sparse_moe.experts.229.w2", "model.layers.26.block_sparse_moe.experts.230.w2", "model.layers.26.block_sparse_moe.experts.231.w2", "model.layers.26.block_sparse_moe.experts.232.w2", "model.layers.26.block_sparse_moe.experts.233.w2", "model.layers.26.block_sparse_moe.experts.234.w2", "model.layers.26.block_sparse_moe.experts.235.w2", "model.layers.26.block_sparse_moe.experts.236.w2", "model.layers.26.block_sparse_moe.experts.237.w2", "model.layers.26.block_sparse_moe.experts.238.w2", "model.layers.26.block_sparse_moe.experts.239.w2", "model.layers.26.block_sparse_moe.experts.240.w2", "model.layers.26.block_sparse_moe.experts.241.w2", "model.layers.26.block_sparse_moe.experts.242.w2", "model.layers.26.block_sparse_moe.experts.243.w2", "model.layers.26.block_sparse_moe.experts.244.w2", "model.layers.26.block_sparse_moe.experts.245.w2", "model.layers.26.block_sparse_moe.experts.246.w2", "model.layers.26.block_sparse_moe.experts.247.w2", "model.layers.26.block_sparse_moe.experts.248.w2", "model.layers.26.block_sparse_moe.experts.249.w2", "model.layers.26.block_sparse_moe.experts.250.w2", "model.layers.26.block_sparse_moe.experts.251.w2", "model.layers.26.block_sparse_moe.experts.252.w2", "model.layers.26.block_sparse_moe.experts.253.w2", "model.layers.26.block_sparse_moe.experts.254.w2", "model.layers.26.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -5.311630666254841e-05, "dbits": 1207959552 } ] }, { "idx": 135, "layers": [ "model.layers.27.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0009405244141816871, "dbits": 18874368 } ] }, { "idx": 136, "layers": [ "model.layers.27.self_attn.k_proj", "model.layers.27.self_attn.v_proj" ], "candidates": [ { "dkld": 0.007098213955759991, "dbits": 6291456 } ] }, { "idx": 137, "layers": [ "model.layers.27.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0011368021368980408, "dbits": 18874368 } ] }, { "idx": 138, "layers": [ "model.layers.27.block_sparse_moe.experts.0.w1", "model.layers.27.block_sparse_moe.experts.1.w1", "model.layers.27.block_sparse_moe.experts.2.w1", "model.layers.27.block_sparse_moe.experts.3.w1", "model.layers.27.block_sparse_moe.experts.4.w1", "model.layers.27.block_sparse_moe.experts.5.w1", "model.layers.27.block_sparse_moe.experts.6.w1", "model.layers.27.block_sparse_moe.experts.7.w1", "model.layers.27.block_sparse_moe.experts.8.w1", "model.layers.27.block_sparse_moe.experts.9.w1", "model.layers.27.block_sparse_moe.experts.10.w1", "model.layers.27.block_sparse_moe.experts.11.w1", "model.layers.27.block_sparse_moe.experts.12.w1", "model.layers.27.block_sparse_moe.experts.13.w1", "model.layers.27.block_sparse_moe.experts.14.w1", "model.layers.27.block_sparse_moe.experts.15.w1", "model.layers.27.block_sparse_moe.experts.16.w1", "model.layers.27.block_sparse_moe.experts.17.w1", "model.layers.27.block_sparse_moe.experts.18.w1", "model.layers.27.block_sparse_moe.experts.19.w1", "model.layers.27.block_sparse_moe.experts.20.w1", "model.layers.27.block_sparse_moe.experts.21.w1", "model.layers.27.block_sparse_moe.experts.22.w1", "model.layers.27.block_sparse_moe.experts.23.w1", "model.layers.27.block_sparse_moe.experts.24.w1", "model.layers.27.block_sparse_moe.experts.25.w1", "model.layers.27.block_sparse_moe.experts.26.w1", "model.layers.27.block_sparse_moe.experts.27.w1", "model.layers.27.block_sparse_moe.experts.28.w1", "model.layers.27.block_sparse_moe.experts.29.w1", "model.layers.27.block_sparse_moe.experts.30.w1", "model.layers.27.block_sparse_moe.experts.31.w1", "model.layers.27.block_sparse_moe.experts.32.w1", "model.layers.27.block_sparse_moe.experts.33.w1", "model.layers.27.block_sparse_moe.experts.34.w1", "model.layers.27.block_sparse_moe.experts.35.w1", "model.layers.27.block_sparse_moe.experts.36.w1", "model.layers.27.block_sparse_moe.experts.37.w1", "model.layers.27.block_sparse_moe.experts.38.w1", "model.layers.27.block_sparse_moe.experts.39.w1", "model.layers.27.block_sparse_moe.experts.40.w1", "model.layers.27.block_sparse_moe.experts.41.w1", "model.layers.27.block_sparse_moe.experts.42.w1", "model.layers.27.block_sparse_moe.experts.43.w1", "model.layers.27.block_sparse_moe.experts.44.w1", "model.layers.27.block_sparse_moe.experts.45.w1", "model.layers.27.block_sparse_moe.experts.46.w1", "model.layers.27.block_sparse_moe.experts.47.w1", "model.layers.27.block_sparse_moe.experts.48.w1", "model.layers.27.block_sparse_moe.experts.49.w1", "model.layers.27.block_sparse_moe.experts.50.w1", "model.layers.27.block_sparse_moe.experts.51.w1", "model.layers.27.block_sparse_moe.experts.52.w1", "model.layers.27.block_sparse_moe.experts.53.w1", "model.layers.27.block_sparse_moe.experts.54.w1", "model.layers.27.block_sparse_moe.experts.55.w1", "model.layers.27.block_sparse_moe.experts.56.w1", "model.layers.27.block_sparse_moe.experts.57.w1", "model.layers.27.block_sparse_moe.experts.58.w1", "model.layers.27.block_sparse_moe.experts.59.w1", "model.layers.27.block_sparse_moe.experts.60.w1", "model.layers.27.block_sparse_moe.experts.61.w1", "model.layers.27.block_sparse_moe.experts.62.w1", "model.layers.27.block_sparse_moe.experts.63.w1", "model.layers.27.block_sparse_moe.experts.64.w1", "model.layers.27.block_sparse_moe.experts.65.w1", "model.layers.27.block_sparse_moe.experts.66.w1", "model.layers.27.block_sparse_moe.experts.67.w1", "model.layers.27.block_sparse_moe.experts.68.w1", "model.layers.27.block_sparse_moe.experts.69.w1", "model.layers.27.block_sparse_moe.experts.70.w1", "model.layers.27.block_sparse_moe.experts.71.w1", "model.layers.27.block_sparse_moe.experts.72.w1", "model.layers.27.block_sparse_moe.experts.73.w1", "model.layers.27.block_sparse_moe.experts.74.w1", "model.layers.27.block_sparse_moe.experts.75.w1", "model.layers.27.block_sparse_moe.experts.76.w1", "model.layers.27.block_sparse_moe.experts.77.w1", "model.layers.27.block_sparse_moe.experts.78.w1", "model.layers.27.block_sparse_moe.experts.79.w1", "model.layers.27.block_sparse_moe.experts.80.w1", "model.layers.27.block_sparse_moe.experts.81.w1", "model.layers.27.block_sparse_moe.experts.82.w1", "model.layers.27.block_sparse_moe.experts.83.w1", "model.layers.27.block_sparse_moe.experts.84.w1", "model.layers.27.block_sparse_moe.experts.85.w1", "model.layers.27.block_sparse_moe.experts.86.w1", "model.layers.27.block_sparse_moe.experts.87.w1", "model.layers.27.block_sparse_moe.experts.88.w1", "model.layers.27.block_sparse_moe.experts.89.w1", "model.layers.27.block_sparse_moe.experts.90.w1", "model.layers.27.block_sparse_moe.experts.91.w1", "model.layers.27.block_sparse_moe.experts.92.w1", "model.layers.27.block_sparse_moe.experts.93.w1", "model.layers.27.block_sparse_moe.experts.94.w1", "model.layers.27.block_sparse_moe.experts.95.w1", "model.layers.27.block_sparse_moe.experts.96.w1", "model.layers.27.block_sparse_moe.experts.97.w1", "model.layers.27.block_sparse_moe.experts.98.w1", "model.layers.27.block_sparse_moe.experts.99.w1", "model.layers.27.block_sparse_moe.experts.100.w1", "model.layers.27.block_sparse_moe.experts.101.w1", "model.layers.27.block_sparse_moe.experts.102.w1", "model.layers.27.block_sparse_moe.experts.103.w1", "model.layers.27.block_sparse_moe.experts.104.w1", "model.layers.27.block_sparse_moe.experts.105.w1", "model.layers.27.block_sparse_moe.experts.106.w1", "model.layers.27.block_sparse_moe.experts.107.w1", "model.layers.27.block_sparse_moe.experts.108.w1", "model.layers.27.block_sparse_moe.experts.109.w1", "model.layers.27.block_sparse_moe.experts.110.w1", "model.layers.27.block_sparse_moe.experts.111.w1", "model.layers.27.block_sparse_moe.experts.112.w1", "model.layers.27.block_sparse_moe.experts.113.w1", "model.layers.27.block_sparse_moe.experts.114.w1", "model.layers.27.block_sparse_moe.experts.115.w1", "model.layers.27.block_sparse_moe.experts.116.w1", "model.layers.27.block_sparse_moe.experts.117.w1", "model.layers.27.block_sparse_moe.experts.118.w1", "model.layers.27.block_sparse_moe.experts.119.w1", "model.layers.27.block_sparse_moe.experts.120.w1", "model.layers.27.block_sparse_moe.experts.121.w1", "model.layers.27.block_sparse_moe.experts.122.w1", "model.layers.27.block_sparse_moe.experts.123.w1", "model.layers.27.block_sparse_moe.experts.124.w1", "model.layers.27.block_sparse_moe.experts.125.w1", "model.layers.27.block_sparse_moe.experts.126.w1", "model.layers.27.block_sparse_moe.experts.127.w1", "model.layers.27.block_sparse_moe.experts.128.w1", "model.layers.27.block_sparse_moe.experts.129.w1", "model.layers.27.block_sparse_moe.experts.130.w1", "model.layers.27.block_sparse_moe.experts.131.w1", "model.layers.27.block_sparse_moe.experts.132.w1", "model.layers.27.block_sparse_moe.experts.133.w1", "model.layers.27.block_sparse_moe.experts.134.w1", "model.layers.27.block_sparse_moe.experts.135.w1", "model.layers.27.block_sparse_moe.experts.136.w1", "model.layers.27.block_sparse_moe.experts.137.w1", "model.layers.27.block_sparse_moe.experts.138.w1", "model.layers.27.block_sparse_moe.experts.139.w1", "model.layers.27.block_sparse_moe.experts.140.w1", "model.layers.27.block_sparse_moe.experts.141.w1", "model.layers.27.block_sparse_moe.experts.142.w1", "model.layers.27.block_sparse_moe.experts.143.w1", "model.layers.27.block_sparse_moe.experts.144.w1", "model.layers.27.block_sparse_moe.experts.145.w1", "model.layers.27.block_sparse_moe.experts.146.w1", "model.layers.27.block_sparse_moe.experts.147.w1", "model.layers.27.block_sparse_moe.experts.148.w1", "model.layers.27.block_sparse_moe.experts.149.w1", "model.layers.27.block_sparse_moe.experts.150.w1", "model.layers.27.block_sparse_moe.experts.151.w1", "model.layers.27.block_sparse_moe.experts.152.w1", "model.layers.27.block_sparse_moe.experts.153.w1", "model.layers.27.block_sparse_moe.experts.154.w1", "model.layers.27.block_sparse_moe.experts.155.w1", "model.layers.27.block_sparse_moe.experts.156.w1", "model.layers.27.block_sparse_moe.experts.157.w1", "model.layers.27.block_sparse_moe.experts.158.w1", "model.layers.27.block_sparse_moe.experts.159.w1", "model.layers.27.block_sparse_moe.experts.160.w1", "model.layers.27.block_sparse_moe.experts.161.w1", "model.layers.27.block_sparse_moe.experts.162.w1", "model.layers.27.block_sparse_moe.experts.163.w1", "model.layers.27.block_sparse_moe.experts.164.w1", "model.layers.27.block_sparse_moe.experts.165.w1", "model.layers.27.block_sparse_moe.experts.166.w1", "model.layers.27.block_sparse_moe.experts.167.w1", "model.layers.27.block_sparse_moe.experts.168.w1", "model.layers.27.block_sparse_moe.experts.169.w1", "model.layers.27.block_sparse_moe.experts.170.w1", "model.layers.27.block_sparse_moe.experts.171.w1", "model.layers.27.block_sparse_moe.experts.172.w1", "model.layers.27.block_sparse_moe.experts.173.w1", "model.layers.27.block_sparse_moe.experts.174.w1", "model.layers.27.block_sparse_moe.experts.175.w1", "model.layers.27.block_sparse_moe.experts.176.w1", "model.layers.27.block_sparse_moe.experts.177.w1", "model.layers.27.block_sparse_moe.experts.178.w1", "model.layers.27.block_sparse_moe.experts.179.w1", "model.layers.27.block_sparse_moe.experts.180.w1", "model.layers.27.block_sparse_moe.experts.181.w1", "model.layers.27.block_sparse_moe.experts.182.w1", "model.layers.27.block_sparse_moe.experts.183.w1", "model.layers.27.block_sparse_moe.experts.184.w1", "model.layers.27.block_sparse_moe.experts.185.w1", "model.layers.27.block_sparse_moe.experts.186.w1", "model.layers.27.block_sparse_moe.experts.187.w1", "model.layers.27.block_sparse_moe.experts.188.w1", "model.layers.27.block_sparse_moe.experts.189.w1", "model.layers.27.block_sparse_moe.experts.190.w1", "model.layers.27.block_sparse_moe.experts.191.w1", "model.layers.27.block_sparse_moe.experts.192.w1", "model.layers.27.block_sparse_moe.experts.193.w1", "model.layers.27.block_sparse_moe.experts.194.w1", "model.layers.27.block_sparse_moe.experts.195.w1", "model.layers.27.block_sparse_moe.experts.196.w1", "model.layers.27.block_sparse_moe.experts.197.w1", "model.layers.27.block_sparse_moe.experts.198.w1", "model.layers.27.block_sparse_moe.experts.199.w1", "model.layers.27.block_sparse_moe.experts.200.w1", "model.layers.27.block_sparse_moe.experts.201.w1", "model.layers.27.block_sparse_moe.experts.202.w1", "model.layers.27.block_sparse_moe.experts.203.w1", "model.layers.27.block_sparse_moe.experts.204.w1", "model.layers.27.block_sparse_moe.experts.205.w1", "model.layers.27.block_sparse_moe.experts.206.w1", "model.layers.27.block_sparse_moe.experts.207.w1", "model.layers.27.block_sparse_moe.experts.208.w1", "model.layers.27.block_sparse_moe.experts.209.w1", "model.layers.27.block_sparse_moe.experts.210.w1", "model.layers.27.block_sparse_moe.experts.211.w1", "model.layers.27.block_sparse_moe.experts.212.w1", "model.layers.27.block_sparse_moe.experts.213.w1", "model.layers.27.block_sparse_moe.experts.214.w1", "model.layers.27.block_sparse_moe.experts.215.w1", "model.layers.27.block_sparse_moe.experts.216.w1", "model.layers.27.block_sparse_moe.experts.217.w1", "model.layers.27.block_sparse_moe.experts.218.w1", "model.layers.27.block_sparse_moe.experts.219.w1", "model.layers.27.block_sparse_moe.experts.220.w1", "model.layers.27.block_sparse_moe.experts.221.w1", "model.layers.27.block_sparse_moe.experts.222.w1", "model.layers.27.block_sparse_moe.experts.223.w1", "model.layers.27.block_sparse_moe.experts.224.w1", "model.layers.27.block_sparse_moe.experts.225.w1", "model.layers.27.block_sparse_moe.experts.226.w1", "model.layers.27.block_sparse_moe.experts.227.w1", "model.layers.27.block_sparse_moe.experts.228.w1", "model.layers.27.block_sparse_moe.experts.229.w1", "model.layers.27.block_sparse_moe.experts.230.w1", "model.layers.27.block_sparse_moe.experts.231.w1", "model.layers.27.block_sparse_moe.experts.232.w1", "model.layers.27.block_sparse_moe.experts.233.w1", "model.layers.27.block_sparse_moe.experts.234.w1", "model.layers.27.block_sparse_moe.experts.235.w1", "model.layers.27.block_sparse_moe.experts.236.w1", "model.layers.27.block_sparse_moe.experts.237.w1", "model.layers.27.block_sparse_moe.experts.238.w1", "model.layers.27.block_sparse_moe.experts.239.w1", "model.layers.27.block_sparse_moe.experts.240.w1", "model.layers.27.block_sparse_moe.experts.241.w1", "model.layers.27.block_sparse_moe.experts.242.w1", "model.layers.27.block_sparse_moe.experts.243.w1", "model.layers.27.block_sparse_moe.experts.244.w1", "model.layers.27.block_sparse_moe.experts.245.w1", "model.layers.27.block_sparse_moe.experts.246.w1", "model.layers.27.block_sparse_moe.experts.247.w1", "model.layers.27.block_sparse_moe.experts.248.w1", "model.layers.27.block_sparse_moe.experts.249.w1", "model.layers.27.block_sparse_moe.experts.250.w1", "model.layers.27.block_sparse_moe.experts.251.w1", "model.layers.27.block_sparse_moe.experts.252.w1", "model.layers.27.block_sparse_moe.experts.253.w1", "model.layers.27.block_sparse_moe.experts.254.w1", "model.layers.27.block_sparse_moe.experts.255.w1", "model.layers.27.block_sparse_moe.experts.0.w3", "model.layers.27.block_sparse_moe.experts.1.w3", "model.layers.27.block_sparse_moe.experts.2.w3", "model.layers.27.block_sparse_moe.experts.3.w3", "model.layers.27.block_sparse_moe.experts.4.w3", "model.layers.27.block_sparse_moe.experts.5.w3", "model.layers.27.block_sparse_moe.experts.6.w3", "model.layers.27.block_sparse_moe.experts.7.w3", "model.layers.27.block_sparse_moe.experts.8.w3", "model.layers.27.block_sparse_moe.experts.9.w3", "model.layers.27.block_sparse_moe.experts.10.w3", "model.layers.27.block_sparse_moe.experts.11.w3", "model.layers.27.block_sparse_moe.experts.12.w3", "model.layers.27.block_sparse_moe.experts.13.w3", "model.layers.27.block_sparse_moe.experts.14.w3", "model.layers.27.block_sparse_moe.experts.15.w3", "model.layers.27.block_sparse_moe.experts.16.w3", "model.layers.27.block_sparse_moe.experts.17.w3", "model.layers.27.block_sparse_moe.experts.18.w3", "model.layers.27.block_sparse_moe.experts.19.w3", "model.layers.27.block_sparse_moe.experts.20.w3", "model.layers.27.block_sparse_moe.experts.21.w3", "model.layers.27.block_sparse_moe.experts.22.w3", "model.layers.27.block_sparse_moe.experts.23.w3", "model.layers.27.block_sparse_moe.experts.24.w3", "model.layers.27.block_sparse_moe.experts.25.w3", "model.layers.27.block_sparse_moe.experts.26.w3", "model.layers.27.block_sparse_moe.experts.27.w3", "model.layers.27.block_sparse_moe.experts.28.w3", "model.layers.27.block_sparse_moe.experts.29.w3", "model.layers.27.block_sparse_moe.experts.30.w3", "model.layers.27.block_sparse_moe.experts.31.w3", "model.layers.27.block_sparse_moe.experts.32.w3", "model.layers.27.block_sparse_moe.experts.33.w3", "model.layers.27.block_sparse_moe.experts.34.w3", "model.layers.27.block_sparse_moe.experts.35.w3", "model.layers.27.block_sparse_moe.experts.36.w3", "model.layers.27.block_sparse_moe.experts.37.w3", "model.layers.27.block_sparse_moe.experts.38.w3", "model.layers.27.block_sparse_moe.experts.39.w3", "model.layers.27.block_sparse_moe.experts.40.w3", "model.layers.27.block_sparse_moe.experts.41.w3", "model.layers.27.block_sparse_moe.experts.42.w3", "model.layers.27.block_sparse_moe.experts.43.w3", "model.layers.27.block_sparse_moe.experts.44.w3", "model.layers.27.block_sparse_moe.experts.45.w3", "model.layers.27.block_sparse_moe.experts.46.w3", "model.layers.27.block_sparse_moe.experts.47.w3", "model.layers.27.block_sparse_moe.experts.48.w3", "model.layers.27.block_sparse_moe.experts.49.w3", "model.layers.27.block_sparse_moe.experts.50.w3", "model.layers.27.block_sparse_moe.experts.51.w3", "model.layers.27.block_sparse_moe.experts.52.w3", "model.layers.27.block_sparse_moe.experts.53.w3", "model.layers.27.block_sparse_moe.experts.54.w3", "model.layers.27.block_sparse_moe.experts.55.w3", "model.layers.27.block_sparse_moe.experts.56.w3", "model.layers.27.block_sparse_moe.experts.57.w3", "model.layers.27.block_sparse_moe.experts.58.w3", "model.layers.27.block_sparse_moe.experts.59.w3", "model.layers.27.block_sparse_moe.experts.60.w3", "model.layers.27.block_sparse_moe.experts.61.w3", "model.layers.27.block_sparse_moe.experts.62.w3", "model.layers.27.block_sparse_moe.experts.63.w3", "model.layers.27.block_sparse_moe.experts.64.w3", "model.layers.27.block_sparse_moe.experts.65.w3", "model.layers.27.block_sparse_moe.experts.66.w3", "model.layers.27.block_sparse_moe.experts.67.w3", "model.layers.27.block_sparse_moe.experts.68.w3", "model.layers.27.block_sparse_moe.experts.69.w3", "model.layers.27.block_sparse_moe.experts.70.w3", "model.layers.27.block_sparse_moe.experts.71.w3", "model.layers.27.block_sparse_moe.experts.72.w3", "model.layers.27.block_sparse_moe.experts.73.w3", "model.layers.27.block_sparse_moe.experts.74.w3", "model.layers.27.block_sparse_moe.experts.75.w3", "model.layers.27.block_sparse_moe.experts.76.w3", "model.layers.27.block_sparse_moe.experts.77.w3", "model.layers.27.block_sparse_moe.experts.78.w3", "model.layers.27.block_sparse_moe.experts.79.w3", "model.layers.27.block_sparse_moe.experts.80.w3", "model.layers.27.block_sparse_moe.experts.81.w3", "model.layers.27.block_sparse_moe.experts.82.w3", "model.layers.27.block_sparse_moe.experts.83.w3", "model.layers.27.block_sparse_moe.experts.84.w3", "model.layers.27.block_sparse_moe.experts.85.w3", "model.layers.27.block_sparse_moe.experts.86.w3", "model.layers.27.block_sparse_moe.experts.87.w3", "model.layers.27.block_sparse_moe.experts.88.w3", "model.layers.27.block_sparse_moe.experts.89.w3", "model.layers.27.block_sparse_moe.experts.90.w3", "model.layers.27.block_sparse_moe.experts.91.w3", "model.layers.27.block_sparse_moe.experts.92.w3", "model.layers.27.block_sparse_moe.experts.93.w3", "model.layers.27.block_sparse_moe.experts.94.w3", "model.layers.27.block_sparse_moe.experts.95.w3", "model.layers.27.block_sparse_moe.experts.96.w3", "model.layers.27.block_sparse_moe.experts.97.w3", "model.layers.27.block_sparse_moe.experts.98.w3", "model.layers.27.block_sparse_moe.experts.99.w3", "model.layers.27.block_sparse_moe.experts.100.w3", "model.layers.27.block_sparse_moe.experts.101.w3", "model.layers.27.block_sparse_moe.experts.102.w3", "model.layers.27.block_sparse_moe.experts.103.w3", "model.layers.27.block_sparse_moe.experts.104.w3", "model.layers.27.block_sparse_moe.experts.105.w3", "model.layers.27.block_sparse_moe.experts.106.w3", "model.layers.27.block_sparse_moe.experts.107.w3", "model.layers.27.block_sparse_moe.experts.108.w3", "model.layers.27.block_sparse_moe.experts.109.w3", "model.layers.27.block_sparse_moe.experts.110.w3", "model.layers.27.block_sparse_moe.experts.111.w3", "model.layers.27.block_sparse_moe.experts.112.w3", "model.layers.27.block_sparse_moe.experts.113.w3", "model.layers.27.block_sparse_moe.experts.114.w3", "model.layers.27.block_sparse_moe.experts.115.w3", "model.layers.27.block_sparse_moe.experts.116.w3", "model.layers.27.block_sparse_moe.experts.117.w3", "model.layers.27.block_sparse_moe.experts.118.w3", "model.layers.27.block_sparse_moe.experts.119.w3", "model.layers.27.block_sparse_moe.experts.120.w3", "model.layers.27.block_sparse_moe.experts.121.w3", "model.layers.27.block_sparse_moe.experts.122.w3", "model.layers.27.block_sparse_moe.experts.123.w3", "model.layers.27.block_sparse_moe.experts.124.w3", "model.layers.27.block_sparse_moe.experts.125.w3", "model.layers.27.block_sparse_moe.experts.126.w3", "model.layers.27.block_sparse_moe.experts.127.w3", "model.layers.27.block_sparse_moe.experts.128.w3", "model.layers.27.block_sparse_moe.experts.129.w3", "model.layers.27.block_sparse_moe.experts.130.w3", "model.layers.27.block_sparse_moe.experts.131.w3", "model.layers.27.block_sparse_moe.experts.132.w3", "model.layers.27.block_sparse_moe.experts.133.w3", "model.layers.27.block_sparse_moe.experts.134.w3", "model.layers.27.block_sparse_moe.experts.135.w3", "model.layers.27.block_sparse_moe.experts.136.w3", "model.layers.27.block_sparse_moe.experts.137.w3", "model.layers.27.block_sparse_moe.experts.138.w3", "model.layers.27.block_sparse_moe.experts.139.w3", "model.layers.27.block_sparse_moe.experts.140.w3", "model.layers.27.block_sparse_moe.experts.141.w3", "model.layers.27.block_sparse_moe.experts.142.w3", "model.layers.27.block_sparse_moe.experts.143.w3", "model.layers.27.block_sparse_moe.experts.144.w3", "model.layers.27.block_sparse_moe.experts.145.w3", "model.layers.27.block_sparse_moe.experts.146.w3", "model.layers.27.block_sparse_moe.experts.147.w3", "model.layers.27.block_sparse_moe.experts.148.w3", "model.layers.27.block_sparse_moe.experts.149.w3", "model.layers.27.block_sparse_moe.experts.150.w3", "model.layers.27.block_sparse_moe.experts.151.w3", "model.layers.27.block_sparse_moe.experts.152.w3", "model.layers.27.block_sparse_moe.experts.153.w3", "model.layers.27.block_sparse_moe.experts.154.w3", "model.layers.27.block_sparse_moe.experts.155.w3", "model.layers.27.block_sparse_moe.experts.156.w3", "model.layers.27.block_sparse_moe.experts.157.w3", "model.layers.27.block_sparse_moe.experts.158.w3", "model.layers.27.block_sparse_moe.experts.159.w3", "model.layers.27.block_sparse_moe.experts.160.w3", "model.layers.27.block_sparse_moe.experts.161.w3", "model.layers.27.block_sparse_moe.experts.162.w3", "model.layers.27.block_sparse_moe.experts.163.w3", "model.layers.27.block_sparse_moe.experts.164.w3", "model.layers.27.block_sparse_moe.experts.165.w3", "model.layers.27.block_sparse_moe.experts.166.w3", "model.layers.27.block_sparse_moe.experts.167.w3", "model.layers.27.block_sparse_moe.experts.168.w3", "model.layers.27.block_sparse_moe.experts.169.w3", "model.layers.27.block_sparse_moe.experts.170.w3", "model.layers.27.block_sparse_moe.experts.171.w3", "model.layers.27.block_sparse_moe.experts.172.w3", "model.layers.27.block_sparse_moe.experts.173.w3", "model.layers.27.block_sparse_moe.experts.174.w3", "model.layers.27.block_sparse_moe.experts.175.w3", "model.layers.27.block_sparse_moe.experts.176.w3", "model.layers.27.block_sparse_moe.experts.177.w3", "model.layers.27.block_sparse_moe.experts.178.w3", "model.layers.27.block_sparse_moe.experts.179.w3", "model.layers.27.block_sparse_moe.experts.180.w3", "model.layers.27.block_sparse_moe.experts.181.w3", "model.layers.27.block_sparse_moe.experts.182.w3", "model.layers.27.block_sparse_moe.experts.183.w3", "model.layers.27.block_sparse_moe.experts.184.w3", "model.layers.27.block_sparse_moe.experts.185.w3", "model.layers.27.block_sparse_moe.experts.186.w3", "model.layers.27.block_sparse_moe.experts.187.w3", "model.layers.27.block_sparse_moe.experts.188.w3", "model.layers.27.block_sparse_moe.experts.189.w3", "model.layers.27.block_sparse_moe.experts.190.w3", "model.layers.27.block_sparse_moe.experts.191.w3", "model.layers.27.block_sparse_moe.experts.192.w3", "model.layers.27.block_sparse_moe.experts.193.w3", "model.layers.27.block_sparse_moe.experts.194.w3", "model.layers.27.block_sparse_moe.experts.195.w3", "model.layers.27.block_sparse_moe.experts.196.w3", "model.layers.27.block_sparse_moe.experts.197.w3", "model.layers.27.block_sparse_moe.experts.198.w3", "model.layers.27.block_sparse_moe.experts.199.w3", "model.layers.27.block_sparse_moe.experts.200.w3", "model.layers.27.block_sparse_moe.experts.201.w3", "model.layers.27.block_sparse_moe.experts.202.w3", "model.layers.27.block_sparse_moe.experts.203.w3", "model.layers.27.block_sparse_moe.experts.204.w3", "model.layers.27.block_sparse_moe.experts.205.w3", "model.layers.27.block_sparse_moe.experts.206.w3", "model.layers.27.block_sparse_moe.experts.207.w3", "model.layers.27.block_sparse_moe.experts.208.w3", "model.layers.27.block_sparse_moe.experts.209.w3", "model.layers.27.block_sparse_moe.experts.210.w3", "model.layers.27.block_sparse_moe.experts.211.w3", "model.layers.27.block_sparse_moe.experts.212.w3", "model.layers.27.block_sparse_moe.experts.213.w3", "model.layers.27.block_sparse_moe.experts.214.w3", "model.layers.27.block_sparse_moe.experts.215.w3", "model.layers.27.block_sparse_moe.experts.216.w3", "model.layers.27.block_sparse_moe.experts.217.w3", "model.layers.27.block_sparse_moe.experts.218.w3", "model.layers.27.block_sparse_moe.experts.219.w3", "model.layers.27.block_sparse_moe.experts.220.w3", "model.layers.27.block_sparse_moe.experts.221.w3", "model.layers.27.block_sparse_moe.experts.222.w3", "model.layers.27.block_sparse_moe.experts.223.w3", "model.layers.27.block_sparse_moe.experts.224.w3", "model.layers.27.block_sparse_moe.experts.225.w3", "model.layers.27.block_sparse_moe.experts.226.w3", "model.layers.27.block_sparse_moe.experts.227.w3", "model.layers.27.block_sparse_moe.experts.228.w3", "model.layers.27.block_sparse_moe.experts.229.w3", "model.layers.27.block_sparse_moe.experts.230.w3", "model.layers.27.block_sparse_moe.experts.231.w3", "model.layers.27.block_sparse_moe.experts.232.w3", "model.layers.27.block_sparse_moe.experts.233.w3", "model.layers.27.block_sparse_moe.experts.234.w3", "model.layers.27.block_sparse_moe.experts.235.w3", "model.layers.27.block_sparse_moe.experts.236.w3", "model.layers.27.block_sparse_moe.experts.237.w3", "model.layers.27.block_sparse_moe.experts.238.w3", "model.layers.27.block_sparse_moe.experts.239.w3", "model.layers.27.block_sparse_moe.experts.240.w3", "model.layers.27.block_sparse_moe.experts.241.w3", "model.layers.27.block_sparse_moe.experts.242.w3", "model.layers.27.block_sparse_moe.experts.243.w3", "model.layers.27.block_sparse_moe.experts.244.w3", "model.layers.27.block_sparse_moe.experts.245.w3", "model.layers.27.block_sparse_moe.experts.246.w3", "model.layers.27.block_sparse_moe.experts.247.w3", "model.layers.27.block_sparse_moe.experts.248.w3", "model.layers.27.block_sparse_moe.experts.249.w3", "model.layers.27.block_sparse_moe.experts.250.w3", "model.layers.27.block_sparse_moe.experts.251.w3", "model.layers.27.block_sparse_moe.experts.252.w3", "model.layers.27.block_sparse_moe.experts.253.w3", "model.layers.27.block_sparse_moe.experts.254.w3", "model.layers.27.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.0004864547401666752, "dbits": 2415919104 } ] }, { "idx": 139, "layers": [ "model.layers.27.block_sparse_moe.experts.0.w2", "model.layers.27.block_sparse_moe.experts.1.w2", "model.layers.27.block_sparse_moe.experts.2.w2", "model.layers.27.block_sparse_moe.experts.3.w2", "model.layers.27.block_sparse_moe.experts.4.w2", "model.layers.27.block_sparse_moe.experts.5.w2", "model.layers.27.block_sparse_moe.experts.6.w2", "model.layers.27.block_sparse_moe.experts.7.w2", "model.layers.27.block_sparse_moe.experts.8.w2", "model.layers.27.block_sparse_moe.experts.9.w2", "model.layers.27.block_sparse_moe.experts.10.w2", "model.layers.27.block_sparse_moe.experts.11.w2", "model.layers.27.block_sparse_moe.experts.12.w2", "model.layers.27.block_sparse_moe.experts.13.w2", "model.layers.27.block_sparse_moe.experts.14.w2", "model.layers.27.block_sparse_moe.experts.15.w2", "model.layers.27.block_sparse_moe.experts.16.w2", "model.layers.27.block_sparse_moe.experts.17.w2", "model.layers.27.block_sparse_moe.experts.18.w2", "model.layers.27.block_sparse_moe.experts.19.w2", "model.layers.27.block_sparse_moe.experts.20.w2", "model.layers.27.block_sparse_moe.experts.21.w2", "model.layers.27.block_sparse_moe.experts.22.w2", "model.layers.27.block_sparse_moe.experts.23.w2", "model.layers.27.block_sparse_moe.experts.24.w2", "model.layers.27.block_sparse_moe.experts.25.w2", "model.layers.27.block_sparse_moe.experts.26.w2", "model.layers.27.block_sparse_moe.experts.27.w2", "model.layers.27.block_sparse_moe.experts.28.w2", "model.layers.27.block_sparse_moe.experts.29.w2", "model.layers.27.block_sparse_moe.experts.30.w2", "model.layers.27.block_sparse_moe.experts.31.w2", "model.layers.27.block_sparse_moe.experts.32.w2", "model.layers.27.block_sparse_moe.experts.33.w2", "model.layers.27.block_sparse_moe.experts.34.w2", "model.layers.27.block_sparse_moe.experts.35.w2", "model.layers.27.block_sparse_moe.experts.36.w2", "model.layers.27.block_sparse_moe.experts.37.w2", "model.layers.27.block_sparse_moe.experts.38.w2", "model.layers.27.block_sparse_moe.experts.39.w2", "model.layers.27.block_sparse_moe.experts.40.w2", "model.layers.27.block_sparse_moe.experts.41.w2", "model.layers.27.block_sparse_moe.experts.42.w2", "model.layers.27.block_sparse_moe.experts.43.w2", "model.layers.27.block_sparse_moe.experts.44.w2", "model.layers.27.block_sparse_moe.experts.45.w2", "model.layers.27.block_sparse_moe.experts.46.w2", "model.layers.27.block_sparse_moe.experts.47.w2", "model.layers.27.block_sparse_moe.experts.48.w2", "model.layers.27.block_sparse_moe.experts.49.w2", "model.layers.27.block_sparse_moe.experts.50.w2", "model.layers.27.block_sparse_moe.experts.51.w2", "model.layers.27.block_sparse_moe.experts.52.w2", "model.layers.27.block_sparse_moe.experts.53.w2", "model.layers.27.block_sparse_moe.experts.54.w2", "model.layers.27.block_sparse_moe.experts.55.w2", "model.layers.27.block_sparse_moe.experts.56.w2", "model.layers.27.block_sparse_moe.experts.57.w2", "model.layers.27.block_sparse_moe.experts.58.w2", "model.layers.27.block_sparse_moe.experts.59.w2", "model.layers.27.block_sparse_moe.experts.60.w2", "model.layers.27.block_sparse_moe.experts.61.w2", "model.layers.27.block_sparse_moe.experts.62.w2", "model.layers.27.block_sparse_moe.experts.63.w2", "model.layers.27.block_sparse_moe.experts.64.w2", "model.layers.27.block_sparse_moe.experts.65.w2", "model.layers.27.block_sparse_moe.experts.66.w2", "model.layers.27.block_sparse_moe.experts.67.w2", "model.layers.27.block_sparse_moe.experts.68.w2", "model.layers.27.block_sparse_moe.experts.69.w2", "model.layers.27.block_sparse_moe.experts.70.w2", "model.layers.27.block_sparse_moe.experts.71.w2", "model.layers.27.block_sparse_moe.experts.72.w2", "model.layers.27.block_sparse_moe.experts.73.w2", "model.layers.27.block_sparse_moe.experts.74.w2", "model.layers.27.block_sparse_moe.experts.75.w2", "model.layers.27.block_sparse_moe.experts.76.w2", "model.layers.27.block_sparse_moe.experts.77.w2", "model.layers.27.block_sparse_moe.experts.78.w2", "model.layers.27.block_sparse_moe.experts.79.w2", "model.layers.27.block_sparse_moe.experts.80.w2", "model.layers.27.block_sparse_moe.experts.81.w2", "model.layers.27.block_sparse_moe.experts.82.w2", "model.layers.27.block_sparse_moe.experts.83.w2", "model.layers.27.block_sparse_moe.experts.84.w2", "model.layers.27.block_sparse_moe.experts.85.w2", "model.layers.27.block_sparse_moe.experts.86.w2", "model.layers.27.block_sparse_moe.experts.87.w2", "model.layers.27.block_sparse_moe.experts.88.w2", "model.layers.27.block_sparse_moe.experts.89.w2", "model.layers.27.block_sparse_moe.experts.90.w2", "model.layers.27.block_sparse_moe.experts.91.w2", "model.layers.27.block_sparse_moe.experts.92.w2", "model.layers.27.block_sparse_moe.experts.93.w2", "model.layers.27.block_sparse_moe.experts.94.w2", "model.layers.27.block_sparse_moe.experts.95.w2", "model.layers.27.block_sparse_moe.experts.96.w2", "model.layers.27.block_sparse_moe.experts.97.w2", "model.layers.27.block_sparse_moe.experts.98.w2", "model.layers.27.block_sparse_moe.experts.99.w2", "model.layers.27.block_sparse_moe.experts.100.w2", "model.layers.27.block_sparse_moe.experts.101.w2", "model.layers.27.block_sparse_moe.experts.102.w2", "model.layers.27.block_sparse_moe.experts.103.w2", "model.layers.27.block_sparse_moe.experts.104.w2", "model.layers.27.block_sparse_moe.experts.105.w2", "model.layers.27.block_sparse_moe.experts.106.w2", "model.layers.27.block_sparse_moe.experts.107.w2", "model.layers.27.block_sparse_moe.experts.108.w2", "model.layers.27.block_sparse_moe.experts.109.w2", "model.layers.27.block_sparse_moe.experts.110.w2", "model.layers.27.block_sparse_moe.experts.111.w2", "model.layers.27.block_sparse_moe.experts.112.w2", "model.layers.27.block_sparse_moe.experts.113.w2", "model.layers.27.block_sparse_moe.experts.114.w2", "model.layers.27.block_sparse_moe.experts.115.w2", "model.layers.27.block_sparse_moe.experts.116.w2", "model.layers.27.block_sparse_moe.experts.117.w2", "model.layers.27.block_sparse_moe.experts.118.w2", "model.layers.27.block_sparse_moe.experts.119.w2", "model.layers.27.block_sparse_moe.experts.120.w2", "model.layers.27.block_sparse_moe.experts.121.w2", "model.layers.27.block_sparse_moe.experts.122.w2", "model.layers.27.block_sparse_moe.experts.123.w2", "model.layers.27.block_sparse_moe.experts.124.w2", "model.layers.27.block_sparse_moe.experts.125.w2", "model.layers.27.block_sparse_moe.experts.126.w2", "model.layers.27.block_sparse_moe.experts.127.w2", "model.layers.27.block_sparse_moe.experts.128.w2", "model.layers.27.block_sparse_moe.experts.129.w2", "model.layers.27.block_sparse_moe.experts.130.w2", "model.layers.27.block_sparse_moe.experts.131.w2", "model.layers.27.block_sparse_moe.experts.132.w2", "model.layers.27.block_sparse_moe.experts.133.w2", "model.layers.27.block_sparse_moe.experts.134.w2", "model.layers.27.block_sparse_moe.experts.135.w2", "model.layers.27.block_sparse_moe.experts.136.w2", "model.layers.27.block_sparse_moe.experts.137.w2", "model.layers.27.block_sparse_moe.experts.138.w2", "model.layers.27.block_sparse_moe.experts.139.w2", "model.layers.27.block_sparse_moe.experts.140.w2", "model.layers.27.block_sparse_moe.experts.141.w2", "model.layers.27.block_sparse_moe.experts.142.w2", "model.layers.27.block_sparse_moe.experts.143.w2", "model.layers.27.block_sparse_moe.experts.144.w2", "model.layers.27.block_sparse_moe.experts.145.w2", "model.layers.27.block_sparse_moe.experts.146.w2", "model.layers.27.block_sparse_moe.experts.147.w2", "model.layers.27.block_sparse_moe.experts.148.w2", "model.layers.27.block_sparse_moe.experts.149.w2", "model.layers.27.block_sparse_moe.experts.150.w2", "model.layers.27.block_sparse_moe.experts.151.w2", "model.layers.27.block_sparse_moe.experts.152.w2", "model.layers.27.block_sparse_moe.experts.153.w2", "model.layers.27.block_sparse_moe.experts.154.w2", "model.layers.27.block_sparse_moe.experts.155.w2", "model.layers.27.block_sparse_moe.experts.156.w2", "model.layers.27.block_sparse_moe.experts.157.w2", "model.layers.27.block_sparse_moe.experts.158.w2", "model.layers.27.block_sparse_moe.experts.159.w2", "model.layers.27.block_sparse_moe.experts.160.w2", "model.layers.27.block_sparse_moe.experts.161.w2", "model.layers.27.block_sparse_moe.experts.162.w2", "model.layers.27.block_sparse_moe.experts.163.w2", "model.layers.27.block_sparse_moe.experts.164.w2", "model.layers.27.block_sparse_moe.experts.165.w2", "model.layers.27.block_sparse_moe.experts.166.w2", "model.layers.27.block_sparse_moe.experts.167.w2", "model.layers.27.block_sparse_moe.experts.168.w2", "model.layers.27.block_sparse_moe.experts.169.w2", "model.layers.27.block_sparse_moe.experts.170.w2", "model.layers.27.block_sparse_moe.experts.171.w2", "model.layers.27.block_sparse_moe.experts.172.w2", "model.layers.27.block_sparse_moe.experts.173.w2", "model.layers.27.block_sparse_moe.experts.174.w2", "model.layers.27.block_sparse_moe.experts.175.w2", "model.layers.27.block_sparse_moe.experts.176.w2", "model.layers.27.block_sparse_moe.experts.177.w2", "model.layers.27.block_sparse_moe.experts.178.w2", "model.layers.27.block_sparse_moe.experts.179.w2", "model.layers.27.block_sparse_moe.experts.180.w2", "model.layers.27.block_sparse_moe.experts.181.w2", "model.layers.27.block_sparse_moe.experts.182.w2", "model.layers.27.block_sparse_moe.experts.183.w2", "model.layers.27.block_sparse_moe.experts.184.w2", "model.layers.27.block_sparse_moe.experts.185.w2", "model.layers.27.block_sparse_moe.experts.186.w2", "model.layers.27.block_sparse_moe.experts.187.w2", "model.layers.27.block_sparse_moe.experts.188.w2", "model.layers.27.block_sparse_moe.experts.189.w2", "model.layers.27.block_sparse_moe.experts.190.w2", "model.layers.27.block_sparse_moe.experts.191.w2", "model.layers.27.block_sparse_moe.experts.192.w2", "model.layers.27.block_sparse_moe.experts.193.w2", "model.layers.27.block_sparse_moe.experts.194.w2", "model.layers.27.block_sparse_moe.experts.195.w2", "model.layers.27.block_sparse_moe.experts.196.w2", "model.layers.27.block_sparse_moe.experts.197.w2", "model.layers.27.block_sparse_moe.experts.198.w2", "model.layers.27.block_sparse_moe.experts.199.w2", "model.layers.27.block_sparse_moe.experts.200.w2", "model.layers.27.block_sparse_moe.experts.201.w2", "model.layers.27.block_sparse_moe.experts.202.w2", "model.layers.27.block_sparse_moe.experts.203.w2", "model.layers.27.block_sparse_moe.experts.204.w2", "model.layers.27.block_sparse_moe.experts.205.w2", "model.layers.27.block_sparse_moe.experts.206.w2", "model.layers.27.block_sparse_moe.experts.207.w2", "model.layers.27.block_sparse_moe.experts.208.w2", "model.layers.27.block_sparse_moe.experts.209.w2", "model.layers.27.block_sparse_moe.experts.210.w2", "model.layers.27.block_sparse_moe.experts.211.w2", "model.layers.27.block_sparse_moe.experts.212.w2", "model.layers.27.block_sparse_moe.experts.213.w2", "model.layers.27.block_sparse_moe.experts.214.w2", "model.layers.27.block_sparse_moe.experts.215.w2", "model.layers.27.block_sparse_moe.experts.216.w2", "model.layers.27.block_sparse_moe.experts.217.w2", "model.layers.27.block_sparse_moe.experts.218.w2", "model.layers.27.block_sparse_moe.experts.219.w2", "model.layers.27.block_sparse_moe.experts.220.w2", "model.layers.27.block_sparse_moe.experts.221.w2", "model.layers.27.block_sparse_moe.experts.222.w2", "model.layers.27.block_sparse_moe.experts.223.w2", "model.layers.27.block_sparse_moe.experts.224.w2", "model.layers.27.block_sparse_moe.experts.225.w2", "model.layers.27.block_sparse_moe.experts.226.w2", "model.layers.27.block_sparse_moe.experts.227.w2", "model.layers.27.block_sparse_moe.experts.228.w2", "model.layers.27.block_sparse_moe.experts.229.w2", "model.layers.27.block_sparse_moe.experts.230.w2", "model.layers.27.block_sparse_moe.experts.231.w2", "model.layers.27.block_sparse_moe.experts.232.w2", "model.layers.27.block_sparse_moe.experts.233.w2", "model.layers.27.block_sparse_moe.experts.234.w2", "model.layers.27.block_sparse_moe.experts.235.w2", "model.layers.27.block_sparse_moe.experts.236.w2", "model.layers.27.block_sparse_moe.experts.237.w2", "model.layers.27.block_sparse_moe.experts.238.w2", "model.layers.27.block_sparse_moe.experts.239.w2", "model.layers.27.block_sparse_moe.experts.240.w2", "model.layers.27.block_sparse_moe.experts.241.w2", "model.layers.27.block_sparse_moe.experts.242.w2", "model.layers.27.block_sparse_moe.experts.243.w2", "model.layers.27.block_sparse_moe.experts.244.w2", "model.layers.27.block_sparse_moe.experts.245.w2", "model.layers.27.block_sparse_moe.experts.246.w2", "model.layers.27.block_sparse_moe.experts.247.w2", "model.layers.27.block_sparse_moe.experts.248.w2", "model.layers.27.block_sparse_moe.experts.249.w2", "model.layers.27.block_sparse_moe.experts.250.w2", "model.layers.27.block_sparse_moe.experts.251.w2", "model.layers.27.block_sparse_moe.experts.252.w2", "model.layers.27.block_sparse_moe.experts.253.w2", "model.layers.27.block_sparse_moe.experts.254.w2", "model.layers.27.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.0003693714737891818, "dbits": 1207959552 } ] }, { "idx": 140, "layers": [ "model.layers.28.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0019343633204698674, "dbits": 18874368 } ] }, { "idx": 141, "layers": [ "model.layers.28.self_attn.k_proj", "model.layers.28.self_attn.v_proj" ], "candidates": [ { "dkld": 0.00171702541410923, "dbits": 6291456 } ] }, { "idx": 142, "layers": [ "model.layers.28.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0006761509925127251, "dbits": 18874368 } ] }, { "idx": 143, "layers": [ "model.layers.28.block_sparse_moe.experts.0.w1", "model.layers.28.block_sparse_moe.experts.1.w1", "model.layers.28.block_sparse_moe.experts.2.w1", "model.layers.28.block_sparse_moe.experts.3.w1", "model.layers.28.block_sparse_moe.experts.4.w1", "model.layers.28.block_sparse_moe.experts.5.w1", "model.layers.28.block_sparse_moe.experts.6.w1", "model.layers.28.block_sparse_moe.experts.7.w1", "model.layers.28.block_sparse_moe.experts.8.w1", "model.layers.28.block_sparse_moe.experts.9.w1", "model.layers.28.block_sparse_moe.experts.10.w1", "model.layers.28.block_sparse_moe.experts.11.w1", "model.layers.28.block_sparse_moe.experts.12.w1", "model.layers.28.block_sparse_moe.experts.13.w1", "model.layers.28.block_sparse_moe.experts.14.w1", "model.layers.28.block_sparse_moe.experts.15.w1", "model.layers.28.block_sparse_moe.experts.16.w1", "model.layers.28.block_sparse_moe.experts.17.w1", "model.layers.28.block_sparse_moe.experts.18.w1", "model.layers.28.block_sparse_moe.experts.19.w1", "model.layers.28.block_sparse_moe.experts.20.w1", "model.layers.28.block_sparse_moe.experts.21.w1", "model.layers.28.block_sparse_moe.experts.22.w1", "model.layers.28.block_sparse_moe.experts.23.w1", "model.layers.28.block_sparse_moe.experts.24.w1", "model.layers.28.block_sparse_moe.experts.25.w1", "model.layers.28.block_sparse_moe.experts.26.w1", "model.layers.28.block_sparse_moe.experts.27.w1", "model.layers.28.block_sparse_moe.experts.28.w1", "model.layers.28.block_sparse_moe.experts.29.w1", "model.layers.28.block_sparse_moe.experts.30.w1", "model.layers.28.block_sparse_moe.experts.31.w1", "model.layers.28.block_sparse_moe.experts.32.w1", "model.layers.28.block_sparse_moe.experts.33.w1", "model.layers.28.block_sparse_moe.experts.34.w1", "model.layers.28.block_sparse_moe.experts.35.w1", "model.layers.28.block_sparse_moe.experts.36.w1", "model.layers.28.block_sparse_moe.experts.37.w1", "model.layers.28.block_sparse_moe.experts.38.w1", "model.layers.28.block_sparse_moe.experts.39.w1", "model.layers.28.block_sparse_moe.experts.40.w1", "model.layers.28.block_sparse_moe.experts.41.w1", "model.layers.28.block_sparse_moe.experts.42.w1", "model.layers.28.block_sparse_moe.experts.43.w1", "model.layers.28.block_sparse_moe.experts.44.w1", "model.layers.28.block_sparse_moe.experts.45.w1", "model.layers.28.block_sparse_moe.experts.46.w1", "model.layers.28.block_sparse_moe.experts.47.w1", "model.layers.28.block_sparse_moe.experts.48.w1", "model.layers.28.block_sparse_moe.experts.49.w1", "model.layers.28.block_sparse_moe.experts.50.w1", "model.layers.28.block_sparse_moe.experts.51.w1", "model.layers.28.block_sparse_moe.experts.52.w1", "model.layers.28.block_sparse_moe.experts.53.w1", "model.layers.28.block_sparse_moe.experts.54.w1", "model.layers.28.block_sparse_moe.experts.55.w1", "model.layers.28.block_sparse_moe.experts.56.w1", "model.layers.28.block_sparse_moe.experts.57.w1", "model.layers.28.block_sparse_moe.experts.58.w1", "model.layers.28.block_sparse_moe.experts.59.w1", "model.layers.28.block_sparse_moe.experts.60.w1", "model.layers.28.block_sparse_moe.experts.61.w1", "model.layers.28.block_sparse_moe.experts.62.w1", "model.layers.28.block_sparse_moe.experts.63.w1", "model.layers.28.block_sparse_moe.experts.64.w1", "model.layers.28.block_sparse_moe.experts.65.w1", "model.layers.28.block_sparse_moe.experts.66.w1", "model.layers.28.block_sparse_moe.experts.67.w1", "model.layers.28.block_sparse_moe.experts.68.w1", "model.layers.28.block_sparse_moe.experts.69.w1", "model.layers.28.block_sparse_moe.experts.70.w1", "model.layers.28.block_sparse_moe.experts.71.w1", "model.layers.28.block_sparse_moe.experts.72.w1", "model.layers.28.block_sparse_moe.experts.73.w1", "model.layers.28.block_sparse_moe.experts.74.w1", "model.layers.28.block_sparse_moe.experts.75.w1", "model.layers.28.block_sparse_moe.experts.76.w1", "model.layers.28.block_sparse_moe.experts.77.w1", "model.layers.28.block_sparse_moe.experts.78.w1", "model.layers.28.block_sparse_moe.experts.79.w1", "model.layers.28.block_sparse_moe.experts.80.w1", "model.layers.28.block_sparse_moe.experts.81.w1", "model.layers.28.block_sparse_moe.experts.82.w1", "model.layers.28.block_sparse_moe.experts.83.w1", "model.layers.28.block_sparse_moe.experts.84.w1", "model.layers.28.block_sparse_moe.experts.85.w1", "model.layers.28.block_sparse_moe.experts.86.w1", "model.layers.28.block_sparse_moe.experts.87.w1", "model.layers.28.block_sparse_moe.experts.88.w1", "model.layers.28.block_sparse_moe.experts.89.w1", "model.layers.28.block_sparse_moe.experts.90.w1", "model.layers.28.block_sparse_moe.experts.91.w1", "model.layers.28.block_sparse_moe.experts.92.w1", "model.layers.28.block_sparse_moe.experts.93.w1", "model.layers.28.block_sparse_moe.experts.94.w1", "model.layers.28.block_sparse_moe.experts.95.w1", "model.layers.28.block_sparse_moe.experts.96.w1", "model.layers.28.block_sparse_moe.experts.97.w1", "model.layers.28.block_sparse_moe.experts.98.w1", "model.layers.28.block_sparse_moe.experts.99.w1", "model.layers.28.block_sparse_moe.experts.100.w1", "model.layers.28.block_sparse_moe.experts.101.w1", "model.layers.28.block_sparse_moe.experts.102.w1", "model.layers.28.block_sparse_moe.experts.103.w1", "model.layers.28.block_sparse_moe.experts.104.w1", "model.layers.28.block_sparse_moe.experts.105.w1", "model.layers.28.block_sparse_moe.experts.106.w1", "model.layers.28.block_sparse_moe.experts.107.w1", "model.layers.28.block_sparse_moe.experts.108.w1", "model.layers.28.block_sparse_moe.experts.109.w1", "model.layers.28.block_sparse_moe.experts.110.w1", "model.layers.28.block_sparse_moe.experts.111.w1", "model.layers.28.block_sparse_moe.experts.112.w1", "model.layers.28.block_sparse_moe.experts.113.w1", "model.layers.28.block_sparse_moe.experts.114.w1", "model.layers.28.block_sparse_moe.experts.115.w1", "model.layers.28.block_sparse_moe.experts.116.w1", "model.layers.28.block_sparse_moe.experts.117.w1", "model.layers.28.block_sparse_moe.experts.118.w1", "model.layers.28.block_sparse_moe.experts.119.w1", "model.layers.28.block_sparse_moe.experts.120.w1", "model.layers.28.block_sparse_moe.experts.121.w1", "model.layers.28.block_sparse_moe.experts.122.w1", "model.layers.28.block_sparse_moe.experts.123.w1", "model.layers.28.block_sparse_moe.experts.124.w1", "model.layers.28.block_sparse_moe.experts.125.w1", "model.layers.28.block_sparse_moe.experts.126.w1", "model.layers.28.block_sparse_moe.experts.127.w1", "model.layers.28.block_sparse_moe.experts.128.w1", "model.layers.28.block_sparse_moe.experts.129.w1", "model.layers.28.block_sparse_moe.experts.130.w1", "model.layers.28.block_sparse_moe.experts.131.w1", "model.layers.28.block_sparse_moe.experts.132.w1", "model.layers.28.block_sparse_moe.experts.133.w1", "model.layers.28.block_sparse_moe.experts.134.w1", "model.layers.28.block_sparse_moe.experts.135.w1", "model.layers.28.block_sparse_moe.experts.136.w1", "model.layers.28.block_sparse_moe.experts.137.w1", "model.layers.28.block_sparse_moe.experts.138.w1", "model.layers.28.block_sparse_moe.experts.139.w1", "model.layers.28.block_sparse_moe.experts.140.w1", "model.layers.28.block_sparse_moe.experts.141.w1", "model.layers.28.block_sparse_moe.experts.142.w1", "model.layers.28.block_sparse_moe.experts.143.w1", "model.layers.28.block_sparse_moe.experts.144.w1", "model.layers.28.block_sparse_moe.experts.145.w1", "model.layers.28.block_sparse_moe.experts.146.w1", "model.layers.28.block_sparse_moe.experts.147.w1", "model.layers.28.block_sparse_moe.experts.148.w1", "model.layers.28.block_sparse_moe.experts.149.w1", "model.layers.28.block_sparse_moe.experts.150.w1", "model.layers.28.block_sparse_moe.experts.151.w1", "model.layers.28.block_sparse_moe.experts.152.w1", "model.layers.28.block_sparse_moe.experts.153.w1", "model.layers.28.block_sparse_moe.experts.154.w1", "model.layers.28.block_sparse_moe.experts.155.w1", "model.layers.28.block_sparse_moe.experts.156.w1", "model.layers.28.block_sparse_moe.experts.157.w1", "model.layers.28.block_sparse_moe.experts.158.w1", "model.layers.28.block_sparse_moe.experts.159.w1", "model.layers.28.block_sparse_moe.experts.160.w1", "model.layers.28.block_sparse_moe.experts.161.w1", "model.layers.28.block_sparse_moe.experts.162.w1", "model.layers.28.block_sparse_moe.experts.163.w1", "model.layers.28.block_sparse_moe.experts.164.w1", "model.layers.28.block_sparse_moe.experts.165.w1", "model.layers.28.block_sparse_moe.experts.166.w1", "model.layers.28.block_sparse_moe.experts.167.w1", "model.layers.28.block_sparse_moe.experts.168.w1", "model.layers.28.block_sparse_moe.experts.169.w1", "model.layers.28.block_sparse_moe.experts.170.w1", "model.layers.28.block_sparse_moe.experts.171.w1", "model.layers.28.block_sparse_moe.experts.172.w1", "model.layers.28.block_sparse_moe.experts.173.w1", "model.layers.28.block_sparse_moe.experts.174.w1", "model.layers.28.block_sparse_moe.experts.175.w1", "model.layers.28.block_sparse_moe.experts.176.w1", "model.layers.28.block_sparse_moe.experts.177.w1", "model.layers.28.block_sparse_moe.experts.178.w1", "model.layers.28.block_sparse_moe.experts.179.w1", "model.layers.28.block_sparse_moe.experts.180.w1", "model.layers.28.block_sparse_moe.experts.181.w1", "model.layers.28.block_sparse_moe.experts.182.w1", "model.layers.28.block_sparse_moe.experts.183.w1", "model.layers.28.block_sparse_moe.experts.184.w1", "model.layers.28.block_sparse_moe.experts.185.w1", "model.layers.28.block_sparse_moe.experts.186.w1", "model.layers.28.block_sparse_moe.experts.187.w1", "model.layers.28.block_sparse_moe.experts.188.w1", "model.layers.28.block_sparse_moe.experts.189.w1", "model.layers.28.block_sparse_moe.experts.190.w1", "model.layers.28.block_sparse_moe.experts.191.w1", "model.layers.28.block_sparse_moe.experts.192.w1", "model.layers.28.block_sparse_moe.experts.193.w1", "model.layers.28.block_sparse_moe.experts.194.w1", "model.layers.28.block_sparse_moe.experts.195.w1", "model.layers.28.block_sparse_moe.experts.196.w1", "model.layers.28.block_sparse_moe.experts.197.w1", "model.layers.28.block_sparse_moe.experts.198.w1", "model.layers.28.block_sparse_moe.experts.199.w1", "model.layers.28.block_sparse_moe.experts.200.w1", "model.layers.28.block_sparse_moe.experts.201.w1", "model.layers.28.block_sparse_moe.experts.202.w1", "model.layers.28.block_sparse_moe.experts.203.w1", "model.layers.28.block_sparse_moe.experts.204.w1", "model.layers.28.block_sparse_moe.experts.205.w1", "model.layers.28.block_sparse_moe.experts.206.w1", "model.layers.28.block_sparse_moe.experts.207.w1", "model.layers.28.block_sparse_moe.experts.208.w1", "model.layers.28.block_sparse_moe.experts.209.w1", "model.layers.28.block_sparse_moe.experts.210.w1", "model.layers.28.block_sparse_moe.experts.211.w1", "model.layers.28.block_sparse_moe.experts.212.w1", "model.layers.28.block_sparse_moe.experts.213.w1", "model.layers.28.block_sparse_moe.experts.214.w1", "model.layers.28.block_sparse_moe.experts.215.w1", "model.layers.28.block_sparse_moe.experts.216.w1", "model.layers.28.block_sparse_moe.experts.217.w1", "model.layers.28.block_sparse_moe.experts.218.w1", "model.layers.28.block_sparse_moe.experts.219.w1", "model.layers.28.block_sparse_moe.experts.220.w1", "model.layers.28.block_sparse_moe.experts.221.w1", "model.layers.28.block_sparse_moe.experts.222.w1", "model.layers.28.block_sparse_moe.experts.223.w1", "model.layers.28.block_sparse_moe.experts.224.w1", "model.layers.28.block_sparse_moe.experts.225.w1", "model.layers.28.block_sparse_moe.experts.226.w1", "model.layers.28.block_sparse_moe.experts.227.w1", "model.layers.28.block_sparse_moe.experts.228.w1", "model.layers.28.block_sparse_moe.experts.229.w1", "model.layers.28.block_sparse_moe.experts.230.w1", "model.layers.28.block_sparse_moe.experts.231.w1", "model.layers.28.block_sparse_moe.experts.232.w1", "model.layers.28.block_sparse_moe.experts.233.w1", "model.layers.28.block_sparse_moe.experts.234.w1", "model.layers.28.block_sparse_moe.experts.235.w1", "model.layers.28.block_sparse_moe.experts.236.w1", "model.layers.28.block_sparse_moe.experts.237.w1", "model.layers.28.block_sparse_moe.experts.238.w1", "model.layers.28.block_sparse_moe.experts.239.w1", "model.layers.28.block_sparse_moe.experts.240.w1", "model.layers.28.block_sparse_moe.experts.241.w1", "model.layers.28.block_sparse_moe.experts.242.w1", "model.layers.28.block_sparse_moe.experts.243.w1", "model.layers.28.block_sparse_moe.experts.244.w1", "model.layers.28.block_sparse_moe.experts.245.w1", "model.layers.28.block_sparse_moe.experts.246.w1", "model.layers.28.block_sparse_moe.experts.247.w1", "model.layers.28.block_sparse_moe.experts.248.w1", "model.layers.28.block_sparse_moe.experts.249.w1", "model.layers.28.block_sparse_moe.experts.250.w1", "model.layers.28.block_sparse_moe.experts.251.w1", "model.layers.28.block_sparse_moe.experts.252.w1", "model.layers.28.block_sparse_moe.experts.253.w1", "model.layers.28.block_sparse_moe.experts.254.w1", "model.layers.28.block_sparse_moe.experts.255.w1", "model.layers.28.block_sparse_moe.experts.0.w3", "model.layers.28.block_sparse_moe.experts.1.w3", "model.layers.28.block_sparse_moe.experts.2.w3", "model.layers.28.block_sparse_moe.experts.3.w3", "model.layers.28.block_sparse_moe.experts.4.w3", "model.layers.28.block_sparse_moe.experts.5.w3", "model.layers.28.block_sparse_moe.experts.6.w3", "model.layers.28.block_sparse_moe.experts.7.w3", "model.layers.28.block_sparse_moe.experts.8.w3", "model.layers.28.block_sparse_moe.experts.9.w3", "model.layers.28.block_sparse_moe.experts.10.w3", "model.layers.28.block_sparse_moe.experts.11.w3", "model.layers.28.block_sparse_moe.experts.12.w3", "model.layers.28.block_sparse_moe.experts.13.w3", "model.layers.28.block_sparse_moe.experts.14.w3", "model.layers.28.block_sparse_moe.experts.15.w3", "model.layers.28.block_sparse_moe.experts.16.w3", "model.layers.28.block_sparse_moe.experts.17.w3", "model.layers.28.block_sparse_moe.experts.18.w3", "model.layers.28.block_sparse_moe.experts.19.w3", "model.layers.28.block_sparse_moe.experts.20.w3", "model.layers.28.block_sparse_moe.experts.21.w3", "model.layers.28.block_sparse_moe.experts.22.w3", "model.layers.28.block_sparse_moe.experts.23.w3", "model.layers.28.block_sparse_moe.experts.24.w3", "model.layers.28.block_sparse_moe.experts.25.w3", "model.layers.28.block_sparse_moe.experts.26.w3", "model.layers.28.block_sparse_moe.experts.27.w3", "model.layers.28.block_sparse_moe.experts.28.w3", "model.layers.28.block_sparse_moe.experts.29.w3", "model.layers.28.block_sparse_moe.experts.30.w3", "model.layers.28.block_sparse_moe.experts.31.w3", "model.layers.28.block_sparse_moe.experts.32.w3", "model.layers.28.block_sparse_moe.experts.33.w3", "model.layers.28.block_sparse_moe.experts.34.w3", "model.layers.28.block_sparse_moe.experts.35.w3", "model.layers.28.block_sparse_moe.experts.36.w3", "model.layers.28.block_sparse_moe.experts.37.w3", "model.layers.28.block_sparse_moe.experts.38.w3", "model.layers.28.block_sparse_moe.experts.39.w3", "model.layers.28.block_sparse_moe.experts.40.w3", "model.layers.28.block_sparse_moe.experts.41.w3", "model.layers.28.block_sparse_moe.experts.42.w3", "model.layers.28.block_sparse_moe.experts.43.w3", "model.layers.28.block_sparse_moe.experts.44.w3", "model.layers.28.block_sparse_moe.experts.45.w3", "model.layers.28.block_sparse_moe.experts.46.w3", "model.layers.28.block_sparse_moe.experts.47.w3", "model.layers.28.block_sparse_moe.experts.48.w3", "model.layers.28.block_sparse_moe.experts.49.w3", "model.layers.28.block_sparse_moe.experts.50.w3", "model.layers.28.block_sparse_moe.experts.51.w3", "model.layers.28.block_sparse_moe.experts.52.w3", "model.layers.28.block_sparse_moe.experts.53.w3", "model.layers.28.block_sparse_moe.experts.54.w3", "model.layers.28.block_sparse_moe.experts.55.w3", "model.layers.28.block_sparse_moe.experts.56.w3", "model.layers.28.block_sparse_moe.experts.57.w3", "model.layers.28.block_sparse_moe.experts.58.w3", "model.layers.28.block_sparse_moe.experts.59.w3", "model.layers.28.block_sparse_moe.experts.60.w3", "model.layers.28.block_sparse_moe.experts.61.w3", "model.layers.28.block_sparse_moe.experts.62.w3", "model.layers.28.block_sparse_moe.experts.63.w3", "model.layers.28.block_sparse_moe.experts.64.w3", "model.layers.28.block_sparse_moe.experts.65.w3", "model.layers.28.block_sparse_moe.experts.66.w3", "model.layers.28.block_sparse_moe.experts.67.w3", "model.layers.28.block_sparse_moe.experts.68.w3", "model.layers.28.block_sparse_moe.experts.69.w3", "model.layers.28.block_sparse_moe.experts.70.w3", "model.layers.28.block_sparse_moe.experts.71.w3", "model.layers.28.block_sparse_moe.experts.72.w3", "model.layers.28.block_sparse_moe.experts.73.w3", "model.layers.28.block_sparse_moe.experts.74.w3", "model.layers.28.block_sparse_moe.experts.75.w3", "model.layers.28.block_sparse_moe.experts.76.w3", "model.layers.28.block_sparse_moe.experts.77.w3", "model.layers.28.block_sparse_moe.experts.78.w3", "model.layers.28.block_sparse_moe.experts.79.w3", "model.layers.28.block_sparse_moe.experts.80.w3", "model.layers.28.block_sparse_moe.experts.81.w3", "model.layers.28.block_sparse_moe.experts.82.w3", "model.layers.28.block_sparse_moe.experts.83.w3", "model.layers.28.block_sparse_moe.experts.84.w3", "model.layers.28.block_sparse_moe.experts.85.w3", "model.layers.28.block_sparse_moe.experts.86.w3", "model.layers.28.block_sparse_moe.experts.87.w3", "model.layers.28.block_sparse_moe.experts.88.w3", "model.layers.28.block_sparse_moe.experts.89.w3", "model.layers.28.block_sparse_moe.experts.90.w3", "model.layers.28.block_sparse_moe.experts.91.w3", "model.layers.28.block_sparse_moe.experts.92.w3", "model.layers.28.block_sparse_moe.experts.93.w3", "model.layers.28.block_sparse_moe.experts.94.w3", "model.layers.28.block_sparse_moe.experts.95.w3", "model.layers.28.block_sparse_moe.experts.96.w3", "model.layers.28.block_sparse_moe.experts.97.w3", "model.layers.28.block_sparse_moe.experts.98.w3", "model.layers.28.block_sparse_moe.experts.99.w3", "model.layers.28.block_sparse_moe.experts.100.w3", "model.layers.28.block_sparse_moe.experts.101.w3", "model.layers.28.block_sparse_moe.experts.102.w3", "model.layers.28.block_sparse_moe.experts.103.w3", "model.layers.28.block_sparse_moe.experts.104.w3", "model.layers.28.block_sparse_moe.experts.105.w3", "model.layers.28.block_sparse_moe.experts.106.w3", "model.layers.28.block_sparse_moe.experts.107.w3", "model.layers.28.block_sparse_moe.experts.108.w3", "model.layers.28.block_sparse_moe.experts.109.w3", "model.layers.28.block_sparse_moe.experts.110.w3", "model.layers.28.block_sparse_moe.experts.111.w3", "model.layers.28.block_sparse_moe.experts.112.w3", "model.layers.28.block_sparse_moe.experts.113.w3", "model.layers.28.block_sparse_moe.experts.114.w3", "model.layers.28.block_sparse_moe.experts.115.w3", "model.layers.28.block_sparse_moe.experts.116.w3", "model.layers.28.block_sparse_moe.experts.117.w3", "model.layers.28.block_sparse_moe.experts.118.w3", "model.layers.28.block_sparse_moe.experts.119.w3", "model.layers.28.block_sparse_moe.experts.120.w3", "model.layers.28.block_sparse_moe.experts.121.w3", "model.layers.28.block_sparse_moe.experts.122.w3", "model.layers.28.block_sparse_moe.experts.123.w3", "model.layers.28.block_sparse_moe.experts.124.w3", "model.layers.28.block_sparse_moe.experts.125.w3", "model.layers.28.block_sparse_moe.experts.126.w3", "model.layers.28.block_sparse_moe.experts.127.w3", "model.layers.28.block_sparse_moe.experts.128.w3", "model.layers.28.block_sparse_moe.experts.129.w3", "model.layers.28.block_sparse_moe.experts.130.w3", "model.layers.28.block_sparse_moe.experts.131.w3", "model.layers.28.block_sparse_moe.experts.132.w3", "model.layers.28.block_sparse_moe.experts.133.w3", "model.layers.28.block_sparse_moe.experts.134.w3", "model.layers.28.block_sparse_moe.experts.135.w3", "model.layers.28.block_sparse_moe.experts.136.w3", "model.layers.28.block_sparse_moe.experts.137.w3", "model.layers.28.block_sparse_moe.experts.138.w3", "model.layers.28.block_sparse_moe.experts.139.w3", "model.layers.28.block_sparse_moe.experts.140.w3", "model.layers.28.block_sparse_moe.experts.141.w3", "model.layers.28.block_sparse_moe.experts.142.w3", "model.layers.28.block_sparse_moe.experts.143.w3", "model.layers.28.block_sparse_moe.experts.144.w3", "model.layers.28.block_sparse_moe.experts.145.w3", "model.layers.28.block_sparse_moe.experts.146.w3", "model.layers.28.block_sparse_moe.experts.147.w3", "model.layers.28.block_sparse_moe.experts.148.w3", "model.layers.28.block_sparse_moe.experts.149.w3", "model.layers.28.block_sparse_moe.experts.150.w3", "model.layers.28.block_sparse_moe.experts.151.w3", "model.layers.28.block_sparse_moe.experts.152.w3", "model.layers.28.block_sparse_moe.experts.153.w3", "model.layers.28.block_sparse_moe.experts.154.w3", "model.layers.28.block_sparse_moe.experts.155.w3", "model.layers.28.block_sparse_moe.experts.156.w3", "model.layers.28.block_sparse_moe.experts.157.w3", "model.layers.28.block_sparse_moe.experts.158.w3", "model.layers.28.block_sparse_moe.experts.159.w3", "model.layers.28.block_sparse_moe.experts.160.w3", "model.layers.28.block_sparse_moe.experts.161.w3", "model.layers.28.block_sparse_moe.experts.162.w3", "model.layers.28.block_sparse_moe.experts.163.w3", "model.layers.28.block_sparse_moe.experts.164.w3", "model.layers.28.block_sparse_moe.experts.165.w3", "model.layers.28.block_sparse_moe.experts.166.w3", "model.layers.28.block_sparse_moe.experts.167.w3", "model.layers.28.block_sparse_moe.experts.168.w3", "model.layers.28.block_sparse_moe.experts.169.w3", "model.layers.28.block_sparse_moe.experts.170.w3", "model.layers.28.block_sparse_moe.experts.171.w3", "model.layers.28.block_sparse_moe.experts.172.w3", "model.layers.28.block_sparse_moe.experts.173.w3", "model.layers.28.block_sparse_moe.experts.174.w3", "model.layers.28.block_sparse_moe.experts.175.w3", "model.layers.28.block_sparse_moe.experts.176.w3", "model.layers.28.block_sparse_moe.experts.177.w3", "model.layers.28.block_sparse_moe.experts.178.w3", "model.layers.28.block_sparse_moe.experts.179.w3", "model.layers.28.block_sparse_moe.experts.180.w3", "model.layers.28.block_sparse_moe.experts.181.w3", "model.layers.28.block_sparse_moe.experts.182.w3", "model.layers.28.block_sparse_moe.experts.183.w3", "model.layers.28.block_sparse_moe.experts.184.w3", "model.layers.28.block_sparse_moe.experts.185.w3", "model.layers.28.block_sparse_moe.experts.186.w3", "model.layers.28.block_sparse_moe.experts.187.w3", "model.layers.28.block_sparse_moe.experts.188.w3", "model.layers.28.block_sparse_moe.experts.189.w3", "model.layers.28.block_sparse_moe.experts.190.w3", "model.layers.28.block_sparse_moe.experts.191.w3", "model.layers.28.block_sparse_moe.experts.192.w3", "model.layers.28.block_sparse_moe.experts.193.w3", "model.layers.28.block_sparse_moe.experts.194.w3", "model.layers.28.block_sparse_moe.experts.195.w3", "model.layers.28.block_sparse_moe.experts.196.w3", "model.layers.28.block_sparse_moe.experts.197.w3", "model.layers.28.block_sparse_moe.experts.198.w3", "model.layers.28.block_sparse_moe.experts.199.w3", "model.layers.28.block_sparse_moe.experts.200.w3", "model.layers.28.block_sparse_moe.experts.201.w3", "model.layers.28.block_sparse_moe.experts.202.w3", "model.layers.28.block_sparse_moe.experts.203.w3", "model.layers.28.block_sparse_moe.experts.204.w3", "model.layers.28.block_sparse_moe.experts.205.w3", "model.layers.28.block_sparse_moe.experts.206.w3", "model.layers.28.block_sparse_moe.experts.207.w3", "model.layers.28.block_sparse_moe.experts.208.w3", "model.layers.28.block_sparse_moe.experts.209.w3", "model.layers.28.block_sparse_moe.experts.210.w3", "model.layers.28.block_sparse_moe.experts.211.w3", "model.layers.28.block_sparse_moe.experts.212.w3", "model.layers.28.block_sparse_moe.experts.213.w3", "model.layers.28.block_sparse_moe.experts.214.w3", "model.layers.28.block_sparse_moe.experts.215.w3", "model.layers.28.block_sparse_moe.experts.216.w3", "model.layers.28.block_sparse_moe.experts.217.w3", "model.layers.28.block_sparse_moe.experts.218.w3", "model.layers.28.block_sparse_moe.experts.219.w3", "model.layers.28.block_sparse_moe.experts.220.w3", "model.layers.28.block_sparse_moe.experts.221.w3", "model.layers.28.block_sparse_moe.experts.222.w3", "model.layers.28.block_sparse_moe.experts.223.w3", "model.layers.28.block_sparse_moe.experts.224.w3", "model.layers.28.block_sparse_moe.experts.225.w3", "model.layers.28.block_sparse_moe.experts.226.w3", "model.layers.28.block_sparse_moe.experts.227.w3", "model.layers.28.block_sparse_moe.experts.228.w3", "model.layers.28.block_sparse_moe.experts.229.w3", "model.layers.28.block_sparse_moe.experts.230.w3", "model.layers.28.block_sparse_moe.experts.231.w3", "model.layers.28.block_sparse_moe.experts.232.w3", "model.layers.28.block_sparse_moe.experts.233.w3", "model.layers.28.block_sparse_moe.experts.234.w3", "model.layers.28.block_sparse_moe.experts.235.w3", "model.layers.28.block_sparse_moe.experts.236.w3", "model.layers.28.block_sparse_moe.experts.237.w3", "model.layers.28.block_sparse_moe.experts.238.w3", "model.layers.28.block_sparse_moe.experts.239.w3", "model.layers.28.block_sparse_moe.experts.240.w3", "model.layers.28.block_sparse_moe.experts.241.w3", "model.layers.28.block_sparse_moe.experts.242.w3", "model.layers.28.block_sparse_moe.experts.243.w3", "model.layers.28.block_sparse_moe.experts.244.w3", "model.layers.28.block_sparse_moe.experts.245.w3", "model.layers.28.block_sparse_moe.experts.246.w3", "model.layers.28.block_sparse_moe.experts.247.w3", "model.layers.28.block_sparse_moe.experts.248.w3", "model.layers.28.block_sparse_moe.experts.249.w3", "model.layers.28.block_sparse_moe.experts.250.w3", "model.layers.28.block_sparse_moe.experts.251.w3", "model.layers.28.block_sparse_moe.experts.252.w3", "model.layers.28.block_sparse_moe.experts.253.w3", "model.layers.28.block_sparse_moe.experts.254.w3", "model.layers.28.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0004614517092704773, "dbits": 2415919104 } ] }, { "idx": 144, "layers": [ "model.layers.28.block_sparse_moe.experts.0.w2", "model.layers.28.block_sparse_moe.experts.1.w2", "model.layers.28.block_sparse_moe.experts.2.w2", "model.layers.28.block_sparse_moe.experts.3.w2", "model.layers.28.block_sparse_moe.experts.4.w2", "model.layers.28.block_sparse_moe.experts.5.w2", "model.layers.28.block_sparse_moe.experts.6.w2", "model.layers.28.block_sparse_moe.experts.7.w2", "model.layers.28.block_sparse_moe.experts.8.w2", "model.layers.28.block_sparse_moe.experts.9.w2", "model.layers.28.block_sparse_moe.experts.10.w2", "model.layers.28.block_sparse_moe.experts.11.w2", "model.layers.28.block_sparse_moe.experts.12.w2", "model.layers.28.block_sparse_moe.experts.13.w2", "model.layers.28.block_sparse_moe.experts.14.w2", "model.layers.28.block_sparse_moe.experts.15.w2", "model.layers.28.block_sparse_moe.experts.16.w2", "model.layers.28.block_sparse_moe.experts.17.w2", "model.layers.28.block_sparse_moe.experts.18.w2", "model.layers.28.block_sparse_moe.experts.19.w2", "model.layers.28.block_sparse_moe.experts.20.w2", "model.layers.28.block_sparse_moe.experts.21.w2", "model.layers.28.block_sparse_moe.experts.22.w2", "model.layers.28.block_sparse_moe.experts.23.w2", "model.layers.28.block_sparse_moe.experts.24.w2", "model.layers.28.block_sparse_moe.experts.25.w2", "model.layers.28.block_sparse_moe.experts.26.w2", "model.layers.28.block_sparse_moe.experts.27.w2", "model.layers.28.block_sparse_moe.experts.28.w2", "model.layers.28.block_sparse_moe.experts.29.w2", "model.layers.28.block_sparse_moe.experts.30.w2", "model.layers.28.block_sparse_moe.experts.31.w2", "model.layers.28.block_sparse_moe.experts.32.w2", "model.layers.28.block_sparse_moe.experts.33.w2", "model.layers.28.block_sparse_moe.experts.34.w2", "model.layers.28.block_sparse_moe.experts.35.w2", "model.layers.28.block_sparse_moe.experts.36.w2", "model.layers.28.block_sparse_moe.experts.37.w2", "model.layers.28.block_sparse_moe.experts.38.w2", "model.layers.28.block_sparse_moe.experts.39.w2", "model.layers.28.block_sparse_moe.experts.40.w2", "model.layers.28.block_sparse_moe.experts.41.w2", "model.layers.28.block_sparse_moe.experts.42.w2", "model.layers.28.block_sparse_moe.experts.43.w2", "model.layers.28.block_sparse_moe.experts.44.w2", "model.layers.28.block_sparse_moe.experts.45.w2", "model.layers.28.block_sparse_moe.experts.46.w2", "model.layers.28.block_sparse_moe.experts.47.w2", "model.layers.28.block_sparse_moe.experts.48.w2", "model.layers.28.block_sparse_moe.experts.49.w2", "model.layers.28.block_sparse_moe.experts.50.w2", "model.layers.28.block_sparse_moe.experts.51.w2", "model.layers.28.block_sparse_moe.experts.52.w2", "model.layers.28.block_sparse_moe.experts.53.w2", "model.layers.28.block_sparse_moe.experts.54.w2", "model.layers.28.block_sparse_moe.experts.55.w2", "model.layers.28.block_sparse_moe.experts.56.w2", "model.layers.28.block_sparse_moe.experts.57.w2", "model.layers.28.block_sparse_moe.experts.58.w2", "model.layers.28.block_sparse_moe.experts.59.w2", "model.layers.28.block_sparse_moe.experts.60.w2", "model.layers.28.block_sparse_moe.experts.61.w2", "model.layers.28.block_sparse_moe.experts.62.w2", "model.layers.28.block_sparse_moe.experts.63.w2", "model.layers.28.block_sparse_moe.experts.64.w2", "model.layers.28.block_sparse_moe.experts.65.w2", "model.layers.28.block_sparse_moe.experts.66.w2", "model.layers.28.block_sparse_moe.experts.67.w2", "model.layers.28.block_sparse_moe.experts.68.w2", "model.layers.28.block_sparse_moe.experts.69.w2", "model.layers.28.block_sparse_moe.experts.70.w2", "model.layers.28.block_sparse_moe.experts.71.w2", "model.layers.28.block_sparse_moe.experts.72.w2", "model.layers.28.block_sparse_moe.experts.73.w2", "model.layers.28.block_sparse_moe.experts.74.w2", "model.layers.28.block_sparse_moe.experts.75.w2", "model.layers.28.block_sparse_moe.experts.76.w2", "model.layers.28.block_sparse_moe.experts.77.w2", "model.layers.28.block_sparse_moe.experts.78.w2", "model.layers.28.block_sparse_moe.experts.79.w2", "model.layers.28.block_sparse_moe.experts.80.w2", "model.layers.28.block_sparse_moe.experts.81.w2", "model.layers.28.block_sparse_moe.experts.82.w2", "model.layers.28.block_sparse_moe.experts.83.w2", "model.layers.28.block_sparse_moe.experts.84.w2", "model.layers.28.block_sparse_moe.experts.85.w2", "model.layers.28.block_sparse_moe.experts.86.w2", "model.layers.28.block_sparse_moe.experts.87.w2", "model.layers.28.block_sparse_moe.experts.88.w2", "model.layers.28.block_sparse_moe.experts.89.w2", "model.layers.28.block_sparse_moe.experts.90.w2", "model.layers.28.block_sparse_moe.experts.91.w2", "model.layers.28.block_sparse_moe.experts.92.w2", "model.layers.28.block_sparse_moe.experts.93.w2", "model.layers.28.block_sparse_moe.experts.94.w2", "model.layers.28.block_sparse_moe.experts.95.w2", "model.layers.28.block_sparse_moe.experts.96.w2", "model.layers.28.block_sparse_moe.experts.97.w2", "model.layers.28.block_sparse_moe.experts.98.w2", "model.layers.28.block_sparse_moe.experts.99.w2", "model.layers.28.block_sparse_moe.experts.100.w2", "model.layers.28.block_sparse_moe.experts.101.w2", "model.layers.28.block_sparse_moe.experts.102.w2", "model.layers.28.block_sparse_moe.experts.103.w2", "model.layers.28.block_sparse_moe.experts.104.w2", "model.layers.28.block_sparse_moe.experts.105.w2", "model.layers.28.block_sparse_moe.experts.106.w2", "model.layers.28.block_sparse_moe.experts.107.w2", "model.layers.28.block_sparse_moe.experts.108.w2", "model.layers.28.block_sparse_moe.experts.109.w2", "model.layers.28.block_sparse_moe.experts.110.w2", "model.layers.28.block_sparse_moe.experts.111.w2", "model.layers.28.block_sparse_moe.experts.112.w2", "model.layers.28.block_sparse_moe.experts.113.w2", "model.layers.28.block_sparse_moe.experts.114.w2", "model.layers.28.block_sparse_moe.experts.115.w2", "model.layers.28.block_sparse_moe.experts.116.w2", "model.layers.28.block_sparse_moe.experts.117.w2", "model.layers.28.block_sparse_moe.experts.118.w2", "model.layers.28.block_sparse_moe.experts.119.w2", "model.layers.28.block_sparse_moe.experts.120.w2", "model.layers.28.block_sparse_moe.experts.121.w2", "model.layers.28.block_sparse_moe.experts.122.w2", "model.layers.28.block_sparse_moe.experts.123.w2", "model.layers.28.block_sparse_moe.experts.124.w2", "model.layers.28.block_sparse_moe.experts.125.w2", "model.layers.28.block_sparse_moe.experts.126.w2", "model.layers.28.block_sparse_moe.experts.127.w2", "model.layers.28.block_sparse_moe.experts.128.w2", "model.layers.28.block_sparse_moe.experts.129.w2", "model.layers.28.block_sparse_moe.experts.130.w2", "model.layers.28.block_sparse_moe.experts.131.w2", "model.layers.28.block_sparse_moe.experts.132.w2", "model.layers.28.block_sparse_moe.experts.133.w2", "model.layers.28.block_sparse_moe.experts.134.w2", "model.layers.28.block_sparse_moe.experts.135.w2", "model.layers.28.block_sparse_moe.experts.136.w2", "model.layers.28.block_sparse_moe.experts.137.w2", "model.layers.28.block_sparse_moe.experts.138.w2", "model.layers.28.block_sparse_moe.experts.139.w2", "model.layers.28.block_sparse_moe.experts.140.w2", "model.layers.28.block_sparse_moe.experts.141.w2", "model.layers.28.block_sparse_moe.experts.142.w2", "model.layers.28.block_sparse_moe.experts.143.w2", "model.layers.28.block_sparse_moe.experts.144.w2", "model.layers.28.block_sparse_moe.experts.145.w2", "model.layers.28.block_sparse_moe.experts.146.w2", "model.layers.28.block_sparse_moe.experts.147.w2", "model.layers.28.block_sparse_moe.experts.148.w2", "model.layers.28.block_sparse_moe.experts.149.w2", "model.layers.28.block_sparse_moe.experts.150.w2", "model.layers.28.block_sparse_moe.experts.151.w2", "model.layers.28.block_sparse_moe.experts.152.w2", "model.layers.28.block_sparse_moe.experts.153.w2", "model.layers.28.block_sparse_moe.experts.154.w2", "model.layers.28.block_sparse_moe.experts.155.w2", "model.layers.28.block_sparse_moe.experts.156.w2", "model.layers.28.block_sparse_moe.experts.157.w2", "model.layers.28.block_sparse_moe.experts.158.w2", "model.layers.28.block_sparse_moe.experts.159.w2", "model.layers.28.block_sparse_moe.experts.160.w2", "model.layers.28.block_sparse_moe.experts.161.w2", "model.layers.28.block_sparse_moe.experts.162.w2", "model.layers.28.block_sparse_moe.experts.163.w2", "model.layers.28.block_sparse_moe.experts.164.w2", "model.layers.28.block_sparse_moe.experts.165.w2", "model.layers.28.block_sparse_moe.experts.166.w2", "model.layers.28.block_sparse_moe.experts.167.w2", "model.layers.28.block_sparse_moe.experts.168.w2", "model.layers.28.block_sparse_moe.experts.169.w2", "model.layers.28.block_sparse_moe.experts.170.w2", "model.layers.28.block_sparse_moe.experts.171.w2", "model.layers.28.block_sparse_moe.experts.172.w2", "model.layers.28.block_sparse_moe.experts.173.w2", "model.layers.28.block_sparse_moe.experts.174.w2", "model.layers.28.block_sparse_moe.experts.175.w2", "model.layers.28.block_sparse_moe.experts.176.w2", "model.layers.28.block_sparse_moe.experts.177.w2", "model.layers.28.block_sparse_moe.experts.178.w2", "model.layers.28.block_sparse_moe.experts.179.w2", "model.layers.28.block_sparse_moe.experts.180.w2", "model.layers.28.block_sparse_moe.experts.181.w2", "model.layers.28.block_sparse_moe.experts.182.w2", "model.layers.28.block_sparse_moe.experts.183.w2", "model.layers.28.block_sparse_moe.experts.184.w2", "model.layers.28.block_sparse_moe.experts.185.w2", "model.layers.28.block_sparse_moe.experts.186.w2", "model.layers.28.block_sparse_moe.experts.187.w2", "model.layers.28.block_sparse_moe.experts.188.w2", "model.layers.28.block_sparse_moe.experts.189.w2", "model.layers.28.block_sparse_moe.experts.190.w2", "model.layers.28.block_sparse_moe.experts.191.w2", "model.layers.28.block_sparse_moe.experts.192.w2", "model.layers.28.block_sparse_moe.experts.193.w2", "model.layers.28.block_sparse_moe.experts.194.w2", "model.layers.28.block_sparse_moe.experts.195.w2", "model.layers.28.block_sparse_moe.experts.196.w2", "model.layers.28.block_sparse_moe.experts.197.w2", "model.layers.28.block_sparse_moe.experts.198.w2", "model.layers.28.block_sparse_moe.experts.199.w2", "model.layers.28.block_sparse_moe.experts.200.w2", "model.layers.28.block_sparse_moe.experts.201.w2", "model.layers.28.block_sparse_moe.experts.202.w2", "model.layers.28.block_sparse_moe.experts.203.w2", "model.layers.28.block_sparse_moe.experts.204.w2", "model.layers.28.block_sparse_moe.experts.205.w2", "model.layers.28.block_sparse_moe.experts.206.w2", "model.layers.28.block_sparse_moe.experts.207.w2", "model.layers.28.block_sparse_moe.experts.208.w2", "model.layers.28.block_sparse_moe.experts.209.w2", "model.layers.28.block_sparse_moe.experts.210.w2", "model.layers.28.block_sparse_moe.experts.211.w2", "model.layers.28.block_sparse_moe.experts.212.w2", "model.layers.28.block_sparse_moe.experts.213.w2", "model.layers.28.block_sparse_moe.experts.214.w2", "model.layers.28.block_sparse_moe.experts.215.w2", "model.layers.28.block_sparse_moe.experts.216.w2", "model.layers.28.block_sparse_moe.experts.217.w2", "model.layers.28.block_sparse_moe.experts.218.w2", "model.layers.28.block_sparse_moe.experts.219.w2", "model.layers.28.block_sparse_moe.experts.220.w2", "model.layers.28.block_sparse_moe.experts.221.w2", "model.layers.28.block_sparse_moe.experts.222.w2", "model.layers.28.block_sparse_moe.experts.223.w2", "model.layers.28.block_sparse_moe.experts.224.w2", "model.layers.28.block_sparse_moe.experts.225.w2", "model.layers.28.block_sparse_moe.experts.226.w2", "model.layers.28.block_sparse_moe.experts.227.w2", "model.layers.28.block_sparse_moe.experts.228.w2", "model.layers.28.block_sparse_moe.experts.229.w2", "model.layers.28.block_sparse_moe.experts.230.w2", "model.layers.28.block_sparse_moe.experts.231.w2", "model.layers.28.block_sparse_moe.experts.232.w2", "model.layers.28.block_sparse_moe.experts.233.w2", "model.layers.28.block_sparse_moe.experts.234.w2", "model.layers.28.block_sparse_moe.experts.235.w2", "model.layers.28.block_sparse_moe.experts.236.w2", "model.layers.28.block_sparse_moe.experts.237.w2", "model.layers.28.block_sparse_moe.experts.238.w2", "model.layers.28.block_sparse_moe.experts.239.w2", "model.layers.28.block_sparse_moe.experts.240.w2", "model.layers.28.block_sparse_moe.experts.241.w2", "model.layers.28.block_sparse_moe.experts.242.w2", "model.layers.28.block_sparse_moe.experts.243.w2", "model.layers.28.block_sparse_moe.experts.244.w2", "model.layers.28.block_sparse_moe.experts.245.w2", "model.layers.28.block_sparse_moe.experts.246.w2", "model.layers.28.block_sparse_moe.experts.247.w2", "model.layers.28.block_sparse_moe.experts.248.w2", "model.layers.28.block_sparse_moe.experts.249.w2", "model.layers.28.block_sparse_moe.experts.250.w2", "model.layers.28.block_sparse_moe.experts.251.w2", "model.layers.28.block_sparse_moe.experts.252.w2", "model.layers.28.block_sparse_moe.experts.253.w2", "model.layers.28.block_sparse_moe.experts.254.w2", "model.layers.28.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 6.888210773464687e-05, "dbits": 1207959552 } ] }, { "idx": 145, "layers": [ "model.layers.29.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0007671285420656204, "dbits": 18874368 } ] }, { "idx": 146, "layers": [ "model.layers.29.self_attn.k_proj", "model.layers.29.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0025399699807167053, "dbits": 6291456 } ] }, { "idx": 147, "layers": [ "model.layers.29.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0003397256135940774, "dbits": 18874368 } ] }, { "idx": 148, "layers": [ "model.layers.29.block_sparse_moe.experts.0.w1", "model.layers.29.block_sparse_moe.experts.1.w1", "model.layers.29.block_sparse_moe.experts.2.w1", "model.layers.29.block_sparse_moe.experts.3.w1", "model.layers.29.block_sparse_moe.experts.4.w1", "model.layers.29.block_sparse_moe.experts.5.w1", "model.layers.29.block_sparse_moe.experts.6.w1", "model.layers.29.block_sparse_moe.experts.7.w1", "model.layers.29.block_sparse_moe.experts.8.w1", "model.layers.29.block_sparse_moe.experts.9.w1", "model.layers.29.block_sparse_moe.experts.10.w1", "model.layers.29.block_sparse_moe.experts.11.w1", "model.layers.29.block_sparse_moe.experts.12.w1", "model.layers.29.block_sparse_moe.experts.13.w1", "model.layers.29.block_sparse_moe.experts.14.w1", "model.layers.29.block_sparse_moe.experts.15.w1", "model.layers.29.block_sparse_moe.experts.16.w1", "model.layers.29.block_sparse_moe.experts.17.w1", "model.layers.29.block_sparse_moe.experts.18.w1", "model.layers.29.block_sparse_moe.experts.19.w1", "model.layers.29.block_sparse_moe.experts.20.w1", "model.layers.29.block_sparse_moe.experts.21.w1", "model.layers.29.block_sparse_moe.experts.22.w1", "model.layers.29.block_sparse_moe.experts.23.w1", "model.layers.29.block_sparse_moe.experts.24.w1", "model.layers.29.block_sparse_moe.experts.25.w1", "model.layers.29.block_sparse_moe.experts.26.w1", "model.layers.29.block_sparse_moe.experts.27.w1", "model.layers.29.block_sparse_moe.experts.28.w1", "model.layers.29.block_sparse_moe.experts.29.w1", "model.layers.29.block_sparse_moe.experts.30.w1", "model.layers.29.block_sparse_moe.experts.31.w1", "model.layers.29.block_sparse_moe.experts.32.w1", "model.layers.29.block_sparse_moe.experts.33.w1", "model.layers.29.block_sparse_moe.experts.34.w1", "model.layers.29.block_sparse_moe.experts.35.w1", "model.layers.29.block_sparse_moe.experts.36.w1", "model.layers.29.block_sparse_moe.experts.37.w1", "model.layers.29.block_sparse_moe.experts.38.w1", "model.layers.29.block_sparse_moe.experts.39.w1", "model.layers.29.block_sparse_moe.experts.40.w1", "model.layers.29.block_sparse_moe.experts.41.w1", "model.layers.29.block_sparse_moe.experts.42.w1", "model.layers.29.block_sparse_moe.experts.43.w1", "model.layers.29.block_sparse_moe.experts.44.w1", "model.layers.29.block_sparse_moe.experts.45.w1", "model.layers.29.block_sparse_moe.experts.46.w1", "model.layers.29.block_sparse_moe.experts.47.w1", "model.layers.29.block_sparse_moe.experts.48.w1", "model.layers.29.block_sparse_moe.experts.49.w1", "model.layers.29.block_sparse_moe.experts.50.w1", "model.layers.29.block_sparse_moe.experts.51.w1", "model.layers.29.block_sparse_moe.experts.52.w1", "model.layers.29.block_sparse_moe.experts.53.w1", "model.layers.29.block_sparse_moe.experts.54.w1", "model.layers.29.block_sparse_moe.experts.55.w1", "model.layers.29.block_sparse_moe.experts.56.w1", "model.layers.29.block_sparse_moe.experts.57.w1", "model.layers.29.block_sparse_moe.experts.58.w1", "model.layers.29.block_sparse_moe.experts.59.w1", "model.layers.29.block_sparse_moe.experts.60.w1", "model.layers.29.block_sparse_moe.experts.61.w1", "model.layers.29.block_sparse_moe.experts.62.w1", "model.layers.29.block_sparse_moe.experts.63.w1", "model.layers.29.block_sparse_moe.experts.64.w1", "model.layers.29.block_sparse_moe.experts.65.w1", "model.layers.29.block_sparse_moe.experts.66.w1", "model.layers.29.block_sparse_moe.experts.67.w1", "model.layers.29.block_sparse_moe.experts.68.w1", "model.layers.29.block_sparse_moe.experts.69.w1", "model.layers.29.block_sparse_moe.experts.70.w1", "model.layers.29.block_sparse_moe.experts.71.w1", "model.layers.29.block_sparse_moe.experts.72.w1", "model.layers.29.block_sparse_moe.experts.73.w1", "model.layers.29.block_sparse_moe.experts.74.w1", "model.layers.29.block_sparse_moe.experts.75.w1", "model.layers.29.block_sparse_moe.experts.76.w1", "model.layers.29.block_sparse_moe.experts.77.w1", "model.layers.29.block_sparse_moe.experts.78.w1", "model.layers.29.block_sparse_moe.experts.79.w1", "model.layers.29.block_sparse_moe.experts.80.w1", "model.layers.29.block_sparse_moe.experts.81.w1", "model.layers.29.block_sparse_moe.experts.82.w1", "model.layers.29.block_sparse_moe.experts.83.w1", "model.layers.29.block_sparse_moe.experts.84.w1", "model.layers.29.block_sparse_moe.experts.85.w1", "model.layers.29.block_sparse_moe.experts.86.w1", "model.layers.29.block_sparse_moe.experts.87.w1", "model.layers.29.block_sparse_moe.experts.88.w1", "model.layers.29.block_sparse_moe.experts.89.w1", "model.layers.29.block_sparse_moe.experts.90.w1", "model.layers.29.block_sparse_moe.experts.91.w1", "model.layers.29.block_sparse_moe.experts.92.w1", "model.layers.29.block_sparse_moe.experts.93.w1", "model.layers.29.block_sparse_moe.experts.94.w1", "model.layers.29.block_sparse_moe.experts.95.w1", "model.layers.29.block_sparse_moe.experts.96.w1", "model.layers.29.block_sparse_moe.experts.97.w1", "model.layers.29.block_sparse_moe.experts.98.w1", "model.layers.29.block_sparse_moe.experts.99.w1", "model.layers.29.block_sparse_moe.experts.100.w1", "model.layers.29.block_sparse_moe.experts.101.w1", "model.layers.29.block_sparse_moe.experts.102.w1", "model.layers.29.block_sparse_moe.experts.103.w1", "model.layers.29.block_sparse_moe.experts.104.w1", "model.layers.29.block_sparse_moe.experts.105.w1", "model.layers.29.block_sparse_moe.experts.106.w1", "model.layers.29.block_sparse_moe.experts.107.w1", "model.layers.29.block_sparse_moe.experts.108.w1", "model.layers.29.block_sparse_moe.experts.109.w1", "model.layers.29.block_sparse_moe.experts.110.w1", "model.layers.29.block_sparse_moe.experts.111.w1", "model.layers.29.block_sparse_moe.experts.112.w1", "model.layers.29.block_sparse_moe.experts.113.w1", "model.layers.29.block_sparse_moe.experts.114.w1", "model.layers.29.block_sparse_moe.experts.115.w1", "model.layers.29.block_sparse_moe.experts.116.w1", "model.layers.29.block_sparse_moe.experts.117.w1", "model.layers.29.block_sparse_moe.experts.118.w1", "model.layers.29.block_sparse_moe.experts.119.w1", "model.layers.29.block_sparse_moe.experts.120.w1", "model.layers.29.block_sparse_moe.experts.121.w1", "model.layers.29.block_sparse_moe.experts.122.w1", "model.layers.29.block_sparse_moe.experts.123.w1", "model.layers.29.block_sparse_moe.experts.124.w1", "model.layers.29.block_sparse_moe.experts.125.w1", "model.layers.29.block_sparse_moe.experts.126.w1", "model.layers.29.block_sparse_moe.experts.127.w1", "model.layers.29.block_sparse_moe.experts.128.w1", "model.layers.29.block_sparse_moe.experts.129.w1", "model.layers.29.block_sparse_moe.experts.130.w1", "model.layers.29.block_sparse_moe.experts.131.w1", "model.layers.29.block_sparse_moe.experts.132.w1", "model.layers.29.block_sparse_moe.experts.133.w1", "model.layers.29.block_sparse_moe.experts.134.w1", "model.layers.29.block_sparse_moe.experts.135.w1", "model.layers.29.block_sparse_moe.experts.136.w1", "model.layers.29.block_sparse_moe.experts.137.w1", "model.layers.29.block_sparse_moe.experts.138.w1", "model.layers.29.block_sparse_moe.experts.139.w1", "model.layers.29.block_sparse_moe.experts.140.w1", "model.layers.29.block_sparse_moe.experts.141.w1", "model.layers.29.block_sparse_moe.experts.142.w1", "model.layers.29.block_sparse_moe.experts.143.w1", "model.layers.29.block_sparse_moe.experts.144.w1", "model.layers.29.block_sparse_moe.experts.145.w1", "model.layers.29.block_sparse_moe.experts.146.w1", "model.layers.29.block_sparse_moe.experts.147.w1", "model.layers.29.block_sparse_moe.experts.148.w1", "model.layers.29.block_sparse_moe.experts.149.w1", "model.layers.29.block_sparse_moe.experts.150.w1", "model.layers.29.block_sparse_moe.experts.151.w1", "model.layers.29.block_sparse_moe.experts.152.w1", "model.layers.29.block_sparse_moe.experts.153.w1", "model.layers.29.block_sparse_moe.experts.154.w1", "model.layers.29.block_sparse_moe.experts.155.w1", "model.layers.29.block_sparse_moe.experts.156.w1", "model.layers.29.block_sparse_moe.experts.157.w1", "model.layers.29.block_sparse_moe.experts.158.w1", "model.layers.29.block_sparse_moe.experts.159.w1", "model.layers.29.block_sparse_moe.experts.160.w1", "model.layers.29.block_sparse_moe.experts.161.w1", "model.layers.29.block_sparse_moe.experts.162.w1", "model.layers.29.block_sparse_moe.experts.163.w1", "model.layers.29.block_sparse_moe.experts.164.w1", "model.layers.29.block_sparse_moe.experts.165.w1", "model.layers.29.block_sparse_moe.experts.166.w1", "model.layers.29.block_sparse_moe.experts.167.w1", "model.layers.29.block_sparse_moe.experts.168.w1", "model.layers.29.block_sparse_moe.experts.169.w1", "model.layers.29.block_sparse_moe.experts.170.w1", "model.layers.29.block_sparse_moe.experts.171.w1", "model.layers.29.block_sparse_moe.experts.172.w1", "model.layers.29.block_sparse_moe.experts.173.w1", "model.layers.29.block_sparse_moe.experts.174.w1", "model.layers.29.block_sparse_moe.experts.175.w1", "model.layers.29.block_sparse_moe.experts.176.w1", "model.layers.29.block_sparse_moe.experts.177.w1", "model.layers.29.block_sparse_moe.experts.178.w1", "model.layers.29.block_sparse_moe.experts.179.w1", "model.layers.29.block_sparse_moe.experts.180.w1", "model.layers.29.block_sparse_moe.experts.181.w1", "model.layers.29.block_sparse_moe.experts.182.w1", "model.layers.29.block_sparse_moe.experts.183.w1", "model.layers.29.block_sparse_moe.experts.184.w1", "model.layers.29.block_sparse_moe.experts.185.w1", "model.layers.29.block_sparse_moe.experts.186.w1", "model.layers.29.block_sparse_moe.experts.187.w1", "model.layers.29.block_sparse_moe.experts.188.w1", "model.layers.29.block_sparse_moe.experts.189.w1", "model.layers.29.block_sparse_moe.experts.190.w1", "model.layers.29.block_sparse_moe.experts.191.w1", "model.layers.29.block_sparse_moe.experts.192.w1", "model.layers.29.block_sparse_moe.experts.193.w1", "model.layers.29.block_sparse_moe.experts.194.w1", "model.layers.29.block_sparse_moe.experts.195.w1", "model.layers.29.block_sparse_moe.experts.196.w1", "model.layers.29.block_sparse_moe.experts.197.w1", "model.layers.29.block_sparse_moe.experts.198.w1", "model.layers.29.block_sparse_moe.experts.199.w1", "model.layers.29.block_sparse_moe.experts.200.w1", "model.layers.29.block_sparse_moe.experts.201.w1", "model.layers.29.block_sparse_moe.experts.202.w1", "model.layers.29.block_sparse_moe.experts.203.w1", "model.layers.29.block_sparse_moe.experts.204.w1", "model.layers.29.block_sparse_moe.experts.205.w1", "model.layers.29.block_sparse_moe.experts.206.w1", "model.layers.29.block_sparse_moe.experts.207.w1", "model.layers.29.block_sparse_moe.experts.208.w1", "model.layers.29.block_sparse_moe.experts.209.w1", "model.layers.29.block_sparse_moe.experts.210.w1", "model.layers.29.block_sparse_moe.experts.211.w1", "model.layers.29.block_sparse_moe.experts.212.w1", "model.layers.29.block_sparse_moe.experts.213.w1", "model.layers.29.block_sparse_moe.experts.214.w1", "model.layers.29.block_sparse_moe.experts.215.w1", "model.layers.29.block_sparse_moe.experts.216.w1", "model.layers.29.block_sparse_moe.experts.217.w1", "model.layers.29.block_sparse_moe.experts.218.w1", "model.layers.29.block_sparse_moe.experts.219.w1", "model.layers.29.block_sparse_moe.experts.220.w1", "model.layers.29.block_sparse_moe.experts.221.w1", "model.layers.29.block_sparse_moe.experts.222.w1", "model.layers.29.block_sparse_moe.experts.223.w1", "model.layers.29.block_sparse_moe.experts.224.w1", "model.layers.29.block_sparse_moe.experts.225.w1", "model.layers.29.block_sparse_moe.experts.226.w1", "model.layers.29.block_sparse_moe.experts.227.w1", "model.layers.29.block_sparse_moe.experts.228.w1", "model.layers.29.block_sparse_moe.experts.229.w1", "model.layers.29.block_sparse_moe.experts.230.w1", "model.layers.29.block_sparse_moe.experts.231.w1", "model.layers.29.block_sparse_moe.experts.232.w1", "model.layers.29.block_sparse_moe.experts.233.w1", "model.layers.29.block_sparse_moe.experts.234.w1", "model.layers.29.block_sparse_moe.experts.235.w1", "model.layers.29.block_sparse_moe.experts.236.w1", "model.layers.29.block_sparse_moe.experts.237.w1", "model.layers.29.block_sparse_moe.experts.238.w1", "model.layers.29.block_sparse_moe.experts.239.w1", "model.layers.29.block_sparse_moe.experts.240.w1", "model.layers.29.block_sparse_moe.experts.241.w1", "model.layers.29.block_sparse_moe.experts.242.w1", "model.layers.29.block_sparse_moe.experts.243.w1", "model.layers.29.block_sparse_moe.experts.244.w1", "model.layers.29.block_sparse_moe.experts.245.w1", "model.layers.29.block_sparse_moe.experts.246.w1", "model.layers.29.block_sparse_moe.experts.247.w1", "model.layers.29.block_sparse_moe.experts.248.w1", "model.layers.29.block_sparse_moe.experts.249.w1", "model.layers.29.block_sparse_moe.experts.250.w1", "model.layers.29.block_sparse_moe.experts.251.w1", "model.layers.29.block_sparse_moe.experts.252.w1", "model.layers.29.block_sparse_moe.experts.253.w1", "model.layers.29.block_sparse_moe.experts.254.w1", "model.layers.29.block_sparse_moe.experts.255.w1", "model.layers.29.block_sparse_moe.experts.0.w3", "model.layers.29.block_sparse_moe.experts.1.w3", "model.layers.29.block_sparse_moe.experts.2.w3", "model.layers.29.block_sparse_moe.experts.3.w3", "model.layers.29.block_sparse_moe.experts.4.w3", "model.layers.29.block_sparse_moe.experts.5.w3", "model.layers.29.block_sparse_moe.experts.6.w3", "model.layers.29.block_sparse_moe.experts.7.w3", "model.layers.29.block_sparse_moe.experts.8.w3", "model.layers.29.block_sparse_moe.experts.9.w3", "model.layers.29.block_sparse_moe.experts.10.w3", "model.layers.29.block_sparse_moe.experts.11.w3", "model.layers.29.block_sparse_moe.experts.12.w3", "model.layers.29.block_sparse_moe.experts.13.w3", "model.layers.29.block_sparse_moe.experts.14.w3", "model.layers.29.block_sparse_moe.experts.15.w3", "model.layers.29.block_sparse_moe.experts.16.w3", "model.layers.29.block_sparse_moe.experts.17.w3", "model.layers.29.block_sparse_moe.experts.18.w3", "model.layers.29.block_sparse_moe.experts.19.w3", "model.layers.29.block_sparse_moe.experts.20.w3", "model.layers.29.block_sparse_moe.experts.21.w3", "model.layers.29.block_sparse_moe.experts.22.w3", "model.layers.29.block_sparse_moe.experts.23.w3", "model.layers.29.block_sparse_moe.experts.24.w3", "model.layers.29.block_sparse_moe.experts.25.w3", "model.layers.29.block_sparse_moe.experts.26.w3", "model.layers.29.block_sparse_moe.experts.27.w3", "model.layers.29.block_sparse_moe.experts.28.w3", "model.layers.29.block_sparse_moe.experts.29.w3", "model.layers.29.block_sparse_moe.experts.30.w3", "model.layers.29.block_sparse_moe.experts.31.w3", "model.layers.29.block_sparse_moe.experts.32.w3", "model.layers.29.block_sparse_moe.experts.33.w3", "model.layers.29.block_sparse_moe.experts.34.w3", "model.layers.29.block_sparse_moe.experts.35.w3", "model.layers.29.block_sparse_moe.experts.36.w3", "model.layers.29.block_sparse_moe.experts.37.w3", "model.layers.29.block_sparse_moe.experts.38.w3", "model.layers.29.block_sparse_moe.experts.39.w3", "model.layers.29.block_sparse_moe.experts.40.w3", "model.layers.29.block_sparse_moe.experts.41.w3", "model.layers.29.block_sparse_moe.experts.42.w3", "model.layers.29.block_sparse_moe.experts.43.w3", "model.layers.29.block_sparse_moe.experts.44.w3", "model.layers.29.block_sparse_moe.experts.45.w3", "model.layers.29.block_sparse_moe.experts.46.w3", "model.layers.29.block_sparse_moe.experts.47.w3", "model.layers.29.block_sparse_moe.experts.48.w3", "model.layers.29.block_sparse_moe.experts.49.w3", "model.layers.29.block_sparse_moe.experts.50.w3", "model.layers.29.block_sparse_moe.experts.51.w3", "model.layers.29.block_sparse_moe.experts.52.w3", "model.layers.29.block_sparse_moe.experts.53.w3", "model.layers.29.block_sparse_moe.experts.54.w3", "model.layers.29.block_sparse_moe.experts.55.w3", "model.layers.29.block_sparse_moe.experts.56.w3", "model.layers.29.block_sparse_moe.experts.57.w3", "model.layers.29.block_sparse_moe.experts.58.w3", "model.layers.29.block_sparse_moe.experts.59.w3", "model.layers.29.block_sparse_moe.experts.60.w3", "model.layers.29.block_sparse_moe.experts.61.w3", "model.layers.29.block_sparse_moe.experts.62.w3", "model.layers.29.block_sparse_moe.experts.63.w3", "model.layers.29.block_sparse_moe.experts.64.w3", "model.layers.29.block_sparse_moe.experts.65.w3", "model.layers.29.block_sparse_moe.experts.66.w3", "model.layers.29.block_sparse_moe.experts.67.w3", "model.layers.29.block_sparse_moe.experts.68.w3", "model.layers.29.block_sparse_moe.experts.69.w3", "model.layers.29.block_sparse_moe.experts.70.w3", "model.layers.29.block_sparse_moe.experts.71.w3", "model.layers.29.block_sparse_moe.experts.72.w3", "model.layers.29.block_sparse_moe.experts.73.w3", "model.layers.29.block_sparse_moe.experts.74.w3", "model.layers.29.block_sparse_moe.experts.75.w3", "model.layers.29.block_sparse_moe.experts.76.w3", "model.layers.29.block_sparse_moe.experts.77.w3", "model.layers.29.block_sparse_moe.experts.78.w3", "model.layers.29.block_sparse_moe.experts.79.w3", "model.layers.29.block_sparse_moe.experts.80.w3", "model.layers.29.block_sparse_moe.experts.81.w3", "model.layers.29.block_sparse_moe.experts.82.w3", "model.layers.29.block_sparse_moe.experts.83.w3", "model.layers.29.block_sparse_moe.experts.84.w3", "model.layers.29.block_sparse_moe.experts.85.w3", "model.layers.29.block_sparse_moe.experts.86.w3", "model.layers.29.block_sparse_moe.experts.87.w3", "model.layers.29.block_sparse_moe.experts.88.w3", "model.layers.29.block_sparse_moe.experts.89.w3", "model.layers.29.block_sparse_moe.experts.90.w3", "model.layers.29.block_sparse_moe.experts.91.w3", "model.layers.29.block_sparse_moe.experts.92.w3", "model.layers.29.block_sparse_moe.experts.93.w3", "model.layers.29.block_sparse_moe.experts.94.w3", "model.layers.29.block_sparse_moe.experts.95.w3", "model.layers.29.block_sparse_moe.experts.96.w3", "model.layers.29.block_sparse_moe.experts.97.w3", "model.layers.29.block_sparse_moe.experts.98.w3", "model.layers.29.block_sparse_moe.experts.99.w3", "model.layers.29.block_sparse_moe.experts.100.w3", "model.layers.29.block_sparse_moe.experts.101.w3", "model.layers.29.block_sparse_moe.experts.102.w3", "model.layers.29.block_sparse_moe.experts.103.w3", "model.layers.29.block_sparse_moe.experts.104.w3", "model.layers.29.block_sparse_moe.experts.105.w3", "model.layers.29.block_sparse_moe.experts.106.w3", "model.layers.29.block_sparse_moe.experts.107.w3", "model.layers.29.block_sparse_moe.experts.108.w3", "model.layers.29.block_sparse_moe.experts.109.w3", "model.layers.29.block_sparse_moe.experts.110.w3", "model.layers.29.block_sparse_moe.experts.111.w3", "model.layers.29.block_sparse_moe.experts.112.w3", "model.layers.29.block_sparse_moe.experts.113.w3", "model.layers.29.block_sparse_moe.experts.114.w3", "model.layers.29.block_sparse_moe.experts.115.w3", "model.layers.29.block_sparse_moe.experts.116.w3", "model.layers.29.block_sparse_moe.experts.117.w3", "model.layers.29.block_sparse_moe.experts.118.w3", "model.layers.29.block_sparse_moe.experts.119.w3", "model.layers.29.block_sparse_moe.experts.120.w3", "model.layers.29.block_sparse_moe.experts.121.w3", "model.layers.29.block_sparse_moe.experts.122.w3", "model.layers.29.block_sparse_moe.experts.123.w3", "model.layers.29.block_sparse_moe.experts.124.w3", "model.layers.29.block_sparse_moe.experts.125.w3", "model.layers.29.block_sparse_moe.experts.126.w3", "model.layers.29.block_sparse_moe.experts.127.w3", "model.layers.29.block_sparse_moe.experts.128.w3", "model.layers.29.block_sparse_moe.experts.129.w3", "model.layers.29.block_sparse_moe.experts.130.w3", "model.layers.29.block_sparse_moe.experts.131.w3", "model.layers.29.block_sparse_moe.experts.132.w3", "model.layers.29.block_sparse_moe.experts.133.w3", "model.layers.29.block_sparse_moe.experts.134.w3", "model.layers.29.block_sparse_moe.experts.135.w3", "model.layers.29.block_sparse_moe.experts.136.w3", "model.layers.29.block_sparse_moe.experts.137.w3", "model.layers.29.block_sparse_moe.experts.138.w3", "model.layers.29.block_sparse_moe.experts.139.w3", "model.layers.29.block_sparse_moe.experts.140.w3", "model.layers.29.block_sparse_moe.experts.141.w3", "model.layers.29.block_sparse_moe.experts.142.w3", "model.layers.29.block_sparse_moe.experts.143.w3", "model.layers.29.block_sparse_moe.experts.144.w3", "model.layers.29.block_sparse_moe.experts.145.w3", "model.layers.29.block_sparse_moe.experts.146.w3", "model.layers.29.block_sparse_moe.experts.147.w3", "model.layers.29.block_sparse_moe.experts.148.w3", "model.layers.29.block_sparse_moe.experts.149.w3", "model.layers.29.block_sparse_moe.experts.150.w3", "model.layers.29.block_sparse_moe.experts.151.w3", "model.layers.29.block_sparse_moe.experts.152.w3", "model.layers.29.block_sparse_moe.experts.153.w3", "model.layers.29.block_sparse_moe.experts.154.w3", "model.layers.29.block_sparse_moe.experts.155.w3", "model.layers.29.block_sparse_moe.experts.156.w3", "model.layers.29.block_sparse_moe.experts.157.w3", "model.layers.29.block_sparse_moe.experts.158.w3", "model.layers.29.block_sparse_moe.experts.159.w3", "model.layers.29.block_sparse_moe.experts.160.w3", "model.layers.29.block_sparse_moe.experts.161.w3", "model.layers.29.block_sparse_moe.experts.162.w3", "model.layers.29.block_sparse_moe.experts.163.w3", "model.layers.29.block_sparse_moe.experts.164.w3", "model.layers.29.block_sparse_moe.experts.165.w3", "model.layers.29.block_sparse_moe.experts.166.w3", "model.layers.29.block_sparse_moe.experts.167.w3", "model.layers.29.block_sparse_moe.experts.168.w3", "model.layers.29.block_sparse_moe.experts.169.w3", "model.layers.29.block_sparse_moe.experts.170.w3", "model.layers.29.block_sparse_moe.experts.171.w3", "model.layers.29.block_sparse_moe.experts.172.w3", "model.layers.29.block_sparse_moe.experts.173.w3", "model.layers.29.block_sparse_moe.experts.174.w3", "model.layers.29.block_sparse_moe.experts.175.w3", "model.layers.29.block_sparse_moe.experts.176.w3", "model.layers.29.block_sparse_moe.experts.177.w3", "model.layers.29.block_sparse_moe.experts.178.w3", "model.layers.29.block_sparse_moe.experts.179.w3", "model.layers.29.block_sparse_moe.experts.180.w3", "model.layers.29.block_sparse_moe.experts.181.w3", "model.layers.29.block_sparse_moe.experts.182.w3", "model.layers.29.block_sparse_moe.experts.183.w3", "model.layers.29.block_sparse_moe.experts.184.w3", "model.layers.29.block_sparse_moe.experts.185.w3", "model.layers.29.block_sparse_moe.experts.186.w3", "model.layers.29.block_sparse_moe.experts.187.w3", "model.layers.29.block_sparse_moe.experts.188.w3", "model.layers.29.block_sparse_moe.experts.189.w3", "model.layers.29.block_sparse_moe.experts.190.w3", "model.layers.29.block_sparse_moe.experts.191.w3", "model.layers.29.block_sparse_moe.experts.192.w3", "model.layers.29.block_sparse_moe.experts.193.w3", "model.layers.29.block_sparse_moe.experts.194.w3", "model.layers.29.block_sparse_moe.experts.195.w3", "model.layers.29.block_sparse_moe.experts.196.w3", "model.layers.29.block_sparse_moe.experts.197.w3", "model.layers.29.block_sparse_moe.experts.198.w3", "model.layers.29.block_sparse_moe.experts.199.w3", "model.layers.29.block_sparse_moe.experts.200.w3", "model.layers.29.block_sparse_moe.experts.201.w3", "model.layers.29.block_sparse_moe.experts.202.w3", "model.layers.29.block_sparse_moe.experts.203.w3", "model.layers.29.block_sparse_moe.experts.204.w3", "model.layers.29.block_sparse_moe.experts.205.w3", "model.layers.29.block_sparse_moe.experts.206.w3", "model.layers.29.block_sparse_moe.experts.207.w3", "model.layers.29.block_sparse_moe.experts.208.w3", "model.layers.29.block_sparse_moe.experts.209.w3", "model.layers.29.block_sparse_moe.experts.210.w3", "model.layers.29.block_sparse_moe.experts.211.w3", "model.layers.29.block_sparse_moe.experts.212.w3", "model.layers.29.block_sparse_moe.experts.213.w3", "model.layers.29.block_sparse_moe.experts.214.w3", "model.layers.29.block_sparse_moe.experts.215.w3", "model.layers.29.block_sparse_moe.experts.216.w3", "model.layers.29.block_sparse_moe.experts.217.w3", "model.layers.29.block_sparse_moe.experts.218.w3", "model.layers.29.block_sparse_moe.experts.219.w3", "model.layers.29.block_sparse_moe.experts.220.w3", "model.layers.29.block_sparse_moe.experts.221.w3", "model.layers.29.block_sparse_moe.experts.222.w3", "model.layers.29.block_sparse_moe.experts.223.w3", "model.layers.29.block_sparse_moe.experts.224.w3", "model.layers.29.block_sparse_moe.experts.225.w3", "model.layers.29.block_sparse_moe.experts.226.w3", "model.layers.29.block_sparse_moe.experts.227.w3", "model.layers.29.block_sparse_moe.experts.228.w3", "model.layers.29.block_sparse_moe.experts.229.w3", "model.layers.29.block_sparse_moe.experts.230.w3", "model.layers.29.block_sparse_moe.experts.231.w3", "model.layers.29.block_sparse_moe.experts.232.w3", "model.layers.29.block_sparse_moe.experts.233.w3", "model.layers.29.block_sparse_moe.experts.234.w3", "model.layers.29.block_sparse_moe.experts.235.w3", "model.layers.29.block_sparse_moe.experts.236.w3", "model.layers.29.block_sparse_moe.experts.237.w3", "model.layers.29.block_sparse_moe.experts.238.w3", "model.layers.29.block_sparse_moe.experts.239.w3", "model.layers.29.block_sparse_moe.experts.240.w3", "model.layers.29.block_sparse_moe.experts.241.w3", "model.layers.29.block_sparse_moe.experts.242.w3", "model.layers.29.block_sparse_moe.experts.243.w3", "model.layers.29.block_sparse_moe.experts.244.w3", "model.layers.29.block_sparse_moe.experts.245.w3", "model.layers.29.block_sparse_moe.experts.246.w3", "model.layers.29.block_sparse_moe.experts.247.w3", "model.layers.29.block_sparse_moe.experts.248.w3", "model.layers.29.block_sparse_moe.experts.249.w3", "model.layers.29.block_sparse_moe.experts.250.w3", "model.layers.29.block_sparse_moe.experts.251.w3", "model.layers.29.block_sparse_moe.experts.252.w3", "model.layers.29.block_sparse_moe.experts.253.w3", "model.layers.29.block_sparse_moe.experts.254.w3", "model.layers.29.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0008041739463806041, "dbits": 2415919104 } ] }, { "idx": 149, "layers": [ "model.layers.29.block_sparse_moe.experts.0.w2", "model.layers.29.block_sparse_moe.experts.1.w2", "model.layers.29.block_sparse_moe.experts.2.w2", "model.layers.29.block_sparse_moe.experts.3.w2", "model.layers.29.block_sparse_moe.experts.4.w2", "model.layers.29.block_sparse_moe.experts.5.w2", "model.layers.29.block_sparse_moe.experts.6.w2", "model.layers.29.block_sparse_moe.experts.7.w2", "model.layers.29.block_sparse_moe.experts.8.w2", "model.layers.29.block_sparse_moe.experts.9.w2", "model.layers.29.block_sparse_moe.experts.10.w2", "model.layers.29.block_sparse_moe.experts.11.w2", "model.layers.29.block_sparse_moe.experts.12.w2", "model.layers.29.block_sparse_moe.experts.13.w2", "model.layers.29.block_sparse_moe.experts.14.w2", "model.layers.29.block_sparse_moe.experts.15.w2", "model.layers.29.block_sparse_moe.experts.16.w2", "model.layers.29.block_sparse_moe.experts.17.w2", "model.layers.29.block_sparse_moe.experts.18.w2", "model.layers.29.block_sparse_moe.experts.19.w2", "model.layers.29.block_sparse_moe.experts.20.w2", "model.layers.29.block_sparse_moe.experts.21.w2", "model.layers.29.block_sparse_moe.experts.22.w2", "model.layers.29.block_sparse_moe.experts.23.w2", "model.layers.29.block_sparse_moe.experts.24.w2", "model.layers.29.block_sparse_moe.experts.25.w2", "model.layers.29.block_sparse_moe.experts.26.w2", "model.layers.29.block_sparse_moe.experts.27.w2", "model.layers.29.block_sparse_moe.experts.28.w2", "model.layers.29.block_sparse_moe.experts.29.w2", "model.layers.29.block_sparse_moe.experts.30.w2", "model.layers.29.block_sparse_moe.experts.31.w2", "model.layers.29.block_sparse_moe.experts.32.w2", "model.layers.29.block_sparse_moe.experts.33.w2", "model.layers.29.block_sparse_moe.experts.34.w2", "model.layers.29.block_sparse_moe.experts.35.w2", "model.layers.29.block_sparse_moe.experts.36.w2", "model.layers.29.block_sparse_moe.experts.37.w2", "model.layers.29.block_sparse_moe.experts.38.w2", "model.layers.29.block_sparse_moe.experts.39.w2", "model.layers.29.block_sparse_moe.experts.40.w2", "model.layers.29.block_sparse_moe.experts.41.w2", "model.layers.29.block_sparse_moe.experts.42.w2", "model.layers.29.block_sparse_moe.experts.43.w2", "model.layers.29.block_sparse_moe.experts.44.w2", "model.layers.29.block_sparse_moe.experts.45.w2", "model.layers.29.block_sparse_moe.experts.46.w2", "model.layers.29.block_sparse_moe.experts.47.w2", "model.layers.29.block_sparse_moe.experts.48.w2", "model.layers.29.block_sparse_moe.experts.49.w2", "model.layers.29.block_sparse_moe.experts.50.w2", "model.layers.29.block_sparse_moe.experts.51.w2", "model.layers.29.block_sparse_moe.experts.52.w2", "model.layers.29.block_sparse_moe.experts.53.w2", "model.layers.29.block_sparse_moe.experts.54.w2", "model.layers.29.block_sparse_moe.experts.55.w2", "model.layers.29.block_sparse_moe.experts.56.w2", "model.layers.29.block_sparse_moe.experts.57.w2", "model.layers.29.block_sparse_moe.experts.58.w2", "model.layers.29.block_sparse_moe.experts.59.w2", "model.layers.29.block_sparse_moe.experts.60.w2", "model.layers.29.block_sparse_moe.experts.61.w2", "model.layers.29.block_sparse_moe.experts.62.w2", "model.layers.29.block_sparse_moe.experts.63.w2", "model.layers.29.block_sparse_moe.experts.64.w2", "model.layers.29.block_sparse_moe.experts.65.w2", "model.layers.29.block_sparse_moe.experts.66.w2", "model.layers.29.block_sparse_moe.experts.67.w2", "model.layers.29.block_sparse_moe.experts.68.w2", "model.layers.29.block_sparse_moe.experts.69.w2", "model.layers.29.block_sparse_moe.experts.70.w2", "model.layers.29.block_sparse_moe.experts.71.w2", "model.layers.29.block_sparse_moe.experts.72.w2", "model.layers.29.block_sparse_moe.experts.73.w2", "model.layers.29.block_sparse_moe.experts.74.w2", "model.layers.29.block_sparse_moe.experts.75.w2", "model.layers.29.block_sparse_moe.experts.76.w2", "model.layers.29.block_sparse_moe.experts.77.w2", "model.layers.29.block_sparse_moe.experts.78.w2", "model.layers.29.block_sparse_moe.experts.79.w2", "model.layers.29.block_sparse_moe.experts.80.w2", "model.layers.29.block_sparse_moe.experts.81.w2", "model.layers.29.block_sparse_moe.experts.82.w2", "model.layers.29.block_sparse_moe.experts.83.w2", "model.layers.29.block_sparse_moe.experts.84.w2", "model.layers.29.block_sparse_moe.experts.85.w2", "model.layers.29.block_sparse_moe.experts.86.w2", "model.layers.29.block_sparse_moe.experts.87.w2", "model.layers.29.block_sparse_moe.experts.88.w2", "model.layers.29.block_sparse_moe.experts.89.w2", "model.layers.29.block_sparse_moe.experts.90.w2", "model.layers.29.block_sparse_moe.experts.91.w2", "model.layers.29.block_sparse_moe.experts.92.w2", "model.layers.29.block_sparse_moe.experts.93.w2", "model.layers.29.block_sparse_moe.experts.94.w2", "model.layers.29.block_sparse_moe.experts.95.w2", "model.layers.29.block_sparse_moe.experts.96.w2", "model.layers.29.block_sparse_moe.experts.97.w2", "model.layers.29.block_sparse_moe.experts.98.w2", "model.layers.29.block_sparse_moe.experts.99.w2", "model.layers.29.block_sparse_moe.experts.100.w2", "model.layers.29.block_sparse_moe.experts.101.w2", "model.layers.29.block_sparse_moe.experts.102.w2", "model.layers.29.block_sparse_moe.experts.103.w2", "model.layers.29.block_sparse_moe.experts.104.w2", "model.layers.29.block_sparse_moe.experts.105.w2", "model.layers.29.block_sparse_moe.experts.106.w2", "model.layers.29.block_sparse_moe.experts.107.w2", "model.layers.29.block_sparse_moe.experts.108.w2", "model.layers.29.block_sparse_moe.experts.109.w2", "model.layers.29.block_sparse_moe.experts.110.w2", "model.layers.29.block_sparse_moe.experts.111.w2", "model.layers.29.block_sparse_moe.experts.112.w2", "model.layers.29.block_sparse_moe.experts.113.w2", "model.layers.29.block_sparse_moe.experts.114.w2", "model.layers.29.block_sparse_moe.experts.115.w2", "model.layers.29.block_sparse_moe.experts.116.w2", "model.layers.29.block_sparse_moe.experts.117.w2", "model.layers.29.block_sparse_moe.experts.118.w2", "model.layers.29.block_sparse_moe.experts.119.w2", "model.layers.29.block_sparse_moe.experts.120.w2", "model.layers.29.block_sparse_moe.experts.121.w2", "model.layers.29.block_sparse_moe.experts.122.w2", "model.layers.29.block_sparse_moe.experts.123.w2", "model.layers.29.block_sparse_moe.experts.124.w2", "model.layers.29.block_sparse_moe.experts.125.w2", "model.layers.29.block_sparse_moe.experts.126.w2", "model.layers.29.block_sparse_moe.experts.127.w2", "model.layers.29.block_sparse_moe.experts.128.w2", "model.layers.29.block_sparse_moe.experts.129.w2", "model.layers.29.block_sparse_moe.experts.130.w2", "model.layers.29.block_sparse_moe.experts.131.w2", "model.layers.29.block_sparse_moe.experts.132.w2", "model.layers.29.block_sparse_moe.experts.133.w2", "model.layers.29.block_sparse_moe.experts.134.w2", "model.layers.29.block_sparse_moe.experts.135.w2", "model.layers.29.block_sparse_moe.experts.136.w2", "model.layers.29.block_sparse_moe.experts.137.w2", "model.layers.29.block_sparse_moe.experts.138.w2", "model.layers.29.block_sparse_moe.experts.139.w2", "model.layers.29.block_sparse_moe.experts.140.w2", "model.layers.29.block_sparse_moe.experts.141.w2", "model.layers.29.block_sparse_moe.experts.142.w2", "model.layers.29.block_sparse_moe.experts.143.w2", "model.layers.29.block_sparse_moe.experts.144.w2", "model.layers.29.block_sparse_moe.experts.145.w2", "model.layers.29.block_sparse_moe.experts.146.w2", "model.layers.29.block_sparse_moe.experts.147.w2", "model.layers.29.block_sparse_moe.experts.148.w2", "model.layers.29.block_sparse_moe.experts.149.w2", "model.layers.29.block_sparse_moe.experts.150.w2", "model.layers.29.block_sparse_moe.experts.151.w2", "model.layers.29.block_sparse_moe.experts.152.w2", "model.layers.29.block_sparse_moe.experts.153.w2", "model.layers.29.block_sparse_moe.experts.154.w2", "model.layers.29.block_sparse_moe.experts.155.w2", "model.layers.29.block_sparse_moe.experts.156.w2", "model.layers.29.block_sparse_moe.experts.157.w2", "model.layers.29.block_sparse_moe.experts.158.w2", "model.layers.29.block_sparse_moe.experts.159.w2", "model.layers.29.block_sparse_moe.experts.160.w2", "model.layers.29.block_sparse_moe.experts.161.w2", "model.layers.29.block_sparse_moe.experts.162.w2", "model.layers.29.block_sparse_moe.experts.163.w2", "model.layers.29.block_sparse_moe.experts.164.w2", "model.layers.29.block_sparse_moe.experts.165.w2", "model.layers.29.block_sparse_moe.experts.166.w2", "model.layers.29.block_sparse_moe.experts.167.w2", "model.layers.29.block_sparse_moe.experts.168.w2", "model.layers.29.block_sparse_moe.experts.169.w2", "model.layers.29.block_sparse_moe.experts.170.w2", "model.layers.29.block_sparse_moe.experts.171.w2", "model.layers.29.block_sparse_moe.experts.172.w2", "model.layers.29.block_sparse_moe.experts.173.w2", "model.layers.29.block_sparse_moe.experts.174.w2", "model.layers.29.block_sparse_moe.experts.175.w2", "model.layers.29.block_sparse_moe.experts.176.w2", "model.layers.29.block_sparse_moe.experts.177.w2", "model.layers.29.block_sparse_moe.experts.178.w2", "model.layers.29.block_sparse_moe.experts.179.w2", "model.layers.29.block_sparse_moe.experts.180.w2", "model.layers.29.block_sparse_moe.experts.181.w2", "model.layers.29.block_sparse_moe.experts.182.w2", "model.layers.29.block_sparse_moe.experts.183.w2", "model.layers.29.block_sparse_moe.experts.184.w2", "model.layers.29.block_sparse_moe.experts.185.w2", "model.layers.29.block_sparse_moe.experts.186.w2", "model.layers.29.block_sparse_moe.experts.187.w2", "model.layers.29.block_sparse_moe.experts.188.w2", "model.layers.29.block_sparse_moe.experts.189.w2", "model.layers.29.block_sparse_moe.experts.190.w2", "model.layers.29.block_sparse_moe.experts.191.w2", "model.layers.29.block_sparse_moe.experts.192.w2", "model.layers.29.block_sparse_moe.experts.193.w2", "model.layers.29.block_sparse_moe.experts.194.w2", "model.layers.29.block_sparse_moe.experts.195.w2", "model.layers.29.block_sparse_moe.experts.196.w2", "model.layers.29.block_sparse_moe.experts.197.w2", "model.layers.29.block_sparse_moe.experts.198.w2", "model.layers.29.block_sparse_moe.experts.199.w2", "model.layers.29.block_sparse_moe.experts.200.w2", "model.layers.29.block_sparse_moe.experts.201.w2", "model.layers.29.block_sparse_moe.experts.202.w2", "model.layers.29.block_sparse_moe.experts.203.w2", "model.layers.29.block_sparse_moe.experts.204.w2", "model.layers.29.block_sparse_moe.experts.205.w2", "model.layers.29.block_sparse_moe.experts.206.w2", "model.layers.29.block_sparse_moe.experts.207.w2", "model.layers.29.block_sparse_moe.experts.208.w2", "model.layers.29.block_sparse_moe.experts.209.w2", "model.layers.29.block_sparse_moe.experts.210.w2", "model.layers.29.block_sparse_moe.experts.211.w2", "model.layers.29.block_sparse_moe.experts.212.w2", "model.layers.29.block_sparse_moe.experts.213.w2", "model.layers.29.block_sparse_moe.experts.214.w2", "model.layers.29.block_sparse_moe.experts.215.w2", "model.layers.29.block_sparse_moe.experts.216.w2", "model.layers.29.block_sparse_moe.experts.217.w2", "model.layers.29.block_sparse_moe.experts.218.w2", "model.layers.29.block_sparse_moe.experts.219.w2", "model.layers.29.block_sparse_moe.experts.220.w2", "model.layers.29.block_sparse_moe.experts.221.w2", "model.layers.29.block_sparse_moe.experts.222.w2", "model.layers.29.block_sparse_moe.experts.223.w2", "model.layers.29.block_sparse_moe.experts.224.w2", "model.layers.29.block_sparse_moe.experts.225.w2", "model.layers.29.block_sparse_moe.experts.226.w2", "model.layers.29.block_sparse_moe.experts.227.w2", "model.layers.29.block_sparse_moe.experts.228.w2", "model.layers.29.block_sparse_moe.experts.229.w2", "model.layers.29.block_sparse_moe.experts.230.w2", "model.layers.29.block_sparse_moe.experts.231.w2", "model.layers.29.block_sparse_moe.experts.232.w2", "model.layers.29.block_sparse_moe.experts.233.w2", "model.layers.29.block_sparse_moe.experts.234.w2", "model.layers.29.block_sparse_moe.experts.235.w2", "model.layers.29.block_sparse_moe.experts.236.w2", "model.layers.29.block_sparse_moe.experts.237.w2", "model.layers.29.block_sparse_moe.experts.238.w2", "model.layers.29.block_sparse_moe.experts.239.w2", "model.layers.29.block_sparse_moe.experts.240.w2", "model.layers.29.block_sparse_moe.experts.241.w2", "model.layers.29.block_sparse_moe.experts.242.w2", "model.layers.29.block_sparse_moe.experts.243.w2", "model.layers.29.block_sparse_moe.experts.244.w2", "model.layers.29.block_sparse_moe.experts.245.w2", "model.layers.29.block_sparse_moe.experts.246.w2", "model.layers.29.block_sparse_moe.experts.247.w2", "model.layers.29.block_sparse_moe.experts.248.w2", "model.layers.29.block_sparse_moe.experts.249.w2", "model.layers.29.block_sparse_moe.experts.250.w2", "model.layers.29.block_sparse_moe.experts.251.w2", "model.layers.29.block_sparse_moe.experts.252.w2", "model.layers.29.block_sparse_moe.experts.253.w2", "model.layers.29.block_sparse_moe.experts.254.w2", "model.layers.29.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.001021645590662923, "dbits": 1207959552 } ] }, { "idx": 150, "layers": [ "model.layers.30.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0002501998096704483, "dbits": 18874368 } ] }, { "idx": 151, "layers": [ "model.layers.30.self_attn.k_proj", "model.layers.30.self_attn.v_proj" ], "candidates": [ { "dkld": 0.0029356561601162068, "dbits": 6291456 } ] }, { "idx": 152, "layers": [ "model.layers.30.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0032961677759885677, "dbits": 18874368 } ] }, { "idx": 153, "layers": [ "model.layers.30.block_sparse_moe.experts.0.w1", "model.layers.30.block_sparse_moe.experts.1.w1", "model.layers.30.block_sparse_moe.experts.2.w1", "model.layers.30.block_sparse_moe.experts.3.w1", "model.layers.30.block_sparse_moe.experts.4.w1", "model.layers.30.block_sparse_moe.experts.5.w1", "model.layers.30.block_sparse_moe.experts.6.w1", "model.layers.30.block_sparse_moe.experts.7.w1", "model.layers.30.block_sparse_moe.experts.8.w1", "model.layers.30.block_sparse_moe.experts.9.w1", "model.layers.30.block_sparse_moe.experts.10.w1", "model.layers.30.block_sparse_moe.experts.11.w1", "model.layers.30.block_sparse_moe.experts.12.w1", "model.layers.30.block_sparse_moe.experts.13.w1", "model.layers.30.block_sparse_moe.experts.14.w1", "model.layers.30.block_sparse_moe.experts.15.w1", "model.layers.30.block_sparse_moe.experts.16.w1", "model.layers.30.block_sparse_moe.experts.17.w1", "model.layers.30.block_sparse_moe.experts.18.w1", "model.layers.30.block_sparse_moe.experts.19.w1", "model.layers.30.block_sparse_moe.experts.20.w1", "model.layers.30.block_sparse_moe.experts.21.w1", "model.layers.30.block_sparse_moe.experts.22.w1", "model.layers.30.block_sparse_moe.experts.23.w1", "model.layers.30.block_sparse_moe.experts.24.w1", "model.layers.30.block_sparse_moe.experts.25.w1", "model.layers.30.block_sparse_moe.experts.26.w1", "model.layers.30.block_sparse_moe.experts.27.w1", "model.layers.30.block_sparse_moe.experts.28.w1", "model.layers.30.block_sparse_moe.experts.29.w1", "model.layers.30.block_sparse_moe.experts.30.w1", "model.layers.30.block_sparse_moe.experts.31.w1", "model.layers.30.block_sparse_moe.experts.32.w1", "model.layers.30.block_sparse_moe.experts.33.w1", "model.layers.30.block_sparse_moe.experts.34.w1", "model.layers.30.block_sparse_moe.experts.35.w1", "model.layers.30.block_sparse_moe.experts.36.w1", "model.layers.30.block_sparse_moe.experts.37.w1", "model.layers.30.block_sparse_moe.experts.38.w1", "model.layers.30.block_sparse_moe.experts.39.w1", "model.layers.30.block_sparse_moe.experts.40.w1", "model.layers.30.block_sparse_moe.experts.41.w1", "model.layers.30.block_sparse_moe.experts.42.w1", "model.layers.30.block_sparse_moe.experts.43.w1", "model.layers.30.block_sparse_moe.experts.44.w1", "model.layers.30.block_sparse_moe.experts.45.w1", "model.layers.30.block_sparse_moe.experts.46.w1", "model.layers.30.block_sparse_moe.experts.47.w1", "model.layers.30.block_sparse_moe.experts.48.w1", "model.layers.30.block_sparse_moe.experts.49.w1", "model.layers.30.block_sparse_moe.experts.50.w1", "model.layers.30.block_sparse_moe.experts.51.w1", "model.layers.30.block_sparse_moe.experts.52.w1", "model.layers.30.block_sparse_moe.experts.53.w1", "model.layers.30.block_sparse_moe.experts.54.w1", "model.layers.30.block_sparse_moe.experts.55.w1", "model.layers.30.block_sparse_moe.experts.56.w1", "model.layers.30.block_sparse_moe.experts.57.w1", "model.layers.30.block_sparse_moe.experts.58.w1", "model.layers.30.block_sparse_moe.experts.59.w1", "model.layers.30.block_sparse_moe.experts.60.w1", "model.layers.30.block_sparse_moe.experts.61.w1", "model.layers.30.block_sparse_moe.experts.62.w1", "model.layers.30.block_sparse_moe.experts.63.w1", "model.layers.30.block_sparse_moe.experts.64.w1", "model.layers.30.block_sparse_moe.experts.65.w1", "model.layers.30.block_sparse_moe.experts.66.w1", "model.layers.30.block_sparse_moe.experts.67.w1", "model.layers.30.block_sparse_moe.experts.68.w1", "model.layers.30.block_sparse_moe.experts.69.w1", "model.layers.30.block_sparse_moe.experts.70.w1", "model.layers.30.block_sparse_moe.experts.71.w1", "model.layers.30.block_sparse_moe.experts.72.w1", "model.layers.30.block_sparse_moe.experts.73.w1", "model.layers.30.block_sparse_moe.experts.74.w1", "model.layers.30.block_sparse_moe.experts.75.w1", "model.layers.30.block_sparse_moe.experts.76.w1", "model.layers.30.block_sparse_moe.experts.77.w1", "model.layers.30.block_sparse_moe.experts.78.w1", "model.layers.30.block_sparse_moe.experts.79.w1", "model.layers.30.block_sparse_moe.experts.80.w1", "model.layers.30.block_sparse_moe.experts.81.w1", "model.layers.30.block_sparse_moe.experts.82.w1", "model.layers.30.block_sparse_moe.experts.83.w1", "model.layers.30.block_sparse_moe.experts.84.w1", "model.layers.30.block_sparse_moe.experts.85.w1", "model.layers.30.block_sparse_moe.experts.86.w1", "model.layers.30.block_sparse_moe.experts.87.w1", "model.layers.30.block_sparse_moe.experts.88.w1", "model.layers.30.block_sparse_moe.experts.89.w1", "model.layers.30.block_sparse_moe.experts.90.w1", "model.layers.30.block_sparse_moe.experts.91.w1", "model.layers.30.block_sparse_moe.experts.92.w1", "model.layers.30.block_sparse_moe.experts.93.w1", "model.layers.30.block_sparse_moe.experts.94.w1", "model.layers.30.block_sparse_moe.experts.95.w1", "model.layers.30.block_sparse_moe.experts.96.w1", "model.layers.30.block_sparse_moe.experts.97.w1", "model.layers.30.block_sparse_moe.experts.98.w1", "model.layers.30.block_sparse_moe.experts.99.w1", "model.layers.30.block_sparse_moe.experts.100.w1", "model.layers.30.block_sparse_moe.experts.101.w1", "model.layers.30.block_sparse_moe.experts.102.w1", "model.layers.30.block_sparse_moe.experts.103.w1", "model.layers.30.block_sparse_moe.experts.104.w1", "model.layers.30.block_sparse_moe.experts.105.w1", "model.layers.30.block_sparse_moe.experts.106.w1", "model.layers.30.block_sparse_moe.experts.107.w1", "model.layers.30.block_sparse_moe.experts.108.w1", "model.layers.30.block_sparse_moe.experts.109.w1", "model.layers.30.block_sparse_moe.experts.110.w1", "model.layers.30.block_sparse_moe.experts.111.w1", "model.layers.30.block_sparse_moe.experts.112.w1", "model.layers.30.block_sparse_moe.experts.113.w1", "model.layers.30.block_sparse_moe.experts.114.w1", "model.layers.30.block_sparse_moe.experts.115.w1", "model.layers.30.block_sparse_moe.experts.116.w1", "model.layers.30.block_sparse_moe.experts.117.w1", "model.layers.30.block_sparse_moe.experts.118.w1", "model.layers.30.block_sparse_moe.experts.119.w1", "model.layers.30.block_sparse_moe.experts.120.w1", "model.layers.30.block_sparse_moe.experts.121.w1", "model.layers.30.block_sparse_moe.experts.122.w1", "model.layers.30.block_sparse_moe.experts.123.w1", "model.layers.30.block_sparse_moe.experts.124.w1", "model.layers.30.block_sparse_moe.experts.125.w1", "model.layers.30.block_sparse_moe.experts.126.w1", "model.layers.30.block_sparse_moe.experts.127.w1", "model.layers.30.block_sparse_moe.experts.128.w1", "model.layers.30.block_sparse_moe.experts.129.w1", "model.layers.30.block_sparse_moe.experts.130.w1", "model.layers.30.block_sparse_moe.experts.131.w1", "model.layers.30.block_sparse_moe.experts.132.w1", "model.layers.30.block_sparse_moe.experts.133.w1", "model.layers.30.block_sparse_moe.experts.134.w1", "model.layers.30.block_sparse_moe.experts.135.w1", "model.layers.30.block_sparse_moe.experts.136.w1", "model.layers.30.block_sparse_moe.experts.137.w1", "model.layers.30.block_sparse_moe.experts.138.w1", "model.layers.30.block_sparse_moe.experts.139.w1", "model.layers.30.block_sparse_moe.experts.140.w1", "model.layers.30.block_sparse_moe.experts.141.w1", "model.layers.30.block_sparse_moe.experts.142.w1", "model.layers.30.block_sparse_moe.experts.143.w1", "model.layers.30.block_sparse_moe.experts.144.w1", "model.layers.30.block_sparse_moe.experts.145.w1", "model.layers.30.block_sparse_moe.experts.146.w1", "model.layers.30.block_sparse_moe.experts.147.w1", "model.layers.30.block_sparse_moe.experts.148.w1", "model.layers.30.block_sparse_moe.experts.149.w1", "model.layers.30.block_sparse_moe.experts.150.w1", "model.layers.30.block_sparse_moe.experts.151.w1", "model.layers.30.block_sparse_moe.experts.152.w1", "model.layers.30.block_sparse_moe.experts.153.w1", "model.layers.30.block_sparse_moe.experts.154.w1", "model.layers.30.block_sparse_moe.experts.155.w1", "model.layers.30.block_sparse_moe.experts.156.w1", "model.layers.30.block_sparse_moe.experts.157.w1", "model.layers.30.block_sparse_moe.experts.158.w1", "model.layers.30.block_sparse_moe.experts.159.w1", "model.layers.30.block_sparse_moe.experts.160.w1", "model.layers.30.block_sparse_moe.experts.161.w1", "model.layers.30.block_sparse_moe.experts.162.w1", "model.layers.30.block_sparse_moe.experts.163.w1", "model.layers.30.block_sparse_moe.experts.164.w1", "model.layers.30.block_sparse_moe.experts.165.w1", "model.layers.30.block_sparse_moe.experts.166.w1", "model.layers.30.block_sparse_moe.experts.167.w1", "model.layers.30.block_sparse_moe.experts.168.w1", "model.layers.30.block_sparse_moe.experts.169.w1", "model.layers.30.block_sparse_moe.experts.170.w1", "model.layers.30.block_sparse_moe.experts.171.w1", "model.layers.30.block_sparse_moe.experts.172.w1", "model.layers.30.block_sparse_moe.experts.173.w1", "model.layers.30.block_sparse_moe.experts.174.w1", "model.layers.30.block_sparse_moe.experts.175.w1", "model.layers.30.block_sparse_moe.experts.176.w1", "model.layers.30.block_sparse_moe.experts.177.w1", "model.layers.30.block_sparse_moe.experts.178.w1", "model.layers.30.block_sparse_moe.experts.179.w1", "model.layers.30.block_sparse_moe.experts.180.w1", "model.layers.30.block_sparse_moe.experts.181.w1", "model.layers.30.block_sparse_moe.experts.182.w1", "model.layers.30.block_sparse_moe.experts.183.w1", "model.layers.30.block_sparse_moe.experts.184.w1", "model.layers.30.block_sparse_moe.experts.185.w1", "model.layers.30.block_sparse_moe.experts.186.w1", "model.layers.30.block_sparse_moe.experts.187.w1", "model.layers.30.block_sparse_moe.experts.188.w1", "model.layers.30.block_sparse_moe.experts.189.w1", "model.layers.30.block_sparse_moe.experts.190.w1", "model.layers.30.block_sparse_moe.experts.191.w1", "model.layers.30.block_sparse_moe.experts.192.w1", "model.layers.30.block_sparse_moe.experts.193.w1", "model.layers.30.block_sparse_moe.experts.194.w1", "model.layers.30.block_sparse_moe.experts.195.w1", "model.layers.30.block_sparse_moe.experts.196.w1", "model.layers.30.block_sparse_moe.experts.197.w1", "model.layers.30.block_sparse_moe.experts.198.w1", "model.layers.30.block_sparse_moe.experts.199.w1", "model.layers.30.block_sparse_moe.experts.200.w1", "model.layers.30.block_sparse_moe.experts.201.w1", "model.layers.30.block_sparse_moe.experts.202.w1", "model.layers.30.block_sparse_moe.experts.203.w1", "model.layers.30.block_sparse_moe.experts.204.w1", "model.layers.30.block_sparse_moe.experts.205.w1", "model.layers.30.block_sparse_moe.experts.206.w1", "model.layers.30.block_sparse_moe.experts.207.w1", "model.layers.30.block_sparse_moe.experts.208.w1", "model.layers.30.block_sparse_moe.experts.209.w1", "model.layers.30.block_sparse_moe.experts.210.w1", "model.layers.30.block_sparse_moe.experts.211.w1", "model.layers.30.block_sparse_moe.experts.212.w1", "model.layers.30.block_sparse_moe.experts.213.w1", "model.layers.30.block_sparse_moe.experts.214.w1", "model.layers.30.block_sparse_moe.experts.215.w1", "model.layers.30.block_sparse_moe.experts.216.w1", "model.layers.30.block_sparse_moe.experts.217.w1", "model.layers.30.block_sparse_moe.experts.218.w1", "model.layers.30.block_sparse_moe.experts.219.w1", "model.layers.30.block_sparse_moe.experts.220.w1", "model.layers.30.block_sparse_moe.experts.221.w1", "model.layers.30.block_sparse_moe.experts.222.w1", "model.layers.30.block_sparse_moe.experts.223.w1", "model.layers.30.block_sparse_moe.experts.224.w1", "model.layers.30.block_sparse_moe.experts.225.w1", "model.layers.30.block_sparse_moe.experts.226.w1", "model.layers.30.block_sparse_moe.experts.227.w1", "model.layers.30.block_sparse_moe.experts.228.w1", "model.layers.30.block_sparse_moe.experts.229.w1", "model.layers.30.block_sparse_moe.experts.230.w1", "model.layers.30.block_sparse_moe.experts.231.w1", "model.layers.30.block_sparse_moe.experts.232.w1", "model.layers.30.block_sparse_moe.experts.233.w1", "model.layers.30.block_sparse_moe.experts.234.w1", "model.layers.30.block_sparse_moe.experts.235.w1", "model.layers.30.block_sparse_moe.experts.236.w1", "model.layers.30.block_sparse_moe.experts.237.w1", "model.layers.30.block_sparse_moe.experts.238.w1", "model.layers.30.block_sparse_moe.experts.239.w1", "model.layers.30.block_sparse_moe.experts.240.w1", "model.layers.30.block_sparse_moe.experts.241.w1", "model.layers.30.block_sparse_moe.experts.242.w1", "model.layers.30.block_sparse_moe.experts.243.w1", "model.layers.30.block_sparse_moe.experts.244.w1", "model.layers.30.block_sparse_moe.experts.245.w1", "model.layers.30.block_sparse_moe.experts.246.w1", "model.layers.30.block_sparse_moe.experts.247.w1", "model.layers.30.block_sparse_moe.experts.248.w1", "model.layers.30.block_sparse_moe.experts.249.w1", "model.layers.30.block_sparse_moe.experts.250.w1", "model.layers.30.block_sparse_moe.experts.251.w1", "model.layers.30.block_sparse_moe.experts.252.w1", "model.layers.30.block_sparse_moe.experts.253.w1", "model.layers.30.block_sparse_moe.experts.254.w1", "model.layers.30.block_sparse_moe.experts.255.w1", "model.layers.30.block_sparse_moe.experts.0.w3", "model.layers.30.block_sparse_moe.experts.1.w3", "model.layers.30.block_sparse_moe.experts.2.w3", "model.layers.30.block_sparse_moe.experts.3.w3", "model.layers.30.block_sparse_moe.experts.4.w3", "model.layers.30.block_sparse_moe.experts.5.w3", "model.layers.30.block_sparse_moe.experts.6.w3", "model.layers.30.block_sparse_moe.experts.7.w3", "model.layers.30.block_sparse_moe.experts.8.w3", "model.layers.30.block_sparse_moe.experts.9.w3", "model.layers.30.block_sparse_moe.experts.10.w3", "model.layers.30.block_sparse_moe.experts.11.w3", "model.layers.30.block_sparse_moe.experts.12.w3", "model.layers.30.block_sparse_moe.experts.13.w3", "model.layers.30.block_sparse_moe.experts.14.w3", "model.layers.30.block_sparse_moe.experts.15.w3", "model.layers.30.block_sparse_moe.experts.16.w3", "model.layers.30.block_sparse_moe.experts.17.w3", "model.layers.30.block_sparse_moe.experts.18.w3", "model.layers.30.block_sparse_moe.experts.19.w3", "model.layers.30.block_sparse_moe.experts.20.w3", "model.layers.30.block_sparse_moe.experts.21.w3", "model.layers.30.block_sparse_moe.experts.22.w3", "model.layers.30.block_sparse_moe.experts.23.w3", "model.layers.30.block_sparse_moe.experts.24.w3", "model.layers.30.block_sparse_moe.experts.25.w3", "model.layers.30.block_sparse_moe.experts.26.w3", "model.layers.30.block_sparse_moe.experts.27.w3", "model.layers.30.block_sparse_moe.experts.28.w3", "model.layers.30.block_sparse_moe.experts.29.w3", "model.layers.30.block_sparse_moe.experts.30.w3", "model.layers.30.block_sparse_moe.experts.31.w3", "model.layers.30.block_sparse_moe.experts.32.w3", "model.layers.30.block_sparse_moe.experts.33.w3", "model.layers.30.block_sparse_moe.experts.34.w3", "model.layers.30.block_sparse_moe.experts.35.w3", "model.layers.30.block_sparse_moe.experts.36.w3", "model.layers.30.block_sparse_moe.experts.37.w3", "model.layers.30.block_sparse_moe.experts.38.w3", "model.layers.30.block_sparse_moe.experts.39.w3", "model.layers.30.block_sparse_moe.experts.40.w3", "model.layers.30.block_sparse_moe.experts.41.w3", "model.layers.30.block_sparse_moe.experts.42.w3", "model.layers.30.block_sparse_moe.experts.43.w3", "model.layers.30.block_sparse_moe.experts.44.w3", "model.layers.30.block_sparse_moe.experts.45.w3", "model.layers.30.block_sparse_moe.experts.46.w3", "model.layers.30.block_sparse_moe.experts.47.w3", "model.layers.30.block_sparse_moe.experts.48.w3", "model.layers.30.block_sparse_moe.experts.49.w3", "model.layers.30.block_sparse_moe.experts.50.w3", "model.layers.30.block_sparse_moe.experts.51.w3", "model.layers.30.block_sparse_moe.experts.52.w3", "model.layers.30.block_sparse_moe.experts.53.w3", "model.layers.30.block_sparse_moe.experts.54.w3", "model.layers.30.block_sparse_moe.experts.55.w3", "model.layers.30.block_sparse_moe.experts.56.w3", "model.layers.30.block_sparse_moe.experts.57.w3", "model.layers.30.block_sparse_moe.experts.58.w3", "model.layers.30.block_sparse_moe.experts.59.w3", "model.layers.30.block_sparse_moe.experts.60.w3", "model.layers.30.block_sparse_moe.experts.61.w3", "model.layers.30.block_sparse_moe.experts.62.w3", "model.layers.30.block_sparse_moe.experts.63.w3", "model.layers.30.block_sparse_moe.experts.64.w3", "model.layers.30.block_sparse_moe.experts.65.w3", "model.layers.30.block_sparse_moe.experts.66.w3", "model.layers.30.block_sparse_moe.experts.67.w3", "model.layers.30.block_sparse_moe.experts.68.w3", "model.layers.30.block_sparse_moe.experts.69.w3", "model.layers.30.block_sparse_moe.experts.70.w3", "model.layers.30.block_sparse_moe.experts.71.w3", "model.layers.30.block_sparse_moe.experts.72.w3", "model.layers.30.block_sparse_moe.experts.73.w3", "model.layers.30.block_sparse_moe.experts.74.w3", "model.layers.30.block_sparse_moe.experts.75.w3", "model.layers.30.block_sparse_moe.experts.76.w3", "model.layers.30.block_sparse_moe.experts.77.w3", "model.layers.30.block_sparse_moe.experts.78.w3", "model.layers.30.block_sparse_moe.experts.79.w3", "model.layers.30.block_sparse_moe.experts.80.w3", "model.layers.30.block_sparse_moe.experts.81.w3", "model.layers.30.block_sparse_moe.experts.82.w3", "model.layers.30.block_sparse_moe.experts.83.w3", "model.layers.30.block_sparse_moe.experts.84.w3", "model.layers.30.block_sparse_moe.experts.85.w3", "model.layers.30.block_sparse_moe.experts.86.w3", "model.layers.30.block_sparse_moe.experts.87.w3", "model.layers.30.block_sparse_moe.experts.88.w3", "model.layers.30.block_sparse_moe.experts.89.w3", "model.layers.30.block_sparse_moe.experts.90.w3", "model.layers.30.block_sparse_moe.experts.91.w3", "model.layers.30.block_sparse_moe.experts.92.w3", "model.layers.30.block_sparse_moe.experts.93.w3", "model.layers.30.block_sparse_moe.experts.94.w3", "model.layers.30.block_sparse_moe.experts.95.w3", "model.layers.30.block_sparse_moe.experts.96.w3", "model.layers.30.block_sparse_moe.experts.97.w3", "model.layers.30.block_sparse_moe.experts.98.w3", "model.layers.30.block_sparse_moe.experts.99.w3", "model.layers.30.block_sparse_moe.experts.100.w3", "model.layers.30.block_sparse_moe.experts.101.w3", "model.layers.30.block_sparse_moe.experts.102.w3", "model.layers.30.block_sparse_moe.experts.103.w3", "model.layers.30.block_sparse_moe.experts.104.w3", "model.layers.30.block_sparse_moe.experts.105.w3", "model.layers.30.block_sparse_moe.experts.106.w3", "model.layers.30.block_sparse_moe.experts.107.w3", "model.layers.30.block_sparse_moe.experts.108.w3", "model.layers.30.block_sparse_moe.experts.109.w3", "model.layers.30.block_sparse_moe.experts.110.w3", "model.layers.30.block_sparse_moe.experts.111.w3", "model.layers.30.block_sparse_moe.experts.112.w3", "model.layers.30.block_sparse_moe.experts.113.w3", "model.layers.30.block_sparse_moe.experts.114.w3", "model.layers.30.block_sparse_moe.experts.115.w3", "model.layers.30.block_sparse_moe.experts.116.w3", "model.layers.30.block_sparse_moe.experts.117.w3", "model.layers.30.block_sparse_moe.experts.118.w3", "model.layers.30.block_sparse_moe.experts.119.w3", "model.layers.30.block_sparse_moe.experts.120.w3", "model.layers.30.block_sparse_moe.experts.121.w3", "model.layers.30.block_sparse_moe.experts.122.w3", "model.layers.30.block_sparse_moe.experts.123.w3", "model.layers.30.block_sparse_moe.experts.124.w3", "model.layers.30.block_sparse_moe.experts.125.w3", "model.layers.30.block_sparse_moe.experts.126.w3", "model.layers.30.block_sparse_moe.experts.127.w3", "model.layers.30.block_sparse_moe.experts.128.w3", "model.layers.30.block_sparse_moe.experts.129.w3", "model.layers.30.block_sparse_moe.experts.130.w3", "model.layers.30.block_sparse_moe.experts.131.w3", "model.layers.30.block_sparse_moe.experts.132.w3", "model.layers.30.block_sparse_moe.experts.133.w3", "model.layers.30.block_sparse_moe.experts.134.w3", "model.layers.30.block_sparse_moe.experts.135.w3", "model.layers.30.block_sparse_moe.experts.136.w3", "model.layers.30.block_sparse_moe.experts.137.w3", "model.layers.30.block_sparse_moe.experts.138.w3", "model.layers.30.block_sparse_moe.experts.139.w3", "model.layers.30.block_sparse_moe.experts.140.w3", "model.layers.30.block_sparse_moe.experts.141.w3", "model.layers.30.block_sparse_moe.experts.142.w3", "model.layers.30.block_sparse_moe.experts.143.w3", "model.layers.30.block_sparse_moe.experts.144.w3", "model.layers.30.block_sparse_moe.experts.145.w3", "model.layers.30.block_sparse_moe.experts.146.w3", "model.layers.30.block_sparse_moe.experts.147.w3", "model.layers.30.block_sparse_moe.experts.148.w3", "model.layers.30.block_sparse_moe.experts.149.w3", "model.layers.30.block_sparse_moe.experts.150.w3", "model.layers.30.block_sparse_moe.experts.151.w3", "model.layers.30.block_sparse_moe.experts.152.w3", "model.layers.30.block_sparse_moe.experts.153.w3", "model.layers.30.block_sparse_moe.experts.154.w3", "model.layers.30.block_sparse_moe.experts.155.w3", "model.layers.30.block_sparse_moe.experts.156.w3", "model.layers.30.block_sparse_moe.experts.157.w3", "model.layers.30.block_sparse_moe.experts.158.w3", "model.layers.30.block_sparse_moe.experts.159.w3", "model.layers.30.block_sparse_moe.experts.160.w3", "model.layers.30.block_sparse_moe.experts.161.w3", "model.layers.30.block_sparse_moe.experts.162.w3", "model.layers.30.block_sparse_moe.experts.163.w3", "model.layers.30.block_sparse_moe.experts.164.w3", "model.layers.30.block_sparse_moe.experts.165.w3", "model.layers.30.block_sparse_moe.experts.166.w3", "model.layers.30.block_sparse_moe.experts.167.w3", "model.layers.30.block_sparse_moe.experts.168.w3", "model.layers.30.block_sparse_moe.experts.169.w3", "model.layers.30.block_sparse_moe.experts.170.w3", "model.layers.30.block_sparse_moe.experts.171.w3", "model.layers.30.block_sparse_moe.experts.172.w3", "model.layers.30.block_sparse_moe.experts.173.w3", "model.layers.30.block_sparse_moe.experts.174.w3", "model.layers.30.block_sparse_moe.experts.175.w3", "model.layers.30.block_sparse_moe.experts.176.w3", "model.layers.30.block_sparse_moe.experts.177.w3", "model.layers.30.block_sparse_moe.experts.178.w3", "model.layers.30.block_sparse_moe.experts.179.w3", "model.layers.30.block_sparse_moe.experts.180.w3", "model.layers.30.block_sparse_moe.experts.181.w3", "model.layers.30.block_sparse_moe.experts.182.w3", "model.layers.30.block_sparse_moe.experts.183.w3", "model.layers.30.block_sparse_moe.experts.184.w3", "model.layers.30.block_sparse_moe.experts.185.w3", "model.layers.30.block_sparse_moe.experts.186.w3", "model.layers.30.block_sparse_moe.experts.187.w3", "model.layers.30.block_sparse_moe.experts.188.w3", "model.layers.30.block_sparse_moe.experts.189.w3", "model.layers.30.block_sparse_moe.experts.190.w3", "model.layers.30.block_sparse_moe.experts.191.w3", "model.layers.30.block_sparse_moe.experts.192.w3", "model.layers.30.block_sparse_moe.experts.193.w3", "model.layers.30.block_sparse_moe.experts.194.w3", "model.layers.30.block_sparse_moe.experts.195.w3", "model.layers.30.block_sparse_moe.experts.196.w3", "model.layers.30.block_sparse_moe.experts.197.w3", "model.layers.30.block_sparse_moe.experts.198.w3", "model.layers.30.block_sparse_moe.experts.199.w3", "model.layers.30.block_sparse_moe.experts.200.w3", "model.layers.30.block_sparse_moe.experts.201.w3", "model.layers.30.block_sparse_moe.experts.202.w3", "model.layers.30.block_sparse_moe.experts.203.w3", "model.layers.30.block_sparse_moe.experts.204.w3", "model.layers.30.block_sparse_moe.experts.205.w3", "model.layers.30.block_sparse_moe.experts.206.w3", "model.layers.30.block_sparse_moe.experts.207.w3", "model.layers.30.block_sparse_moe.experts.208.w3", "model.layers.30.block_sparse_moe.experts.209.w3", "model.layers.30.block_sparse_moe.experts.210.w3", "model.layers.30.block_sparse_moe.experts.211.w3", "model.layers.30.block_sparse_moe.experts.212.w3", "model.layers.30.block_sparse_moe.experts.213.w3", "model.layers.30.block_sparse_moe.experts.214.w3", "model.layers.30.block_sparse_moe.experts.215.w3", "model.layers.30.block_sparse_moe.experts.216.w3", "model.layers.30.block_sparse_moe.experts.217.w3", "model.layers.30.block_sparse_moe.experts.218.w3", "model.layers.30.block_sparse_moe.experts.219.w3", "model.layers.30.block_sparse_moe.experts.220.w3", "model.layers.30.block_sparse_moe.experts.221.w3", "model.layers.30.block_sparse_moe.experts.222.w3", "model.layers.30.block_sparse_moe.experts.223.w3", "model.layers.30.block_sparse_moe.experts.224.w3", "model.layers.30.block_sparse_moe.experts.225.w3", "model.layers.30.block_sparse_moe.experts.226.w3", "model.layers.30.block_sparse_moe.experts.227.w3", "model.layers.30.block_sparse_moe.experts.228.w3", "model.layers.30.block_sparse_moe.experts.229.w3", "model.layers.30.block_sparse_moe.experts.230.w3", "model.layers.30.block_sparse_moe.experts.231.w3", "model.layers.30.block_sparse_moe.experts.232.w3", "model.layers.30.block_sparse_moe.experts.233.w3", "model.layers.30.block_sparse_moe.experts.234.w3", "model.layers.30.block_sparse_moe.experts.235.w3", "model.layers.30.block_sparse_moe.experts.236.w3", "model.layers.30.block_sparse_moe.experts.237.w3", "model.layers.30.block_sparse_moe.experts.238.w3", "model.layers.30.block_sparse_moe.experts.239.w3", "model.layers.30.block_sparse_moe.experts.240.w3", "model.layers.30.block_sparse_moe.experts.241.w3", "model.layers.30.block_sparse_moe.experts.242.w3", "model.layers.30.block_sparse_moe.experts.243.w3", "model.layers.30.block_sparse_moe.experts.244.w3", "model.layers.30.block_sparse_moe.experts.245.w3", "model.layers.30.block_sparse_moe.experts.246.w3", "model.layers.30.block_sparse_moe.experts.247.w3", "model.layers.30.block_sparse_moe.experts.248.w3", "model.layers.30.block_sparse_moe.experts.249.w3", "model.layers.30.block_sparse_moe.experts.250.w3", "model.layers.30.block_sparse_moe.experts.251.w3", "model.layers.30.block_sparse_moe.experts.252.w3", "model.layers.30.block_sparse_moe.experts.253.w3", "model.layers.30.block_sparse_moe.experts.254.w3", "model.layers.30.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0005616303533315659, "dbits": 2415919104 } ] }, { "idx": 154, "layers": [ "model.layers.30.block_sparse_moe.experts.0.w2", "model.layers.30.block_sparse_moe.experts.1.w2", "model.layers.30.block_sparse_moe.experts.2.w2", "model.layers.30.block_sparse_moe.experts.3.w2", "model.layers.30.block_sparse_moe.experts.4.w2", "model.layers.30.block_sparse_moe.experts.5.w2", "model.layers.30.block_sparse_moe.experts.6.w2", "model.layers.30.block_sparse_moe.experts.7.w2", "model.layers.30.block_sparse_moe.experts.8.w2", "model.layers.30.block_sparse_moe.experts.9.w2", "model.layers.30.block_sparse_moe.experts.10.w2", "model.layers.30.block_sparse_moe.experts.11.w2", "model.layers.30.block_sparse_moe.experts.12.w2", "model.layers.30.block_sparse_moe.experts.13.w2", "model.layers.30.block_sparse_moe.experts.14.w2", "model.layers.30.block_sparse_moe.experts.15.w2", "model.layers.30.block_sparse_moe.experts.16.w2", "model.layers.30.block_sparse_moe.experts.17.w2", "model.layers.30.block_sparse_moe.experts.18.w2", "model.layers.30.block_sparse_moe.experts.19.w2", "model.layers.30.block_sparse_moe.experts.20.w2", "model.layers.30.block_sparse_moe.experts.21.w2", "model.layers.30.block_sparse_moe.experts.22.w2", "model.layers.30.block_sparse_moe.experts.23.w2", "model.layers.30.block_sparse_moe.experts.24.w2", "model.layers.30.block_sparse_moe.experts.25.w2", "model.layers.30.block_sparse_moe.experts.26.w2", "model.layers.30.block_sparse_moe.experts.27.w2", "model.layers.30.block_sparse_moe.experts.28.w2", "model.layers.30.block_sparse_moe.experts.29.w2", "model.layers.30.block_sparse_moe.experts.30.w2", "model.layers.30.block_sparse_moe.experts.31.w2", "model.layers.30.block_sparse_moe.experts.32.w2", "model.layers.30.block_sparse_moe.experts.33.w2", "model.layers.30.block_sparse_moe.experts.34.w2", "model.layers.30.block_sparse_moe.experts.35.w2", "model.layers.30.block_sparse_moe.experts.36.w2", "model.layers.30.block_sparse_moe.experts.37.w2", "model.layers.30.block_sparse_moe.experts.38.w2", "model.layers.30.block_sparse_moe.experts.39.w2", "model.layers.30.block_sparse_moe.experts.40.w2", "model.layers.30.block_sparse_moe.experts.41.w2", "model.layers.30.block_sparse_moe.experts.42.w2", "model.layers.30.block_sparse_moe.experts.43.w2", "model.layers.30.block_sparse_moe.experts.44.w2", "model.layers.30.block_sparse_moe.experts.45.w2", "model.layers.30.block_sparse_moe.experts.46.w2", "model.layers.30.block_sparse_moe.experts.47.w2", "model.layers.30.block_sparse_moe.experts.48.w2", "model.layers.30.block_sparse_moe.experts.49.w2", "model.layers.30.block_sparse_moe.experts.50.w2", "model.layers.30.block_sparse_moe.experts.51.w2", "model.layers.30.block_sparse_moe.experts.52.w2", "model.layers.30.block_sparse_moe.experts.53.w2", "model.layers.30.block_sparse_moe.experts.54.w2", "model.layers.30.block_sparse_moe.experts.55.w2", "model.layers.30.block_sparse_moe.experts.56.w2", "model.layers.30.block_sparse_moe.experts.57.w2", "model.layers.30.block_sparse_moe.experts.58.w2", "model.layers.30.block_sparse_moe.experts.59.w2", "model.layers.30.block_sparse_moe.experts.60.w2", "model.layers.30.block_sparse_moe.experts.61.w2", "model.layers.30.block_sparse_moe.experts.62.w2", "model.layers.30.block_sparse_moe.experts.63.w2", "model.layers.30.block_sparse_moe.experts.64.w2", "model.layers.30.block_sparse_moe.experts.65.w2", "model.layers.30.block_sparse_moe.experts.66.w2", "model.layers.30.block_sparse_moe.experts.67.w2", "model.layers.30.block_sparse_moe.experts.68.w2", "model.layers.30.block_sparse_moe.experts.69.w2", "model.layers.30.block_sparse_moe.experts.70.w2", "model.layers.30.block_sparse_moe.experts.71.w2", "model.layers.30.block_sparse_moe.experts.72.w2", "model.layers.30.block_sparse_moe.experts.73.w2", "model.layers.30.block_sparse_moe.experts.74.w2", "model.layers.30.block_sparse_moe.experts.75.w2", "model.layers.30.block_sparse_moe.experts.76.w2", "model.layers.30.block_sparse_moe.experts.77.w2", "model.layers.30.block_sparse_moe.experts.78.w2", "model.layers.30.block_sparse_moe.experts.79.w2", "model.layers.30.block_sparse_moe.experts.80.w2", "model.layers.30.block_sparse_moe.experts.81.w2", "model.layers.30.block_sparse_moe.experts.82.w2", "model.layers.30.block_sparse_moe.experts.83.w2", "model.layers.30.block_sparse_moe.experts.84.w2", "model.layers.30.block_sparse_moe.experts.85.w2", "model.layers.30.block_sparse_moe.experts.86.w2", "model.layers.30.block_sparse_moe.experts.87.w2", "model.layers.30.block_sparse_moe.experts.88.w2", "model.layers.30.block_sparse_moe.experts.89.w2", "model.layers.30.block_sparse_moe.experts.90.w2", "model.layers.30.block_sparse_moe.experts.91.w2", "model.layers.30.block_sparse_moe.experts.92.w2", "model.layers.30.block_sparse_moe.experts.93.w2", "model.layers.30.block_sparse_moe.experts.94.w2", "model.layers.30.block_sparse_moe.experts.95.w2", "model.layers.30.block_sparse_moe.experts.96.w2", "model.layers.30.block_sparse_moe.experts.97.w2", "model.layers.30.block_sparse_moe.experts.98.w2", "model.layers.30.block_sparse_moe.experts.99.w2", "model.layers.30.block_sparse_moe.experts.100.w2", "model.layers.30.block_sparse_moe.experts.101.w2", "model.layers.30.block_sparse_moe.experts.102.w2", "model.layers.30.block_sparse_moe.experts.103.w2", "model.layers.30.block_sparse_moe.experts.104.w2", "model.layers.30.block_sparse_moe.experts.105.w2", "model.layers.30.block_sparse_moe.experts.106.w2", "model.layers.30.block_sparse_moe.experts.107.w2", "model.layers.30.block_sparse_moe.experts.108.w2", "model.layers.30.block_sparse_moe.experts.109.w2", "model.layers.30.block_sparse_moe.experts.110.w2", "model.layers.30.block_sparse_moe.experts.111.w2", "model.layers.30.block_sparse_moe.experts.112.w2", "model.layers.30.block_sparse_moe.experts.113.w2", "model.layers.30.block_sparse_moe.experts.114.w2", "model.layers.30.block_sparse_moe.experts.115.w2", "model.layers.30.block_sparse_moe.experts.116.w2", "model.layers.30.block_sparse_moe.experts.117.w2", "model.layers.30.block_sparse_moe.experts.118.w2", "model.layers.30.block_sparse_moe.experts.119.w2", "model.layers.30.block_sparse_moe.experts.120.w2", "model.layers.30.block_sparse_moe.experts.121.w2", "model.layers.30.block_sparse_moe.experts.122.w2", "model.layers.30.block_sparse_moe.experts.123.w2", "model.layers.30.block_sparse_moe.experts.124.w2", "model.layers.30.block_sparse_moe.experts.125.w2", "model.layers.30.block_sparse_moe.experts.126.w2", "model.layers.30.block_sparse_moe.experts.127.w2", "model.layers.30.block_sparse_moe.experts.128.w2", "model.layers.30.block_sparse_moe.experts.129.w2", "model.layers.30.block_sparse_moe.experts.130.w2", "model.layers.30.block_sparse_moe.experts.131.w2", "model.layers.30.block_sparse_moe.experts.132.w2", "model.layers.30.block_sparse_moe.experts.133.w2", "model.layers.30.block_sparse_moe.experts.134.w2", "model.layers.30.block_sparse_moe.experts.135.w2", "model.layers.30.block_sparse_moe.experts.136.w2", "model.layers.30.block_sparse_moe.experts.137.w2", "model.layers.30.block_sparse_moe.experts.138.w2", "model.layers.30.block_sparse_moe.experts.139.w2", "model.layers.30.block_sparse_moe.experts.140.w2", "model.layers.30.block_sparse_moe.experts.141.w2", "model.layers.30.block_sparse_moe.experts.142.w2", "model.layers.30.block_sparse_moe.experts.143.w2", "model.layers.30.block_sparse_moe.experts.144.w2", "model.layers.30.block_sparse_moe.experts.145.w2", "model.layers.30.block_sparse_moe.experts.146.w2", "model.layers.30.block_sparse_moe.experts.147.w2", "model.layers.30.block_sparse_moe.experts.148.w2", "model.layers.30.block_sparse_moe.experts.149.w2", "model.layers.30.block_sparse_moe.experts.150.w2", "model.layers.30.block_sparse_moe.experts.151.w2", "model.layers.30.block_sparse_moe.experts.152.w2", "model.layers.30.block_sparse_moe.experts.153.w2", "model.layers.30.block_sparse_moe.experts.154.w2", "model.layers.30.block_sparse_moe.experts.155.w2", "model.layers.30.block_sparse_moe.experts.156.w2", "model.layers.30.block_sparse_moe.experts.157.w2", "model.layers.30.block_sparse_moe.experts.158.w2", "model.layers.30.block_sparse_moe.experts.159.w2", "model.layers.30.block_sparse_moe.experts.160.w2", "model.layers.30.block_sparse_moe.experts.161.w2", "model.layers.30.block_sparse_moe.experts.162.w2", "model.layers.30.block_sparse_moe.experts.163.w2", "model.layers.30.block_sparse_moe.experts.164.w2", "model.layers.30.block_sparse_moe.experts.165.w2", "model.layers.30.block_sparse_moe.experts.166.w2", "model.layers.30.block_sparse_moe.experts.167.w2", "model.layers.30.block_sparse_moe.experts.168.w2", "model.layers.30.block_sparse_moe.experts.169.w2", "model.layers.30.block_sparse_moe.experts.170.w2", "model.layers.30.block_sparse_moe.experts.171.w2", "model.layers.30.block_sparse_moe.experts.172.w2", "model.layers.30.block_sparse_moe.experts.173.w2", "model.layers.30.block_sparse_moe.experts.174.w2", "model.layers.30.block_sparse_moe.experts.175.w2", "model.layers.30.block_sparse_moe.experts.176.w2", "model.layers.30.block_sparse_moe.experts.177.w2", "model.layers.30.block_sparse_moe.experts.178.w2", "model.layers.30.block_sparse_moe.experts.179.w2", "model.layers.30.block_sparse_moe.experts.180.w2", "model.layers.30.block_sparse_moe.experts.181.w2", "model.layers.30.block_sparse_moe.experts.182.w2", "model.layers.30.block_sparse_moe.experts.183.w2", "model.layers.30.block_sparse_moe.experts.184.w2", "model.layers.30.block_sparse_moe.experts.185.w2", "model.layers.30.block_sparse_moe.experts.186.w2", "model.layers.30.block_sparse_moe.experts.187.w2", "model.layers.30.block_sparse_moe.experts.188.w2", "model.layers.30.block_sparse_moe.experts.189.w2", "model.layers.30.block_sparse_moe.experts.190.w2", "model.layers.30.block_sparse_moe.experts.191.w2", "model.layers.30.block_sparse_moe.experts.192.w2", "model.layers.30.block_sparse_moe.experts.193.w2", "model.layers.30.block_sparse_moe.experts.194.w2", "model.layers.30.block_sparse_moe.experts.195.w2", "model.layers.30.block_sparse_moe.experts.196.w2", "model.layers.30.block_sparse_moe.experts.197.w2", "model.layers.30.block_sparse_moe.experts.198.w2", "model.layers.30.block_sparse_moe.experts.199.w2", "model.layers.30.block_sparse_moe.experts.200.w2", "model.layers.30.block_sparse_moe.experts.201.w2", "model.layers.30.block_sparse_moe.experts.202.w2", "model.layers.30.block_sparse_moe.experts.203.w2", "model.layers.30.block_sparse_moe.experts.204.w2", "model.layers.30.block_sparse_moe.experts.205.w2", "model.layers.30.block_sparse_moe.experts.206.w2", "model.layers.30.block_sparse_moe.experts.207.w2", "model.layers.30.block_sparse_moe.experts.208.w2", "model.layers.30.block_sparse_moe.experts.209.w2", "model.layers.30.block_sparse_moe.experts.210.w2", "model.layers.30.block_sparse_moe.experts.211.w2", "model.layers.30.block_sparse_moe.experts.212.w2", "model.layers.30.block_sparse_moe.experts.213.w2", "model.layers.30.block_sparse_moe.experts.214.w2", "model.layers.30.block_sparse_moe.experts.215.w2", "model.layers.30.block_sparse_moe.experts.216.w2", "model.layers.30.block_sparse_moe.experts.217.w2", "model.layers.30.block_sparse_moe.experts.218.w2", "model.layers.30.block_sparse_moe.experts.219.w2", "model.layers.30.block_sparse_moe.experts.220.w2", "model.layers.30.block_sparse_moe.experts.221.w2", "model.layers.30.block_sparse_moe.experts.222.w2", "model.layers.30.block_sparse_moe.experts.223.w2", "model.layers.30.block_sparse_moe.experts.224.w2", "model.layers.30.block_sparse_moe.experts.225.w2", "model.layers.30.block_sparse_moe.experts.226.w2", "model.layers.30.block_sparse_moe.experts.227.w2", "model.layers.30.block_sparse_moe.experts.228.w2", "model.layers.30.block_sparse_moe.experts.229.w2", "model.layers.30.block_sparse_moe.experts.230.w2", "model.layers.30.block_sparse_moe.experts.231.w2", "model.layers.30.block_sparse_moe.experts.232.w2", "model.layers.30.block_sparse_moe.experts.233.w2", "model.layers.30.block_sparse_moe.experts.234.w2", "model.layers.30.block_sparse_moe.experts.235.w2", "model.layers.30.block_sparse_moe.experts.236.w2", "model.layers.30.block_sparse_moe.experts.237.w2", "model.layers.30.block_sparse_moe.experts.238.w2", "model.layers.30.block_sparse_moe.experts.239.w2", "model.layers.30.block_sparse_moe.experts.240.w2", "model.layers.30.block_sparse_moe.experts.241.w2", "model.layers.30.block_sparse_moe.experts.242.w2", "model.layers.30.block_sparse_moe.experts.243.w2", "model.layers.30.block_sparse_moe.experts.244.w2", "model.layers.30.block_sparse_moe.experts.245.w2", "model.layers.30.block_sparse_moe.experts.246.w2", "model.layers.30.block_sparse_moe.experts.247.w2", "model.layers.30.block_sparse_moe.experts.248.w2", "model.layers.30.block_sparse_moe.experts.249.w2", "model.layers.30.block_sparse_moe.experts.250.w2", "model.layers.30.block_sparse_moe.experts.251.w2", "model.layers.30.block_sparse_moe.experts.252.w2", "model.layers.30.block_sparse_moe.experts.253.w2", "model.layers.30.block_sparse_moe.experts.254.w2", "model.layers.30.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0006692141294479259, "dbits": 1207959552 } ] }, { "idx": 155, "layers": [ "model.layers.31.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0014709312468766944, "dbits": 18874368 } ] }, { "idx": 156, "layers": [ "model.layers.31.self_attn.k_proj", "model.layers.31.self_attn.v_proj" ], "candidates": [ { "dkld": -0.004489528015255928, "dbits": 6291456 } ] }, { "idx": 157, "layers": [ "model.layers.31.self_attn.o_proj" ], "candidates": [ { "dkld": 0.004029864072799649, "dbits": 18874368 } ] }, { "idx": 158, "layers": [ "model.layers.31.block_sparse_moe.experts.0.w1", "model.layers.31.block_sparse_moe.experts.1.w1", "model.layers.31.block_sparse_moe.experts.2.w1", "model.layers.31.block_sparse_moe.experts.3.w1", "model.layers.31.block_sparse_moe.experts.4.w1", "model.layers.31.block_sparse_moe.experts.5.w1", "model.layers.31.block_sparse_moe.experts.6.w1", "model.layers.31.block_sparse_moe.experts.7.w1", "model.layers.31.block_sparse_moe.experts.8.w1", "model.layers.31.block_sparse_moe.experts.9.w1", "model.layers.31.block_sparse_moe.experts.10.w1", "model.layers.31.block_sparse_moe.experts.11.w1", "model.layers.31.block_sparse_moe.experts.12.w1", "model.layers.31.block_sparse_moe.experts.13.w1", "model.layers.31.block_sparse_moe.experts.14.w1", "model.layers.31.block_sparse_moe.experts.15.w1", "model.layers.31.block_sparse_moe.experts.16.w1", "model.layers.31.block_sparse_moe.experts.17.w1", "model.layers.31.block_sparse_moe.experts.18.w1", "model.layers.31.block_sparse_moe.experts.19.w1", "model.layers.31.block_sparse_moe.experts.20.w1", "model.layers.31.block_sparse_moe.experts.21.w1", "model.layers.31.block_sparse_moe.experts.22.w1", "model.layers.31.block_sparse_moe.experts.23.w1", "model.layers.31.block_sparse_moe.experts.24.w1", "model.layers.31.block_sparse_moe.experts.25.w1", "model.layers.31.block_sparse_moe.experts.26.w1", "model.layers.31.block_sparse_moe.experts.27.w1", "model.layers.31.block_sparse_moe.experts.28.w1", "model.layers.31.block_sparse_moe.experts.29.w1", "model.layers.31.block_sparse_moe.experts.30.w1", "model.layers.31.block_sparse_moe.experts.31.w1", "model.layers.31.block_sparse_moe.experts.32.w1", "model.layers.31.block_sparse_moe.experts.33.w1", "model.layers.31.block_sparse_moe.experts.34.w1", "model.layers.31.block_sparse_moe.experts.35.w1", "model.layers.31.block_sparse_moe.experts.36.w1", "model.layers.31.block_sparse_moe.experts.37.w1", "model.layers.31.block_sparse_moe.experts.38.w1", "model.layers.31.block_sparse_moe.experts.39.w1", "model.layers.31.block_sparse_moe.experts.40.w1", "model.layers.31.block_sparse_moe.experts.41.w1", "model.layers.31.block_sparse_moe.experts.42.w1", "model.layers.31.block_sparse_moe.experts.43.w1", "model.layers.31.block_sparse_moe.experts.44.w1", "model.layers.31.block_sparse_moe.experts.45.w1", "model.layers.31.block_sparse_moe.experts.46.w1", "model.layers.31.block_sparse_moe.experts.47.w1", "model.layers.31.block_sparse_moe.experts.48.w1", "model.layers.31.block_sparse_moe.experts.49.w1", "model.layers.31.block_sparse_moe.experts.50.w1", "model.layers.31.block_sparse_moe.experts.51.w1", "model.layers.31.block_sparse_moe.experts.52.w1", "model.layers.31.block_sparse_moe.experts.53.w1", "model.layers.31.block_sparse_moe.experts.54.w1", "model.layers.31.block_sparse_moe.experts.55.w1", "model.layers.31.block_sparse_moe.experts.56.w1", "model.layers.31.block_sparse_moe.experts.57.w1", "model.layers.31.block_sparse_moe.experts.58.w1", "model.layers.31.block_sparse_moe.experts.59.w1", "model.layers.31.block_sparse_moe.experts.60.w1", "model.layers.31.block_sparse_moe.experts.61.w1", "model.layers.31.block_sparse_moe.experts.62.w1", "model.layers.31.block_sparse_moe.experts.63.w1", "model.layers.31.block_sparse_moe.experts.64.w1", "model.layers.31.block_sparse_moe.experts.65.w1", "model.layers.31.block_sparse_moe.experts.66.w1", "model.layers.31.block_sparse_moe.experts.67.w1", "model.layers.31.block_sparse_moe.experts.68.w1", "model.layers.31.block_sparse_moe.experts.69.w1", "model.layers.31.block_sparse_moe.experts.70.w1", "model.layers.31.block_sparse_moe.experts.71.w1", "model.layers.31.block_sparse_moe.experts.72.w1", "model.layers.31.block_sparse_moe.experts.73.w1", "model.layers.31.block_sparse_moe.experts.74.w1", "model.layers.31.block_sparse_moe.experts.75.w1", "model.layers.31.block_sparse_moe.experts.76.w1", "model.layers.31.block_sparse_moe.experts.77.w1", "model.layers.31.block_sparse_moe.experts.78.w1", "model.layers.31.block_sparse_moe.experts.79.w1", "model.layers.31.block_sparse_moe.experts.80.w1", "model.layers.31.block_sparse_moe.experts.81.w1", "model.layers.31.block_sparse_moe.experts.82.w1", "model.layers.31.block_sparse_moe.experts.83.w1", "model.layers.31.block_sparse_moe.experts.84.w1", "model.layers.31.block_sparse_moe.experts.85.w1", "model.layers.31.block_sparse_moe.experts.86.w1", "model.layers.31.block_sparse_moe.experts.87.w1", "model.layers.31.block_sparse_moe.experts.88.w1", "model.layers.31.block_sparse_moe.experts.89.w1", "model.layers.31.block_sparse_moe.experts.90.w1", "model.layers.31.block_sparse_moe.experts.91.w1", "model.layers.31.block_sparse_moe.experts.92.w1", "model.layers.31.block_sparse_moe.experts.93.w1", "model.layers.31.block_sparse_moe.experts.94.w1", "model.layers.31.block_sparse_moe.experts.95.w1", "model.layers.31.block_sparse_moe.experts.96.w1", "model.layers.31.block_sparse_moe.experts.97.w1", "model.layers.31.block_sparse_moe.experts.98.w1", "model.layers.31.block_sparse_moe.experts.99.w1", "model.layers.31.block_sparse_moe.experts.100.w1", "model.layers.31.block_sparse_moe.experts.101.w1", "model.layers.31.block_sparse_moe.experts.102.w1", "model.layers.31.block_sparse_moe.experts.103.w1", "model.layers.31.block_sparse_moe.experts.104.w1", "model.layers.31.block_sparse_moe.experts.105.w1", "model.layers.31.block_sparse_moe.experts.106.w1", "model.layers.31.block_sparse_moe.experts.107.w1", "model.layers.31.block_sparse_moe.experts.108.w1", "model.layers.31.block_sparse_moe.experts.109.w1", "model.layers.31.block_sparse_moe.experts.110.w1", "model.layers.31.block_sparse_moe.experts.111.w1", "model.layers.31.block_sparse_moe.experts.112.w1", "model.layers.31.block_sparse_moe.experts.113.w1", "model.layers.31.block_sparse_moe.experts.114.w1", "model.layers.31.block_sparse_moe.experts.115.w1", "model.layers.31.block_sparse_moe.experts.116.w1", "model.layers.31.block_sparse_moe.experts.117.w1", "model.layers.31.block_sparse_moe.experts.118.w1", "model.layers.31.block_sparse_moe.experts.119.w1", "model.layers.31.block_sparse_moe.experts.120.w1", "model.layers.31.block_sparse_moe.experts.121.w1", "model.layers.31.block_sparse_moe.experts.122.w1", "model.layers.31.block_sparse_moe.experts.123.w1", "model.layers.31.block_sparse_moe.experts.124.w1", "model.layers.31.block_sparse_moe.experts.125.w1", "model.layers.31.block_sparse_moe.experts.126.w1", "model.layers.31.block_sparse_moe.experts.127.w1", "model.layers.31.block_sparse_moe.experts.128.w1", "model.layers.31.block_sparse_moe.experts.129.w1", "model.layers.31.block_sparse_moe.experts.130.w1", "model.layers.31.block_sparse_moe.experts.131.w1", "model.layers.31.block_sparse_moe.experts.132.w1", "model.layers.31.block_sparse_moe.experts.133.w1", "model.layers.31.block_sparse_moe.experts.134.w1", "model.layers.31.block_sparse_moe.experts.135.w1", "model.layers.31.block_sparse_moe.experts.136.w1", "model.layers.31.block_sparse_moe.experts.137.w1", "model.layers.31.block_sparse_moe.experts.138.w1", "model.layers.31.block_sparse_moe.experts.139.w1", "model.layers.31.block_sparse_moe.experts.140.w1", "model.layers.31.block_sparse_moe.experts.141.w1", "model.layers.31.block_sparse_moe.experts.142.w1", "model.layers.31.block_sparse_moe.experts.143.w1", "model.layers.31.block_sparse_moe.experts.144.w1", "model.layers.31.block_sparse_moe.experts.145.w1", "model.layers.31.block_sparse_moe.experts.146.w1", "model.layers.31.block_sparse_moe.experts.147.w1", "model.layers.31.block_sparse_moe.experts.148.w1", "model.layers.31.block_sparse_moe.experts.149.w1", "model.layers.31.block_sparse_moe.experts.150.w1", "model.layers.31.block_sparse_moe.experts.151.w1", "model.layers.31.block_sparse_moe.experts.152.w1", "model.layers.31.block_sparse_moe.experts.153.w1", "model.layers.31.block_sparse_moe.experts.154.w1", "model.layers.31.block_sparse_moe.experts.155.w1", "model.layers.31.block_sparse_moe.experts.156.w1", "model.layers.31.block_sparse_moe.experts.157.w1", "model.layers.31.block_sparse_moe.experts.158.w1", "model.layers.31.block_sparse_moe.experts.159.w1", "model.layers.31.block_sparse_moe.experts.160.w1", "model.layers.31.block_sparse_moe.experts.161.w1", "model.layers.31.block_sparse_moe.experts.162.w1", "model.layers.31.block_sparse_moe.experts.163.w1", "model.layers.31.block_sparse_moe.experts.164.w1", "model.layers.31.block_sparse_moe.experts.165.w1", "model.layers.31.block_sparse_moe.experts.166.w1", "model.layers.31.block_sparse_moe.experts.167.w1", "model.layers.31.block_sparse_moe.experts.168.w1", "model.layers.31.block_sparse_moe.experts.169.w1", "model.layers.31.block_sparse_moe.experts.170.w1", "model.layers.31.block_sparse_moe.experts.171.w1", "model.layers.31.block_sparse_moe.experts.172.w1", "model.layers.31.block_sparse_moe.experts.173.w1", "model.layers.31.block_sparse_moe.experts.174.w1", "model.layers.31.block_sparse_moe.experts.175.w1", "model.layers.31.block_sparse_moe.experts.176.w1", "model.layers.31.block_sparse_moe.experts.177.w1", "model.layers.31.block_sparse_moe.experts.178.w1", "model.layers.31.block_sparse_moe.experts.179.w1", "model.layers.31.block_sparse_moe.experts.180.w1", "model.layers.31.block_sparse_moe.experts.181.w1", "model.layers.31.block_sparse_moe.experts.182.w1", "model.layers.31.block_sparse_moe.experts.183.w1", "model.layers.31.block_sparse_moe.experts.184.w1", "model.layers.31.block_sparse_moe.experts.185.w1", "model.layers.31.block_sparse_moe.experts.186.w1", "model.layers.31.block_sparse_moe.experts.187.w1", "model.layers.31.block_sparse_moe.experts.188.w1", "model.layers.31.block_sparse_moe.experts.189.w1", "model.layers.31.block_sparse_moe.experts.190.w1", "model.layers.31.block_sparse_moe.experts.191.w1", "model.layers.31.block_sparse_moe.experts.192.w1", "model.layers.31.block_sparse_moe.experts.193.w1", "model.layers.31.block_sparse_moe.experts.194.w1", "model.layers.31.block_sparse_moe.experts.195.w1", "model.layers.31.block_sparse_moe.experts.196.w1", "model.layers.31.block_sparse_moe.experts.197.w1", "model.layers.31.block_sparse_moe.experts.198.w1", "model.layers.31.block_sparse_moe.experts.199.w1", "model.layers.31.block_sparse_moe.experts.200.w1", "model.layers.31.block_sparse_moe.experts.201.w1", "model.layers.31.block_sparse_moe.experts.202.w1", "model.layers.31.block_sparse_moe.experts.203.w1", "model.layers.31.block_sparse_moe.experts.204.w1", "model.layers.31.block_sparse_moe.experts.205.w1", "model.layers.31.block_sparse_moe.experts.206.w1", "model.layers.31.block_sparse_moe.experts.207.w1", "model.layers.31.block_sparse_moe.experts.208.w1", "model.layers.31.block_sparse_moe.experts.209.w1", "model.layers.31.block_sparse_moe.experts.210.w1", "model.layers.31.block_sparse_moe.experts.211.w1", "model.layers.31.block_sparse_moe.experts.212.w1", "model.layers.31.block_sparse_moe.experts.213.w1", "model.layers.31.block_sparse_moe.experts.214.w1", "model.layers.31.block_sparse_moe.experts.215.w1", "model.layers.31.block_sparse_moe.experts.216.w1", "model.layers.31.block_sparse_moe.experts.217.w1", "model.layers.31.block_sparse_moe.experts.218.w1", "model.layers.31.block_sparse_moe.experts.219.w1", "model.layers.31.block_sparse_moe.experts.220.w1", "model.layers.31.block_sparse_moe.experts.221.w1", "model.layers.31.block_sparse_moe.experts.222.w1", "model.layers.31.block_sparse_moe.experts.223.w1", "model.layers.31.block_sparse_moe.experts.224.w1", "model.layers.31.block_sparse_moe.experts.225.w1", "model.layers.31.block_sparse_moe.experts.226.w1", "model.layers.31.block_sparse_moe.experts.227.w1", "model.layers.31.block_sparse_moe.experts.228.w1", "model.layers.31.block_sparse_moe.experts.229.w1", "model.layers.31.block_sparse_moe.experts.230.w1", "model.layers.31.block_sparse_moe.experts.231.w1", "model.layers.31.block_sparse_moe.experts.232.w1", "model.layers.31.block_sparse_moe.experts.233.w1", "model.layers.31.block_sparse_moe.experts.234.w1", "model.layers.31.block_sparse_moe.experts.235.w1", "model.layers.31.block_sparse_moe.experts.236.w1", "model.layers.31.block_sparse_moe.experts.237.w1", "model.layers.31.block_sparse_moe.experts.238.w1", "model.layers.31.block_sparse_moe.experts.239.w1", "model.layers.31.block_sparse_moe.experts.240.w1", "model.layers.31.block_sparse_moe.experts.241.w1", "model.layers.31.block_sparse_moe.experts.242.w1", "model.layers.31.block_sparse_moe.experts.243.w1", "model.layers.31.block_sparse_moe.experts.244.w1", "model.layers.31.block_sparse_moe.experts.245.w1", "model.layers.31.block_sparse_moe.experts.246.w1", "model.layers.31.block_sparse_moe.experts.247.w1", "model.layers.31.block_sparse_moe.experts.248.w1", "model.layers.31.block_sparse_moe.experts.249.w1", "model.layers.31.block_sparse_moe.experts.250.w1", "model.layers.31.block_sparse_moe.experts.251.w1", "model.layers.31.block_sparse_moe.experts.252.w1", "model.layers.31.block_sparse_moe.experts.253.w1", "model.layers.31.block_sparse_moe.experts.254.w1", "model.layers.31.block_sparse_moe.experts.255.w1", "model.layers.31.block_sparse_moe.experts.0.w3", "model.layers.31.block_sparse_moe.experts.1.w3", "model.layers.31.block_sparse_moe.experts.2.w3", "model.layers.31.block_sparse_moe.experts.3.w3", "model.layers.31.block_sparse_moe.experts.4.w3", "model.layers.31.block_sparse_moe.experts.5.w3", "model.layers.31.block_sparse_moe.experts.6.w3", "model.layers.31.block_sparse_moe.experts.7.w3", "model.layers.31.block_sparse_moe.experts.8.w3", "model.layers.31.block_sparse_moe.experts.9.w3", "model.layers.31.block_sparse_moe.experts.10.w3", "model.layers.31.block_sparse_moe.experts.11.w3", "model.layers.31.block_sparse_moe.experts.12.w3", "model.layers.31.block_sparse_moe.experts.13.w3", "model.layers.31.block_sparse_moe.experts.14.w3", "model.layers.31.block_sparse_moe.experts.15.w3", "model.layers.31.block_sparse_moe.experts.16.w3", "model.layers.31.block_sparse_moe.experts.17.w3", "model.layers.31.block_sparse_moe.experts.18.w3", "model.layers.31.block_sparse_moe.experts.19.w3", "model.layers.31.block_sparse_moe.experts.20.w3", "model.layers.31.block_sparse_moe.experts.21.w3", "model.layers.31.block_sparse_moe.experts.22.w3", "model.layers.31.block_sparse_moe.experts.23.w3", "model.layers.31.block_sparse_moe.experts.24.w3", "model.layers.31.block_sparse_moe.experts.25.w3", "model.layers.31.block_sparse_moe.experts.26.w3", "model.layers.31.block_sparse_moe.experts.27.w3", "model.layers.31.block_sparse_moe.experts.28.w3", "model.layers.31.block_sparse_moe.experts.29.w3", "model.layers.31.block_sparse_moe.experts.30.w3", "model.layers.31.block_sparse_moe.experts.31.w3", "model.layers.31.block_sparse_moe.experts.32.w3", "model.layers.31.block_sparse_moe.experts.33.w3", "model.layers.31.block_sparse_moe.experts.34.w3", "model.layers.31.block_sparse_moe.experts.35.w3", "model.layers.31.block_sparse_moe.experts.36.w3", "model.layers.31.block_sparse_moe.experts.37.w3", "model.layers.31.block_sparse_moe.experts.38.w3", "model.layers.31.block_sparse_moe.experts.39.w3", "model.layers.31.block_sparse_moe.experts.40.w3", "model.layers.31.block_sparse_moe.experts.41.w3", "model.layers.31.block_sparse_moe.experts.42.w3", "model.layers.31.block_sparse_moe.experts.43.w3", "model.layers.31.block_sparse_moe.experts.44.w3", "model.layers.31.block_sparse_moe.experts.45.w3", "model.layers.31.block_sparse_moe.experts.46.w3", "model.layers.31.block_sparse_moe.experts.47.w3", "model.layers.31.block_sparse_moe.experts.48.w3", "model.layers.31.block_sparse_moe.experts.49.w3", "model.layers.31.block_sparse_moe.experts.50.w3", "model.layers.31.block_sparse_moe.experts.51.w3", "model.layers.31.block_sparse_moe.experts.52.w3", "model.layers.31.block_sparse_moe.experts.53.w3", "model.layers.31.block_sparse_moe.experts.54.w3", "model.layers.31.block_sparse_moe.experts.55.w3", "model.layers.31.block_sparse_moe.experts.56.w3", "model.layers.31.block_sparse_moe.experts.57.w3", "model.layers.31.block_sparse_moe.experts.58.w3", "model.layers.31.block_sparse_moe.experts.59.w3", "model.layers.31.block_sparse_moe.experts.60.w3", "model.layers.31.block_sparse_moe.experts.61.w3", "model.layers.31.block_sparse_moe.experts.62.w3", "model.layers.31.block_sparse_moe.experts.63.w3", "model.layers.31.block_sparse_moe.experts.64.w3", "model.layers.31.block_sparse_moe.experts.65.w3", "model.layers.31.block_sparse_moe.experts.66.w3", "model.layers.31.block_sparse_moe.experts.67.w3", "model.layers.31.block_sparse_moe.experts.68.w3", "model.layers.31.block_sparse_moe.experts.69.w3", "model.layers.31.block_sparse_moe.experts.70.w3", "model.layers.31.block_sparse_moe.experts.71.w3", "model.layers.31.block_sparse_moe.experts.72.w3", "model.layers.31.block_sparse_moe.experts.73.w3", "model.layers.31.block_sparse_moe.experts.74.w3", "model.layers.31.block_sparse_moe.experts.75.w3", "model.layers.31.block_sparse_moe.experts.76.w3", "model.layers.31.block_sparse_moe.experts.77.w3", "model.layers.31.block_sparse_moe.experts.78.w3", "model.layers.31.block_sparse_moe.experts.79.w3", "model.layers.31.block_sparse_moe.experts.80.w3", "model.layers.31.block_sparse_moe.experts.81.w3", "model.layers.31.block_sparse_moe.experts.82.w3", "model.layers.31.block_sparse_moe.experts.83.w3", "model.layers.31.block_sparse_moe.experts.84.w3", "model.layers.31.block_sparse_moe.experts.85.w3", "model.layers.31.block_sparse_moe.experts.86.w3", "model.layers.31.block_sparse_moe.experts.87.w3", "model.layers.31.block_sparse_moe.experts.88.w3", "model.layers.31.block_sparse_moe.experts.89.w3", "model.layers.31.block_sparse_moe.experts.90.w3", "model.layers.31.block_sparse_moe.experts.91.w3", "model.layers.31.block_sparse_moe.experts.92.w3", "model.layers.31.block_sparse_moe.experts.93.w3", "model.layers.31.block_sparse_moe.experts.94.w3", "model.layers.31.block_sparse_moe.experts.95.w3", "model.layers.31.block_sparse_moe.experts.96.w3", "model.layers.31.block_sparse_moe.experts.97.w3", "model.layers.31.block_sparse_moe.experts.98.w3", "model.layers.31.block_sparse_moe.experts.99.w3", "model.layers.31.block_sparse_moe.experts.100.w3", "model.layers.31.block_sparse_moe.experts.101.w3", "model.layers.31.block_sparse_moe.experts.102.w3", "model.layers.31.block_sparse_moe.experts.103.w3", "model.layers.31.block_sparse_moe.experts.104.w3", "model.layers.31.block_sparse_moe.experts.105.w3", "model.layers.31.block_sparse_moe.experts.106.w3", "model.layers.31.block_sparse_moe.experts.107.w3", "model.layers.31.block_sparse_moe.experts.108.w3", "model.layers.31.block_sparse_moe.experts.109.w3", "model.layers.31.block_sparse_moe.experts.110.w3", "model.layers.31.block_sparse_moe.experts.111.w3", "model.layers.31.block_sparse_moe.experts.112.w3", "model.layers.31.block_sparse_moe.experts.113.w3", "model.layers.31.block_sparse_moe.experts.114.w3", "model.layers.31.block_sparse_moe.experts.115.w3", "model.layers.31.block_sparse_moe.experts.116.w3", "model.layers.31.block_sparse_moe.experts.117.w3", "model.layers.31.block_sparse_moe.experts.118.w3", "model.layers.31.block_sparse_moe.experts.119.w3", "model.layers.31.block_sparse_moe.experts.120.w3", "model.layers.31.block_sparse_moe.experts.121.w3", "model.layers.31.block_sparse_moe.experts.122.w3", "model.layers.31.block_sparse_moe.experts.123.w3", "model.layers.31.block_sparse_moe.experts.124.w3", "model.layers.31.block_sparse_moe.experts.125.w3", "model.layers.31.block_sparse_moe.experts.126.w3", "model.layers.31.block_sparse_moe.experts.127.w3", "model.layers.31.block_sparse_moe.experts.128.w3", "model.layers.31.block_sparse_moe.experts.129.w3", "model.layers.31.block_sparse_moe.experts.130.w3", "model.layers.31.block_sparse_moe.experts.131.w3", "model.layers.31.block_sparse_moe.experts.132.w3", "model.layers.31.block_sparse_moe.experts.133.w3", "model.layers.31.block_sparse_moe.experts.134.w3", "model.layers.31.block_sparse_moe.experts.135.w3", "model.layers.31.block_sparse_moe.experts.136.w3", "model.layers.31.block_sparse_moe.experts.137.w3", "model.layers.31.block_sparse_moe.experts.138.w3", "model.layers.31.block_sparse_moe.experts.139.w3", "model.layers.31.block_sparse_moe.experts.140.w3", "model.layers.31.block_sparse_moe.experts.141.w3", "model.layers.31.block_sparse_moe.experts.142.w3", "model.layers.31.block_sparse_moe.experts.143.w3", "model.layers.31.block_sparse_moe.experts.144.w3", "model.layers.31.block_sparse_moe.experts.145.w3", "model.layers.31.block_sparse_moe.experts.146.w3", "model.layers.31.block_sparse_moe.experts.147.w3", "model.layers.31.block_sparse_moe.experts.148.w3", "model.layers.31.block_sparse_moe.experts.149.w3", "model.layers.31.block_sparse_moe.experts.150.w3", "model.layers.31.block_sparse_moe.experts.151.w3", "model.layers.31.block_sparse_moe.experts.152.w3", "model.layers.31.block_sparse_moe.experts.153.w3", "model.layers.31.block_sparse_moe.experts.154.w3", "model.layers.31.block_sparse_moe.experts.155.w3", "model.layers.31.block_sparse_moe.experts.156.w3", "model.layers.31.block_sparse_moe.experts.157.w3", "model.layers.31.block_sparse_moe.experts.158.w3", "model.layers.31.block_sparse_moe.experts.159.w3", "model.layers.31.block_sparse_moe.experts.160.w3", "model.layers.31.block_sparse_moe.experts.161.w3", "model.layers.31.block_sparse_moe.experts.162.w3", "model.layers.31.block_sparse_moe.experts.163.w3", "model.layers.31.block_sparse_moe.experts.164.w3", "model.layers.31.block_sparse_moe.experts.165.w3", "model.layers.31.block_sparse_moe.experts.166.w3", "model.layers.31.block_sparse_moe.experts.167.w3", "model.layers.31.block_sparse_moe.experts.168.w3", "model.layers.31.block_sparse_moe.experts.169.w3", "model.layers.31.block_sparse_moe.experts.170.w3", "model.layers.31.block_sparse_moe.experts.171.w3", "model.layers.31.block_sparse_moe.experts.172.w3", "model.layers.31.block_sparse_moe.experts.173.w3", "model.layers.31.block_sparse_moe.experts.174.w3", "model.layers.31.block_sparse_moe.experts.175.w3", "model.layers.31.block_sparse_moe.experts.176.w3", "model.layers.31.block_sparse_moe.experts.177.w3", "model.layers.31.block_sparse_moe.experts.178.w3", "model.layers.31.block_sparse_moe.experts.179.w3", "model.layers.31.block_sparse_moe.experts.180.w3", "model.layers.31.block_sparse_moe.experts.181.w3", "model.layers.31.block_sparse_moe.experts.182.w3", "model.layers.31.block_sparse_moe.experts.183.w3", "model.layers.31.block_sparse_moe.experts.184.w3", "model.layers.31.block_sparse_moe.experts.185.w3", "model.layers.31.block_sparse_moe.experts.186.w3", "model.layers.31.block_sparse_moe.experts.187.w3", "model.layers.31.block_sparse_moe.experts.188.w3", "model.layers.31.block_sparse_moe.experts.189.w3", "model.layers.31.block_sparse_moe.experts.190.w3", "model.layers.31.block_sparse_moe.experts.191.w3", "model.layers.31.block_sparse_moe.experts.192.w3", "model.layers.31.block_sparse_moe.experts.193.w3", "model.layers.31.block_sparse_moe.experts.194.w3", "model.layers.31.block_sparse_moe.experts.195.w3", "model.layers.31.block_sparse_moe.experts.196.w3", "model.layers.31.block_sparse_moe.experts.197.w3", "model.layers.31.block_sparse_moe.experts.198.w3", "model.layers.31.block_sparse_moe.experts.199.w3", "model.layers.31.block_sparse_moe.experts.200.w3", "model.layers.31.block_sparse_moe.experts.201.w3", "model.layers.31.block_sparse_moe.experts.202.w3", "model.layers.31.block_sparse_moe.experts.203.w3", "model.layers.31.block_sparse_moe.experts.204.w3", "model.layers.31.block_sparse_moe.experts.205.w3", "model.layers.31.block_sparse_moe.experts.206.w3", "model.layers.31.block_sparse_moe.experts.207.w3", "model.layers.31.block_sparse_moe.experts.208.w3", "model.layers.31.block_sparse_moe.experts.209.w3", "model.layers.31.block_sparse_moe.experts.210.w3", "model.layers.31.block_sparse_moe.experts.211.w3", "model.layers.31.block_sparse_moe.experts.212.w3", "model.layers.31.block_sparse_moe.experts.213.w3", "model.layers.31.block_sparse_moe.experts.214.w3", "model.layers.31.block_sparse_moe.experts.215.w3", "model.layers.31.block_sparse_moe.experts.216.w3", "model.layers.31.block_sparse_moe.experts.217.w3", "model.layers.31.block_sparse_moe.experts.218.w3", "model.layers.31.block_sparse_moe.experts.219.w3", "model.layers.31.block_sparse_moe.experts.220.w3", "model.layers.31.block_sparse_moe.experts.221.w3", "model.layers.31.block_sparse_moe.experts.222.w3", "model.layers.31.block_sparse_moe.experts.223.w3", "model.layers.31.block_sparse_moe.experts.224.w3", "model.layers.31.block_sparse_moe.experts.225.w3", "model.layers.31.block_sparse_moe.experts.226.w3", "model.layers.31.block_sparse_moe.experts.227.w3", "model.layers.31.block_sparse_moe.experts.228.w3", "model.layers.31.block_sparse_moe.experts.229.w3", "model.layers.31.block_sparse_moe.experts.230.w3", "model.layers.31.block_sparse_moe.experts.231.w3", "model.layers.31.block_sparse_moe.experts.232.w3", "model.layers.31.block_sparse_moe.experts.233.w3", "model.layers.31.block_sparse_moe.experts.234.w3", "model.layers.31.block_sparse_moe.experts.235.w3", "model.layers.31.block_sparse_moe.experts.236.w3", "model.layers.31.block_sparse_moe.experts.237.w3", "model.layers.31.block_sparse_moe.experts.238.w3", "model.layers.31.block_sparse_moe.experts.239.w3", "model.layers.31.block_sparse_moe.experts.240.w3", "model.layers.31.block_sparse_moe.experts.241.w3", "model.layers.31.block_sparse_moe.experts.242.w3", "model.layers.31.block_sparse_moe.experts.243.w3", "model.layers.31.block_sparse_moe.experts.244.w3", "model.layers.31.block_sparse_moe.experts.245.w3", "model.layers.31.block_sparse_moe.experts.246.w3", "model.layers.31.block_sparse_moe.experts.247.w3", "model.layers.31.block_sparse_moe.experts.248.w3", "model.layers.31.block_sparse_moe.experts.249.w3", "model.layers.31.block_sparse_moe.experts.250.w3", "model.layers.31.block_sparse_moe.experts.251.w3", "model.layers.31.block_sparse_moe.experts.252.w3", "model.layers.31.block_sparse_moe.experts.253.w3", "model.layers.31.block_sparse_moe.experts.254.w3", "model.layers.31.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0002626858651638142, "dbits": 2415919104 } ] }, { "idx": 159, "layers": [ "model.layers.31.block_sparse_moe.experts.0.w2", "model.layers.31.block_sparse_moe.experts.1.w2", "model.layers.31.block_sparse_moe.experts.2.w2", "model.layers.31.block_sparse_moe.experts.3.w2", "model.layers.31.block_sparse_moe.experts.4.w2", "model.layers.31.block_sparse_moe.experts.5.w2", "model.layers.31.block_sparse_moe.experts.6.w2", "model.layers.31.block_sparse_moe.experts.7.w2", "model.layers.31.block_sparse_moe.experts.8.w2", "model.layers.31.block_sparse_moe.experts.9.w2", "model.layers.31.block_sparse_moe.experts.10.w2", "model.layers.31.block_sparse_moe.experts.11.w2", "model.layers.31.block_sparse_moe.experts.12.w2", "model.layers.31.block_sparse_moe.experts.13.w2", "model.layers.31.block_sparse_moe.experts.14.w2", "model.layers.31.block_sparse_moe.experts.15.w2", "model.layers.31.block_sparse_moe.experts.16.w2", "model.layers.31.block_sparse_moe.experts.17.w2", "model.layers.31.block_sparse_moe.experts.18.w2", "model.layers.31.block_sparse_moe.experts.19.w2", "model.layers.31.block_sparse_moe.experts.20.w2", "model.layers.31.block_sparse_moe.experts.21.w2", "model.layers.31.block_sparse_moe.experts.22.w2", "model.layers.31.block_sparse_moe.experts.23.w2", "model.layers.31.block_sparse_moe.experts.24.w2", "model.layers.31.block_sparse_moe.experts.25.w2", "model.layers.31.block_sparse_moe.experts.26.w2", "model.layers.31.block_sparse_moe.experts.27.w2", "model.layers.31.block_sparse_moe.experts.28.w2", "model.layers.31.block_sparse_moe.experts.29.w2", "model.layers.31.block_sparse_moe.experts.30.w2", "model.layers.31.block_sparse_moe.experts.31.w2", "model.layers.31.block_sparse_moe.experts.32.w2", "model.layers.31.block_sparse_moe.experts.33.w2", "model.layers.31.block_sparse_moe.experts.34.w2", "model.layers.31.block_sparse_moe.experts.35.w2", "model.layers.31.block_sparse_moe.experts.36.w2", "model.layers.31.block_sparse_moe.experts.37.w2", "model.layers.31.block_sparse_moe.experts.38.w2", "model.layers.31.block_sparse_moe.experts.39.w2", "model.layers.31.block_sparse_moe.experts.40.w2", "model.layers.31.block_sparse_moe.experts.41.w2", "model.layers.31.block_sparse_moe.experts.42.w2", "model.layers.31.block_sparse_moe.experts.43.w2", "model.layers.31.block_sparse_moe.experts.44.w2", "model.layers.31.block_sparse_moe.experts.45.w2", "model.layers.31.block_sparse_moe.experts.46.w2", "model.layers.31.block_sparse_moe.experts.47.w2", "model.layers.31.block_sparse_moe.experts.48.w2", "model.layers.31.block_sparse_moe.experts.49.w2", "model.layers.31.block_sparse_moe.experts.50.w2", "model.layers.31.block_sparse_moe.experts.51.w2", "model.layers.31.block_sparse_moe.experts.52.w2", "model.layers.31.block_sparse_moe.experts.53.w2", "model.layers.31.block_sparse_moe.experts.54.w2", "model.layers.31.block_sparse_moe.experts.55.w2", "model.layers.31.block_sparse_moe.experts.56.w2", "model.layers.31.block_sparse_moe.experts.57.w2", "model.layers.31.block_sparse_moe.experts.58.w2", "model.layers.31.block_sparse_moe.experts.59.w2", "model.layers.31.block_sparse_moe.experts.60.w2", "model.layers.31.block_sparse_moe.experts.61.w2", "model.layers.31.block_sparse_moe.experts.62.w2", "model.layers.31.block_sparse_moe.experts.63.w2", "model.layers.31.block_sparse_moe.experts.64.w2", "model.layers.31.block_sparse_moe.experts.65.w2", "model.layers.31.block_sparse_moe.experts.66.w2", "model.layers.31.block_sparse_moe.experts.67.w2", "model.layers.31.block_sparse_moe.experts.68.w2", "model.layers.31.block_sparse_moe.experts.69.w2", "model.layers.31.block_sparse_moe.experts.70.w2", "model.layers.31.block_sparse_moe.experts.71.w2", "model.layers.31.block_sparse_moe.experts.72.w2", "model.layers.31.block_sparse_moe.experts.73.w2", "model.layers.31.block_sparse_moe.experts.74.w2", "model.layers.31.block_sparse_moe.experts.75.w2", "model.layers.31.block_sparse_moe.experts.76.w2", "model.layers.31.block_sparse_moe.experts.77.w2", "model.layers.31.block_sparse_moe.experts.78.w2", "model.layers.31.block_sparse_moe.experts.79.w2", "model.layers.31.block_sparse_moe.experts.80.w2", "model.layers.31.block_sparse_moe.experts.81.w2", "model.layers.31.block_sparse_moe.experts.82.w2", "model.layers.31.block_sparse_moe.experts.83.w2", "model.layers.31.block_sparse_moe.experts.84.w2", "model.layers.31.block_sparse_moe.experts.85.w2", "model.layers.31.block_sparse_moe.experts.86.w2", "model.layers.31.block_sparse_moe.experts.87.w2", "model.layers.31.block_sparse_moe.experts.88.w2", "model.layers.31.block_sparse_moe.experts.89.w2", "model.layers.31.block_sparse_moe.experts.90.w2", "model.layers.31.block_sparse_moe.experts.91.w2", "model.layers.31.block_sparse_moe.experts.92.w2", "model.layers.31.block_sparse_moe.experts.93.w2", "model.layers.31.block_sparse_moe.experts.94.w2", "model.layers.31.block_sparse_moe.experts.95.w2", "model.layers.31.block_sparse_moe.experts.96.w2", "model.layers.31.block_sparse_moe.experts.97.w2", "model.layers.31.block_sparse_moe.experts.98.w2", "model.layers.31.block_sparse_moe.experts.99.w2", "model.layers.31.block_sparse_moe.experts.100.w2", "model.layers.31.block_sparse_moe.experts.101.w2", "model.layers.31.block_sparse_moe.experts.102.w2", "model.layers.31.block_sparse_moe.experts.103.w2", "model.layers.31.block_sparse_moe.experts.104.w2", "model.layers.31.block_sparse_moe.experts.105.w2", "model.layers.31.block_sparse_moe.experts.106.w2", "model.layers.31.block_sparse_moe.experts.107.w2", "model.layers.31.block_sparse_moe.experts.108.w2", "model.layers.31.block_sparse_moe.experts.109.w2", "model.layers.31.block_sparse_moe.experts.110.w2", "model.layers.31.block_sparse_moe.experts.111.w2", "model.layers.31.block_sparse_moe.experts.112.w2", "model.layers.31.block_sparse_moe.experts.113.w2", "model.layers.31.block_sparse_moe.experts.114.w2", "model.layers.31.block_sparse_moe.experts.115.w2", "model.layers.31.block_sparse_moe.experts.116.w2", "model.layers.31.block_sparse_moe.experts.117.w2", "model.layers.31.block_sparse_moe.experts.118.w2", "model.layers.31.block_sparse_moe.experts.119.w2", "model.layers.31.block_sparse_moe.experts.120.w2", "model.layers.31.block_sparse_moe.experts.121.w2", "model.layers.31.block_sparse_moe.experts.122.w2", "model.layers.31.block_sparse_moe.experts.123.w2", "model.layers.31.block_sparse_moe.experts.124.w2", "model.layers.31.block_sparse_moe.experts.125.w2", "model.layers.31.block_sparse_moe.experts.126.w2", "model.layers.31.block_sparse_moe.experts.127.w2", "model.layers.31.block_sparse_moe.experts.128.w2", "model.layers.31.block_sparse_moe.experts.129.w2", "model.layers.31.block_sparse_moe.experts.130.w2", "model.layers.31.block_sparse_moe.experts.131.w2", "model.layers.31.block_sparse_moe.experts.132.w2", "model.layers.31.block_sparse_moe.experts.133.w2", "model.layers.31.block_sparse_moe.experts.134.w2", "model.layers.31.block_sparse_moe.experts.135.w2", "model.layers.31.block_sparse_moe.experts.136.w2", "model.layers.31.block_sparse_moe.experts.137.w2", "model.layers.31.block_sparse_moe.experts.138.w2", "model.layers.31.block_sparse_moe.experts.139.w2", "model.layers.31.block_sparse_moe.experts.140.w2", "model.layers.31.block_sparse_moe.experts.141.w2", "model.layers.31.block_sparse_moe.experts.142.w2", "model.layers.31.block_sparse_moe.experts.143.w2", "model.layers.31.block_sparse_moe.experts.144.w2", "model.layers.31.block_sparse_moe.experts.145.w2", "model.layers.31.block_sparse_moe.experts.146.w2", "model.layers.31.block_sparse_moe.experts.147.w2", "model.layers.31.block_sparse_moe.experts.148.w2", "model.layers.31.block_sparse_moe.experts.149.w2", "model.layers.31.block_sparse_moe.experts.150.w2", "model.layers.31.block_sparse_moe.experts.151.w2", "model.layers.31.block_sparse_moe.experts.152.w2", "model.layers.31.block_sparse_moe.experts.153.w2", "model.layers.31.block_sparse_moe.experts.154.w2", "model.layers.31.block_sparse_moe.experts.155.w2", "model.layers.31.block_sparse_moe.experts.156.w2", "model.layers.31.block_sparse_moe.experts.157.w2", "model.layers.31.block_sparse_moe.experts.158.w2", "model.layers.31.block_sparse_moe.experts.159.w2", "model.layers.31.block_sparse_moe.experts.160.w2", "model.layers.31.block_sparse_moe.experts.161.w2", "model.layers.31.block_sparse_moe.experts.162.w2", "model.layers.31.block_sparse_moe.experts.163.w2", "model.layers.31.block_sparse_moe.experts.164.w2", "model.layers.31.block_sparse_moe.experts.165.w2", "model.layers.31.block_sparse_moe.experts.166.w2", "model.layers.31.block_sparse_moe.experts.167.w2", "model.layers.31.block_sparse_moe.experts.168.w2", "model.layers.31.block_sparse_moe.experts.169.w2", "model.layers.31.block_sparse_moe.experts.170.w2", "model.layers.31.block_sparse_moe.experts.171.w2", "model.layers.31.block_sparse_moe.experts.172.w2", "model.layers.31.block_sparse_moe.experts.173.w2", "model.layers.31.block_sparse_moe.experts.174.w2", "model.layers.31.block_sparse_moe.experts.175.w2", "model.layers.31.block_sparse_moe.experts.176.w2", "model.layers.31.block_sparse_moe.experts.177.w2", "model.layers.31.block_sparse_moe.experts.178.w2", "model.layers.31.block_sparse_moe.experts.179.w2", "model.layers.31.block_sparse_moe.experts.180.w2", "model.layers.31.block_sparse_moe.experts.181.w2", "model.layers.31.block_sparse_moe.experts.182.w2", "model.layers.31.block_sparse_moe.experts.183.w2", "model.layers.31.block_sparse_moe.experts.184.w2", "model.layers.31.block_sparse_moe.experts.185.w2", "model.layers.31.block_sparse_moe.experts.186.w2", "model.layers.31.block_sparse_moe.experts.187.w2", "model.layers.31.block_sparse_moe.experts.188.w2", "model.layers.31.block_sparse_moe.experts.189.w2", "model.layers.31.block_sparse_moe.experts.190.w2", "model.layers.31.block_sparse_moe.experts.191.w2", "model.layers.31.block_sparse_moe.experts.192.w2", "model.layers.31.block_sparse_moe.experts.193.w2", "model.layers.31.block_sparse_moe.experts.194.w2", "model.layers.31.block_sparse_moe.experts.195.w2", "model.layers.31.block_sparse_moe.experts.196.w2", "model.layers.31.block_sparse_moe.experts.197.w2", "model.layers.31.block_sparse_moe.experts.198.w2", "model.layers.31.block_sparse_moe.experts.199.w2", "model.layers.31.block_sparse_moe.experts.200.w2", "model.layers.31.block_sparse_moe.experts.201.w2", "model.layers.31.block_sparse_moe.experts.202.w2", "model.layers.31.block_sparse_moe.experts.203.w2", "model.layers.31.block_sparse_moe.experts.204.w2", "model.layers.31.block_sparse_moe.experts.205.w2", "model.layers.31.block_sparse_moe.experts.206.w2", "model.layers.31.block_sparse_moe.experts.207.w2", "model.layers.31.block_sparse_moe.experts.208.w2", "model.layers.31.block_sparse_moe.experts.209.w2", "model.layers.31.block_sparse_moe.experts.210.w2", "model.layers.31.block_sparse_moe.experts.211.w2", "model.layers.31.block_sparse_moe.experts.212.w2", "model.layers.31.block_sparse_moe.experts.213.w2", "model.layers.31.block_sparse_moe.experts.214.w2", "model.layers.31.block_sparse_moe.experts.215.w2", "model.layers.31.block_sparse_moe.experts.216.w2", "model.layers.31.block_sparse_moe.experts.217.w2", "model.layers.31.block_sparse_moe.experts.218.w2", "model.layers.31.block_sparse_moe.experts.219.w2", "model.layers.31.block_sparse_moe.experts.220.w2", "model.layers.31.block_sparse_moe.experts.221.w2", "model.layers.31.block_sparse_moe.experts.222.w2", "model.layers.31.block_sparse_moe.experts.223.w2", "model.layers.31.block_sparse_moe.experts.224.w2", "model.layers.31.block_sparse_moe.experts.225.w2", "model.layers.31.block_sparse_moe.experts.226.w2", "model.layers.31.block_sparse_moe.experts.227.w2", "model.layers.31.block_sparse_moe.experts.228.w2", "model.layers.31.block_sparse_moe.experts.229.w2", "model.layers.31.block_sparse_moe.experts.230.w2", "model.layers.31.block_sparse_moe.experts.231.w2", "model.layers.31.block_sparse_moe.experts.232.w2", "model.layers.31.block_sparse_moe.experts.233.w2", "model.layers.31.block_sparse_moe.experts.234.w2", "model.layers.31.block_sparse_moe.experts.235.w2", "model.layers.31.block_sparse_moe.experts.236.w2", "model.layers.31.block_sparse_moe.experts.237.w2", "model.layers.31.block_sparse_moe.experts.238.w2", "model.layers.31.block_sparse_moe.experts.239.w2", "model.layers.31.block_sparse_moe.experts.240.w2", "model.layers.31.block_sparse_moe.experts.241.w2", "model.layers.31.block_sparse_moe.experts.242.w2", "model.layers.31.block_sparse_moe.experts.243.w2", "model.layers.31.block_sparse_moe.experts.244.w2", "model.layers.31.block_sparse_moe.experts.245.w2", "model.layers.31.block_sparse_moe.experts.246.w2", "model.layers.31.block_sparse_moe.experts.247.w2", "model.layers.31.block_sparse_moe.experts.248.w2", "model.layers.31.block_sparse_moe.experts.249.w2", "model.layers.31.block_sparse_moe.experts.250.w2", "model.layers.31.block_sparse_moe.experts.251.w2", "model.layers.31.block_sparse_moe.experts.252.w2", "model.layers.31.block_sparse_moe.experts.253.w2", "model.layers.31.block_sparse_moe.experts.254.w2", "model.layers.31.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.00022940859198572472, "dbits": 1207959552 } ] }, { "idx": 160, "layers": [ "model.layers.32.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0012242510914802773, "dbits": 18874368 } ] }, { "idx": 161, "layers": [ "model.layers.32.self_attn.k_proj", "model.layers.32.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0010105062276125065, "dbits": 6291456 } ] }, { "idx": 162, "layers": [ "model.layers.32.self_attn.o_proj" ], "candidates": [ { "dkld": -0.005250370129942894, "dbits": 18874368 } ] }, { "idx": 163, "layers": [ "model.layers.32.block_sparse_moe.experts.0.w1", "model.layers.32.block_sparse_moe.experts.1.w1", "model.layers.32.block_sparse_moe.experts.2.w1", "model.layers.32.block_sparse_moe.experts.3.w1", "model.layers.32.block_sparse_moe.experts.4.w1", "model.layers.32.block_sparse_moe.experts.5.w1", "model.layers.32.block_sparse_moe.experts.6.w1", "model.layers.32.block_sparse_moe.experts.7.w1", "model.layers.32.block_sparse_moe.experts.8.w1", "model.layers.32.block_sparse_moe.experts.9.w1", "model.layers.32.block_sparse_moe.experts.10.w1", "model.layers.32.block_sparse_moe.experts.11.w1", "model.layers.32.block_sparse_moe.experts.12.w1", "model.layers.32.block_sparse_moe.experts.13.w1", "model.layers.32.block_sparse_moe.experts.14.w1", "model.layers.32.block_sparse_moe.experts.15.w1", "model.layers.32.block_sparse_moe.experts.16.w1", "model.layers.32.block_sparse_moe.experts.17.w1", "model.layers.32.block_sparse_moe.experts.18.w1", "model.layers.32.block_sparse_moe.experts.19.w1", "model.layers.32.block_sparse_moe.experts.20.w1", "model.layers.32.block_sparse_moe.experts.21.w1", "model.layers.32.block_sparse_moe.experts.22.w1", "model.layers.32.block_sparse_moe.experts.23.w1", "model.layers.32.block_sparse_moe.experts.24.w1", "model.layers.32.block_sparse_moe.experts.25.w1", "model.layers.32.block_sparse_moe.experts.26.w1", "model.layers.32.block_sparse_moe.experts.27.w1", "model.layers.32.block_sparse_moe.experts.28.w1", "model.layers.32.block_sparse_moe.experts.29.w1", "model.layers.32.block_sparse_moe.experts.30.w1", "model.layers.32.block_sparse_moe.experts.31.w1", "model.layers.32.block_sparse_moe.experts.32.w1", "model.layers.32.block_sparse_moe.experts.33.w1", "model.layers.32.block_sparse_moe.experts.34.w1", "model.layers.32.block_sparse_moe.experts.35.w1", "model.layers.32.block_sparse_moe.experts.36.w1", "model.layers.32.block_sparse_moe.experts.37.w1", "model.layers.32.block_sparse_moe.experts.38.w1", "model.layers.32.block_sparse_moe.experts.39.w1", "model.layers.32.block_sparse_moe.experts.40.w1", "model.layers.32.block_sparse_moe.experts.41.w1", "model.layers.32.block_sparse_moe.experts.42.w1", "model.layers.32.block_sparse_moe.experts.43.w1", "model.layers.32.block_sparse_moe.experts.44.w1", "model.layers.32.block_sparse_moe.experts.45.w1", "model.layers.32.block_sparse_moe.experts.46.w1", "model.layers.32.block_sparse_moe.experts.47.w1", "model.layers.32.block_sparse_moe.experts.48.w1", "model.layers.32.block_sparse_moe.experts.49.w1", "model.layers.32.block_sparse_moe.experts.50.w1", "model.layers.32.block_sparse_moe.experts.51.w1", "model.layers.32.block_sparse_moe.experts.52.w1", "model.layers.32.block_sparse_moe.experts.53.w1", "model.layers.32.block_sparse_moe.experts.54.w1", "model.layers.32.block_sparse_moe.experts.55.w1", "model.layers.32.block_sparse_moe.experts.56.w1", "model.layers.32.block_sparse_moe.experts.57.w1", "model.layers.32.block_sparse_moe.experts.58.w1", "model.layers.32.block_sparse_moe.experts.59.w1", "model.layers.32.block_sparse_moe.experts.60.w1", "model.layers.32.block_sparse_moe.experts.61.w1", "model.layers.32.block_sparse_moe.experts.62.w1", "model.layers.32.block_sparse_moe.experts.63.w1", "model.layers.32.block_sparse_moe.experts.64.w1", "model.layers.32.block_sparse_moe.experts.65.w1", "model.layers.32.block_sparse_moe.experts.66.w1", "model.layers.32.block_sparse_moe.experts.67.w1", "model.layers.32.block_sparse_moe.experts.68.w1", "model.layers.32.block_sparse_moe.experts.69.w1", "model.layers.32.block_sparse_moe.experts.70.w1", "model.layers.32.block_sparse_moe.experts.71.w1", "model.layers.32.block_sparse_moe.experts.72.w1", "model.layers.32.block_sparse_moe.experts.73.w1", "model.layers.32.block_sparse_moe.experts.74.w1", "model.layers.32.block_sparse_moe.experts.75.w1", "model.layers.32.block_sparse_moe.experts.76.w1", "model.layers.32.block_sparse_moe.experts.77.w1", "model.layers.32.block_sparse_moe.experts.78.w1", "model.layers.32.block_sparse_moe.experts.79.w1", "model.layers.32.block_sparse_moe.experts.80.w1", "model.layers.32.block_sparse_moe.experts.81.w1", "model.layers.32.block_sparse_moe.experts.82.w1", "model.layers.32.block_sparse_moe.experts.83.w1", "model.layers.32.block_sparse_moe.experts.84.w1", "model.layers.32.block_sparse_moe.experts.85.w1", "model.layers.32.block_sparse_moe.experts.86.w1", "model.layers.32.block_sparse_moe.experts.87.w1", "model.layers.32.block_sparse_moe.experts.88.w1", "model.layers.32.block_sparse_moe.experts.89.w1", "model.layers.32.block_sparse_moe.experts.90.w1", "model.layers.32.block_sparse_moe.experts.91.w1", "model.layers.32.block_sparse_moe.experts.92.w1", "model.layers.32.block_sparse_moe.experts.93.w1", "model.layers.32.block_sparse_moe.experts.94.w1", "model.layers.32.block_sparse_moe.experts.95.w1", "model.layers.32.block_sparse_moe.experts.96.w1", "model.layers.32.block_sparse_moe.experts.97.w1", "model.layers.32.block_sparse_moe.experts.98.w1", "model.layers.32.block_sparse_moe.experts.99.w1", "model.layers.32.block_sparse_moe.experts.100.w1", "model.layers.32.block_sparse_moe.experts.101.w1", "model.layers.32.block_sparse_moe.experts.102.w1", "model.layers.32.block_sparse_moe.experts.103.w1", "model.layers.32.block_sparse_moe.experts.104.w1", "model.layers.32.block_sparse_moe.experts.105.w1", "model.layers.32.block_sparse_moe.experts.106.w1", "model.layers.32.block_sparse_moe.experts.107.w1", "model.layers.32.block_sparse_moe.experts.108.w1", "model.layers.32.block_sparse_moe.experts.109.w1", "model.layers.32.block_sparse_moe.experts.110.w1", "model.layers.32.block_sparse_moe.experts.111.w1", "model.layers.32.block_sparse_moe.experts.112.w1", "model.layers.32.block_sparse_moe.experts.113.w1", "model.layers.32.block_sparse_moe.experts.114.w1", "model.layers.32.block_sparse_moe.experts.115.w1", "model.layers.32.block_sparse_moe.experts.116.w1", "model.layers.32.block_sparse_moe.experts.117.w1", "model.layers.32.block_sparse_moe.experts.118.w1", "model.layers.32.block_sparse_moe.experts.119.w1", "model.layers.32.block_sparse_moe.experts.120.w1", "model.layers.32.block_sparse_moe.experts.121.w1", "model.layers.32.block_sparse_moe.experts.122.w1", "model.layers.32.block_sparse_moe.experts.123.w1", "model.layers.32.block_sparse_moe.experts.124.w1", "model.layers.32.block_sparse_moe.experts.125.w1", "model.layers.32.block_sparse_moe.experts.126.w1", "model.layers.32.block_sparse_moe.experts.127.w1", "model.layers.32.block_sparse_moe.experts.128.w1", "model.layers.32.block_sparse_moe.experts.129.w1", "model.layers.32.block_sparse_moe.experts.130.w1", "model.layers.32.block_sparse_moe.experts.131.w1", "model.layers.32.block_sparse_moe.experts.132.w1", "model.layers.32.block_sparse_moe.experts.133.w1", "model.layers.32.block_sparse_moe.experts.134.w1", "model.layers.32.block_sparse_moe.experts.135.w1", "model.layers.32.block_sparse_moe.experts.136.w1", "model.layers.32.block_sparse_moe.experts.137.w1", "model.layers.32.block_sparse_moe.experts.138.w1", "model.layers.32.block_sparse_moe.experts.139.w1", "model.layers.32.block_sparse_moe.experts.140.w1", "model.layers.32.block_sparse_moe.experts.141.w1", "model.layers.32.block_sparse_moe.experts.142.w1", "model.layers.32.block_sparse_moe.experts.143.w1", "model.layers.32.block_sparse_moe.experts.144.w1", "model.layers.32.block_sparse_moe.experts.145.w1", "model.layers.32.block_sparse_moe.experts.146.w1", "model.layers.32.block_sparse_moe.experts.147.w1", "model.layers.32.block_sparse_moe.experts.148.w1", "model.layers.32.block_sparse_moe.experts.149.w1", "model.layers.32.block_sparse_moe.experts.150.w1", "model.layers.32.block_sparse_moe.experts.151.w1", "model.layers.32.block_sparse_moe.experts.152.w1", "model.layers.32.block_sparse_moe.experts.153.w1", "model.layers.32.block_sparse_moe.experts.154.w1", "model.layers.32.block_sparse_moe.experts.155.w1", "model.layers.32.block_sparse_moe.experts.156.w1", "model.layers.32.block_sparse_moe.experts.157.w1", "model.layers.32.block_sparse_moe.experts.158.w1", "model.layers.32.block_sparse_moe.experts.159.w1", "model.layers.32.block_sparse_moe.experts.160.w1", "model.layers.32.block_sparse_moe.experts.161.w1", "model.layers.32.block_sparse_moe.experts.162.w1", "model.layers.32.block_sparse_moe.experts.163.w1", "model.layers.32.block_sparse_moe.experts.164.w1", "model.layers.32.block_sparse_moe.experts.165.w1", "model.layers.32.block_sparse_moe.experts.166.w1", "model.layers.32.block_sparse_moe.experts.167.w1", "model.layers.32.block_sparse_moe.experts.168.w1", "model.layers.32.block_sparse_moe.experts.169.w1", "model.layers.32.block_sparse_moe.experts.170.w1", "model.layers.32.block_sparse_moe.experts.171.w1", "model.layers.32.block_sparse_moe.experts.172.w1", "model.layers.32.block_sparse_moe.experts.173.w1", "model.layers.32.block_sparse_moe.experts.174.w1", "model.layers.32.block_sparse_moe.experts.175.w1", "model.layers.32.block_sparse_moe.experts.176.w1", "model.layers.32.block_sparse_moe.experts.177.w1", "model.layers.32.block_sparse_moe.experts.178.w1", "model.layers.32.block_sparse_moe.experts.179.w1", "model.layers.32.block_sparse_moe.experts.180.w1", "model.layers.32.block_sparse_moe.experts.181.w1", "model.layers.32.block_sparse_moe.experts.182.w1", "model.layers.32.block_sparse_moe.experts.183.w1", "model.layers.32.block_sparse_moe.experts.184.w1", "model.layers.32.block_sparse_moe.experts.185.w1", "model.layers.32.block_sparse_moe.experts.186.w1", "model.layers.32.block_sparse_moe.experts.187.w1", "model.layers.32.block_sparse_moe.experts.188.w1", "model.layers.32.block_sparse_moe.experts.189.w1", "model.layers.32.block_sparse_moe.experts.190.w1", "model.layers.32.block_sparse_moe.experts.191.w1", "model.layers.32.block_sparse_moe.experts.192.w1", "model.layers.32.block_sparse_moe.experts.193.w1", "model.layers.32.block_sparse_moe.experts.194.w1", "model.layers.32.block_sparse_moe.experts.195.w1", "model.layers.32.block_sparse_moe.experts.196.w1", "model.layers.32.block_sparse_moe.experts.197.w1", "model.layers.32.block_sparse_moe.experts.198.w1", "model.layers.32.block_sparse_moe.experts.199.w1", "model.layers.32.block_sparse_moe.experts.200.w1", "model.layers.32.block_sparse_moe.experts.201.w1", "model.layers.32.block_sparse_moe.experts.202.w1", "model.layers.32.block_sparse_moe.experts.203.w1", "model.layers.32.block_sparse_moe.experts.204.w1", "model.layers.32.block_sparse_moe.experts.205.w1", "model.layers.32.block_sparse_moe.experts.206.w1", "model.layers.32.block_sparse_moe.experts.207.w1", "model.layers.32.block_sparse_moe.experts.208.w1", "model.layers.32.block_sparse_moe.experts.209.w1", "model.layers.32.block_sparse_moe.experts.210.w1", "model.layers.32.block_sparse_moe.experts.211.w1", "model.layers.32.block_sparse_moe.experts.212.w1", "model.layers.32.block_sparse_moe.experts.213.w1", "model.layers.32.block_sparse_moe.experts.214.w1", "model.layers.32.block_sparse_moe.experts.215.w1", "model.layers.32.block_sparse_moe.experts.216.w1", "model.layers.32.block_sparse_moe.experts.217.w1", "model.layers.32.block_sparse_moe.experts.218.w1", "model.layers.32.block_sparse_moe.experts.219.w1", "model.layers.32.block_sparse_moe.experts.220.w1", "model.layers.32.block_sparse_moe.experts.221.w1", "model.layers.32.block_sparse_moe.experts.222.w1", "model.layers.32.block_sparse_moe.experts.223.w1", "model.layers.32.block_sparse_moe.experts.224.w1", "model.layers.32.block_sparse_moe.experts.225.w1", "model.layers.32.block_sparse_moe.experts.226.w1", "model.layers.32.block_sparse_moe.experts.227.w1", "model.layers.32.block_sparse_moe.experts.228.w1", "model.layers.32.block_sparse_moe.experts.229.w1", "model.layers.32.block_sparse_moe.experts.230.w1", "model.layers.32.block_sparse_moe.experts.231.w1", "model.layers.32.block_sparse_moe.experts.232.w1", "model.layers.32.block_sparse_moe.experts.233.w1", "model.layers.32.block_sparse_moe.experts.234.w1", "model.layers.32.block_sparse_moe.experts.235.w1", "model.layers.32.block_sparse_moe.experts.236.w1", "model.layers.32.block_sparse_moe.experts.237.w1", "model.layers.32.block_sparse_moe.experts.238.w1", "model.layers.32.block_sparse_moe.experts.239.w1", "model.layers.32.block_sparse_moe.experts.240.w1", "model.layers.32.block_sparse_moe.experts.241.w1", "model.layers.32.block_sparse_moe.experts.242.w1", "model.layers.32.block_sparse_moe.experts.243.w1", "model.layers.32.block_sparse_moe.experts.244.w1", "model.layers.32.block_sparse_moe.experts.245.w1", "model.layers.32.block_sparse_moe.experts.246.w1", "model.layers.32.block_sparse_moe.experts.247.w1", "model.layers.32.block_sparse_moe.experts.248.w1", "model.layers.32.block_sparse_moe.experts.249.w1", "model.layers.32.block_sparse_moe.experts.250.w1", "model.layers.32.block_sparse_moe.experts.251.w1", "model.layers.32.block_sparse_moe.experts.252.w1", "model.layers.32.block_sparse_moe.experts.253.w1", "model.layers.32.block_sparse_moe.experts.254.w1", "model.layers.32.block_sparse_moe.experts.255.w1", "model.layers.32.block_sparse_moe.experts.0.w3", "model.layers.32.block_sparse_moe.experts.1.w3", "model.layers.32.block_sparse_moe.experts.2.w3", "model.layers.32.block_sparse_moe.experts.3.w3", "model.layers.32.block_sparse_moe.experts.4.w3", "model.layers.32.block_sparse_moe.experts.5.w3", "model.layers.32.block_sparse_moe.experts.6.w3", "model.layers.32.block_sparse_moe.experts.7.w3", "model.layers.32.block_sparse_moe.experts.8.w3", "model.layers.32.block_sparse_moe.experts.9.w3", "model.layers.32.block_sparse_moe.experts.10.w3", "model.layers.32.block_sparse_moe.experts.11.w3", "model.layers.32.block_sparse_moe.experts.12.w3", "model.layers.32.block_sparse_moe.experts.13.w3", "model.layers.32.block_sparse_moe.experts.14.w3", "model.layers.32.block_sparse_moe.experts.15.w3", "model.layers.32.block_sparse_moe.experts.16.w3", "model.layers.32.block_sparse_moe.experts.17.w3", "model.layers.32.block_sparse_moe.experts.18.w3", "model.layers.32.block_sparse_moe.experts.19.w3", "model.layers.32.block_sparse_moe.experts.20.w3", "model.layers.32.block_sparse_moe.experts.21.w3", "model.layers.32.block_sparse_moe.experts.22.w3", "model.layers.32.block_sparse_moe.experts.23.w3", "model.layers.32.block_sparse_moe.experts.24.w3", "model.layers.32.block_sparse_moe.experts.25.w3", "model.layers.32.block_sparse_moe.experts.26.w3", "model.layers.32.block_sparse_moe.experts.27.w3", "model.layers.32.block_sparse_moe.experts.28.w3", "model.layers.32.block_sparse_moe.experts.29.w3", "model.layers.32.block_sparse_moe.experts.30.w3", "model.layers.32.block_sparse_moe.experts.31.w3", "model.layers.32.block_sparse_moe.experts.32.w3", "model.layers.32.block_sparse_moe.experts.33.w3", "model.layers.32.block_sparse_moe.experts.34.w3", "model.layers.32.block_sparse_moe.experts.35.w3", "model.layers.32.block_sparse_moe.experts.36.w3", "model.layers.32.block_sparse_moe.experts.37.w3", "model.layers.32.block_sparse_moe.experts.38.w3", "model.layers.32.block_sparse_moe.experts.39.w3", "model.layers.32.block_sparse_moe.experts.40.w3", "model.layers.32.block_sparse_moe.experts.41.w3", "model.layers.32.block_sparse_moe.experts.42.w3", "model.layers.32.block_sparse_moe.experts.43.w3", "model.layers.32.block_sparse_moe.experts.44.w3", "model.layers.32.block_sparse_moe.experts.45.w3", "model.layers.32.block_sparse_moe.experts.46.w3", "model.layers.32.block_sparse_moe.experts.47.w3", "model.layers.32.block_sparse_moe.experts.48.w3", "model.layers.32.block_sparse_moe.experts.49.w3", "model.layers.32.block_sparse_moe.experts.50.w3", "model.layers.32.block_sparse_moe.experts.51.w3", "model.layers.32.block_sparse_moe.experts.52.w3", "model.layers.32.block_sparse_moe.experts.53.w3", "model.layers.32.block_sparse_moe.experts.54.w3", "model.layers.32.block_sparse_moe.experts.55.w3", "model.layers.32.block_sparse_moe.experts.56.w3", "model.layers.32.block_sparse_moe.experts.57.w3", "model.layers.32.block_sparse_moe.experts.58.w3", "model.layers.32.block_sparse_moe.experts.59.w3", "model.layers.32.block_sparse_moe.experts.60.w3", "model.layers.32.block_sparse_moe.experts.61.w3", "model.layers.32.block_sparse_moe.experts.62.w3", "model.layers.32.block_sparse_moe.experts.63.w3", "model.layers.32.block_sparse_moe.experts.64.w3", "model.layers.32.block_sparse_moe.experts.65.w3", "model.layers.32.block_sparse_moe.experts.66.w3", "model.layers.32.block_sparse_moe.experts.67.w3", "model.layers.32.block_sparse_moe.experts.68.w3", "model.layers.32.block_sparse_moe.experts.69.w3", "model.layers.32.block_sparse_moe.experts.70.w3", "model.layers.32.block_sparse_moe.experts.71.w3", "model.layers.32.block_sparse_moe.experts.72.w3", "model.layers.32.block_sparse_moe.experts.73.w3", "model.layers.32.block_sparse_moe.experts.74.w3", "model.layers.32.block_sparse_moe.experts.75.w3", "model.layers.32.block_sparse_moe.experts.76.w3", "model.layers.32.block_sparse_moe.experts.77.w3", "model.layers.32.block_sparse_moe.experts.78.w3", "model.layers.32.block_sparse_moe.experts.79.w3", "model.layers.32.block_sparse_moe.experts.80.w3", "model.layers.32.block_sparse_moe.experts.81.w3", "model.layers.32.block_sparse_moe.experts.82.w3", "model.layers.32.block_sparse_moe.experts.83.w3", "model.layers.32.block_sparse_moe.experts.84.w3", "model.layers.32.block_sparse_moe.experts.85.w3", "model.layers.32.block_sparse_moe.experts.86.w3", "model.layers.32.block_sparse_moe.experts.87.w3", "model.layers.32.block_sparse_moe.experts.88.w3", "model.layers.32.block_sparse_moe.experts.89.w3", "model.layers.32.block_sparse_moe.experts.90.w3", "model.layers.32.block_sparse_moe.experts.91.w3", "model.layers.32.block_sparse_moe.experts.92.w3", "model.layers.32.block_sparse_moe.experts.93.w3", "model.layers.32.block_sparse_moe.experts.94.w3", "model.layers.32.block_sparse_moe.experts.95.w3", "model.layers.32.block_sparse_moe.experts.96.w3", "model.layers.32.block_sparse_moe.experts.97.w3", "model.layers.32.block_sparse_moe.experts.98.w3", "model.layers.32.block_sparse_moe.experts.99.w3", "model.layers.32.block_sparse_moe.experts.100.w3", "model.layers.32.block_sparse_moe.experts.101.w3", "model.layers.32.block_sparse_moe.experts.102.w3", "model.layers.32.block_sparse_moe.experts.103.w3", "model.layers.32.block_sparse_moe.experts.104.w3", "model.layers.32.block_sparse_moe.experts.105.w3", "model.layers.32.block_sparse_moe.experts.106.w3", "model.layers.32.block_sparse_moe.experts.107.w3", "model.layers.32.block_sparse_moe.experts.108.w3", "model.layers.32.block_sparse_moe.experts.109.w3", "model.layers.32.block_sparse_moe.experts.110.w3", "model.layers.32.block_sparse_moe.experts.111.w3", "model.layers.32.block_sparse_moe.experts.112.w3", "model.layers.32.block_sparse_moe.experts.113.w3", "model.layers.32.block_sparse_moe.experts.114.w3", "model.layers.32.block_sparse_moe.experts.115.w3", "model.layers.32.block_sparse_moe.experts.116.w3", "model.layers.32.block_sparse_moe.experts.117.w3", "model.layers.32.block_sparse_moe.experts.118.w3", "model.layers.32.block_sparse_moe.experts.119.w3", "model.layers.32.block_sparse_moe.experts.120.w3", "model.layers.32.block_sparse_moe.experts.121.w3", "model.layers.32.block_sparse_moe.experts.122.w3", "model.layers.32.block_sparse_moe.experts.123.w3", "model.layers.32.block_sparse_moe.experts.124.w3", "model.layers.32.block_sparse_moe.experts.125.w3", "model.layers.32.block_sparse_moe.experts.126.w3", "model.layers.32.block_sparse_moe.experts.127.w3", "model.layers.32.block_sparse_moe.experts.128.w3", "model.layers.32.block_sparse_moe.experts.129.w3", "model.layers.32.block_sparse_moe.experts.130.w3", "model.layers.32.block_sparse_moe.experts.131.w3", "model.layers.32.block_sparse_moe.experts.132.w3", "model.layers.32.block_sparse_moe.experts.133.w3", "model.layers.32.block_sparse_moe.experts.134.w3", "model.layers.32.block_sparse_moe.experts.135.w3", "model.layers.32.block_sparse_moe.experts.136.w3", "model.layers.32.block_sparse_moe.experts.137.w3", "model.layers.32.block_sparse_moe.experts.138.w3", "model.layers.32.block_sparse_moe.experts.139.w3", "model.layers.32.block_sparse_moe.experts.140.w3", "model.layers.32.block_sparse_moe.experts.141.w3", "model.layers.32.block_sparse_moe.experts.142.w3", "model.layers.32.block_sparse_moe.experts.143.w3", "model.layers.32.block_sparse_moe.experts.144.w3", "model.layers.32.block_sparse_moe.experts.145.w3", "model.layers.32.block_sparse_moe.experts.146.w3", "model.layers.32.block_sparse_moe.experts.147.w3", "model.layers.32.block_sparse_moe.experts.148.w3", "model.layers.32.block_sparse_moe.experts.149.w3", "model.layers.32.block_sparse_moe.experts.150.w3", "model.layers.32.block_sparse_moe.experts.151.w3", "model.layers.32.block_sparse_moe.experts.152.w3", "model.layers.32.block_sparse_moe.experts.153.w3", "model.layers.32.block_sparse_moe.experts.154.w3", "model.layers.32.block_sparse_moe.experts.155.w3", "model.layers.32.block_sparse_moe.experts.156.w3", "model.layers.32.block_sparse_moe.experts.157.w3", "model.layers.32.block_sparse_moe.experts.158.w3", "model.layers.32.block_sparse_moe.experts.159.w3", "model.layers.32.block_sparse_moe.experts.160.w3", "model.layers.32.block_sparse_moe.experts.161.w3", "model.layers.32.block_sparse_moe.experts.162.w3", "model.layers.32.block_sparse_moe.experts.163.w3", "model.layers.32.block_sparse_moe.experts.164.w3", "model.layers.32.block_sparse_moe.experts.165.w3", "model.layers.32.block_sparse_moe.experts.166.w3", "model.layers.32.block_sparse_moe.experts.167.w3", "model.layers.32.block_sparse_moe.experts.168.w3", "model.layers.32.block_sparse_moe.experts.169.w3", "model.layers.32.block_sparse_moe.experts.170.w3", "model.layers.32.block_sparse_moe.experts.171.w3", "model.layers.32.block_sparse_moe.experts.172.w3", "model.layers.32.block_sparse_moe.experts.173.w3", "model.layers.32.block_sparse_moe.experts.174.w3", "model.layers.32.block_sparse_moe.experts.175.w3", "model.layers.32.block_sparse_moe.experts.176.w3", "model.layers.32.block_sparse_moe.experts.177.w3", "model.layers.32.block_sparse_moe.experts.178.w3", "model.layers.32.block_sparse_moe.experts.179.w3", "model.layers.32.block_sparse_moe.experts.180.w3", "model.layers.32.block_sparse_moe.experts.181.w3", "model.layers.32.block_sparse_moe.experts.182.w3", "model.layers.32.block_sparse_moe.experts.183.w3", "model.layers.32.block_sparse_moe.experts.184.w3", "model.layers.32.block_sparse_moe.experts.185.w3", "model.layers.32.block_sparse_moe.experts.186.w3", "model.layers.32.block_sparse_moe.experts.187.w3", "model.layers.32.block_sparse_moe.experts.188.w3", "model.layers.32.block_sparse_moe.experts.189.w3", "model.layers.32.block_sparse_moe.experts.190.w3", "model.layers.32.block_sparse_moe.experts.191.w3", "model.layers.32.block_sparse_moe.experts.192.w3", "model.layers.32.block_sparse_moe.experts.193.w3", "model.layers.32.block_sparse_moe.experts.194.w3", "model.layers.32.block_sparse_moe.experts.195.w3", "model.layers.32.block_sparse_moe.experts.196.w3", "model.layers.32.block_sparse_moe.experts.197.w3", "model.layers.32.block_sparse_moe.experts.198.w3", "model.layers.32.block_sparse_moe.experts.199.w3", "model.layers.32.block_sparse_moe.experts.200.w3", "model.layers.32.block_sparse_moe.experts.201.w3", "model.layers.32.block_sparse_moe.experts.202.w3", "model.layers.32.block_sparse_moe.experts.203.w3", "model.layers.32.block_sparse_moe.experts.204.w3", "model.layers.32.block_sparse_moe.experts.205.w3", "model.layers.32.block_sparse_moe.experts.206.w3", "model.layers.32.block_sparse_moe.experts.207.w3", "model.layers.32.block_sparse_moe.experts.208.w3", "model.layers.32.block_sparse_moe.experts.209.w3", "model.layers.32.block_sparse_moe.experts.210.w3", "model.layers.32.block_sparse_moe.experts.211.w3", "model.layers.32.block_sparse_moe.experts.212.w3", "model.layers.32.block_sparse_moe.experts.213.w3", "model.layers.32.block_sparse_moe.experts.214.w3", "model.layers.32.block_sparse_moe.experts.215.w3", "model.layers.32.block_sparse_moe.experts.216.w3", "model.layers.32.block_sparse_moe.experts.217.w3", "model.layers.32.block_sparse_moe.experts.218.w3", "model.layers.32.block_sparse_moe.experts.219.w3", "model.layers.32.block_sparse_moe.experts.220.w3", "model.layers.32.block_sparse_moe.experts.221.w3", "model.layers.32.block_sparse_moe.experts.222.w3", "model.layers.32.block_sparse_moe.experts.223.w3", "model.layers.32.block_sparse_moe.experts.224.w3", "model.layers.32.block_sparse_moe.experts.225.w3", "model.layers.32.block_sparse_moe.experts.226.w3", "model.layers.32.block_sparse_moe.experts.227.w3", "model.layers.32.block_sparse_moe.experts.228.w3", "model.layers.32.block_sparse_moe.experts.229.w3", "model.layers.32.block_sparse_moe.experts.230.w3", "model.layers.32.block_sparse_moe.experts.231.w3", "model.layers.32.block_sparse_moe.experts.232.w3", "model.layers.32.block_sparse_moe.experts.233.w3", "model.layers.32.block_sparse_moe.experts.234.w3", "model.layers.32.block_sparse_moe.experts.235.w3", "model.layers.32.block_sparse_moe.experts.236.w3", "model.layers.32.block_sparse_moe.experts.237.w3", "model.layers.32.block_sparse_moe.experts.238.w3", "model.layers.32.block_sparse_moe.experts.239.w3", "model.layers.32.block_sparse_moe.experts.240.w3", "model.layers.32.block_sparse_moe.experts.241.w3", "model.layers.32.block_sparse_moe.experts.242.w3", "model.layers.32.block_sparse_moe.experts.243.w3", "model.layers.32.block_sparse_moe.experts.244.w3", "model.layers.32.block_sparse_moe.experts.245.w3", "model.layers.32.block_sparse_moe.experts.246.w3", "model.layers.32.block_sparse_moe.experts.247.w3", "model.layers.32.block_sparse_moe.experts.248.w3", "model.layers.32.block_sparse_moe.experts.249.w3", "model.layers.32.block_sparse_moe.experts.250.w3", "model.layers.32.block_sparse_moe.experts.251.w3", "model.layers.32.block_sparse_moe.experts.252.w3", "model.layers.32.block_sparse_moe.experts.253.w3", "model.layers.32.block_sparse_moe.experts.254.w3", "model.layers.32.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.0008231453597545291, "dbits": 2415919104 } ] }, { "idx": 164, "layers": [ "model.layers.32.block_sparse_moe.experts.0.w2", "model.layers.32.block_sparse_moe.experts.1.w2", "model.layers.32.block_sparse_moe.experts.2.w2", "model.layers.32.block_sparse_moe.experts.3.w2", "model.layers.32.block_sparse_moe.experts.4.w2", "model.layers.32.block_sparse_moe.experts.5.w2", "model.layers.32.block_sparse_moe.experts.6.w2", "model.layers.32.block_sparse_moe.experts.7.w2", "model.layers.32.block_sparse_moe.experts.8.w2", "model.layers.32.block_sparse_moe.experts.9.w2", "model.layers.32.block_sparse_moe.experts.10.w2", "model.layers.32.block_sparse_moe.experts.11.w2", "model.layers.32.block_sparse_moe.experts.12.w2", "model.layers.32.block_sparse_moe.experts.13.w2", "model.layers.32.block_sparse_moe.experts.14.w2", "model.layers.32.block_sparse_moe.experts.15.w2", "model.layers.32.block_sparse_moe.experts.16.w2", "model.layers.32.block_sparse_moe.experts.17.w2", "model.layers.32.block_sparse_moe.experts.18.w2", "model.layers.32.block_sparse_moe.experts.19.w2", "model.layers.32.block_sparse_moe.experts.20.w2", "model.layers.32.block_sparse_moe.experts.21.w2", "model.layers.32.block_sparse_moe.experts.22.w2", "model.layers.32.block_sparse_moe.experts.23.w2", "model.layers.32.block_sparse_moe.experts.24.w2", "model.layers.32.block_sparse_moe.experts.25.w2", "model.layers.32.block_sparse_moe.experts.26.w2", "model.layers.32.block_sparse_moe.experts.27.w2", "model.layers.32.block_sparse_moe.experts.28.w2", "model.layers.32.block_sparse_moe.experts.29.w2", "model.layers.32.block_sparse_moe.experts.30.w2", "model.layers.32.block_sparse_moe.experts.31.w2", "model.layers.32.block_sparse_moe.experts.32.w2", "model.layers.32.block_sparse_moe.experts.33.w2", "model.layers.32.block_sparse_moe.experts.34.w2", "model.layers.32.block_sparse_moe.experts.35.w2", "model.layers.32.block_sparse_moe.experts.36.w2", "model.layers.32.block_sparse_moe.experts.37.w2", "model.layers.32.block_sparse_moe.experts.38.w2", "model.layers.32.block_sparse_moe.experts.39.w2", "model.layers.32.block_sparse_moe.experts.40.w2", "model.layers.32.block_sparse_moe.experts.41.w2", "model.layers.32.block_sparse_moe.experts.42.w2", "model.layers.32.block_sparse_moe.experts.43.w2", "model.layers.32.block_sparse_moe.experts.44.w2", "model.layers.32.block_sparse_moe.experts.45.w2", "model.layers.32.block_sparse_moe.experts.46.w2", "model.layers.32.block_sparse_moe.experts.47.w2", "model.layers.32.block_sparse_moe.experts.48.w2", "model.layers.32.block_sparse_moe.experts.49.w2", "model.layers.32.block_sparse_moe.experts.50.w2", "model.layers.32.block_sparse_moe.experts.51.w2", "model.layers.32.block_sparse_moe.experts.52.w2", "model.layers.32.block_sparse_moe.experts.53.w2", "model.layers.32.block_sparse_moe.experts.54.w2", "model.layers.32.block_sparse_moe.experts.55.w2", "model.layers.32.block_sparse_moe.experts.56.w2", "model.layers.32.block_sparse_moe.experts.57.w2", "model.layers.32.block_sparse_moe.experts.58.w2", "model.layers.32.block_sparse_moe.experts.59.w2", "model.layers.32.block_sparse_moe.experts.60.w2", "model.layers.32.block_sparse_moe.experts.61.w2", "model.layers.32.block_sparse_moe.experts.62.w2", "model.layers.32.block_sparse_moe.experts.63.w2", "model.layers.32.block_sparse_moe.experts.64.w2", "model.layers.32.block_sparse_moe.experts.65.w2", "model.layers.32.block_sparse_moe.experts.66.w2", "model.layers.32.block_sparse_moe.experts.67.w2", "model.layers.32.block_sparse_moe.experts.68.w2", "model.layers.32.block_sparse_moe.experts.69.w2", "model.layers.32.block_sparse_moe.experts.70.w2", "model.layers.32.block_sparse_moe.experts.71.w2", "model.layers.32.block_sparse_moe.experts.72.w2", "model.layers.32.block_sparse_moe.experts.73.w2", "model.layers.32.block_sparse_moe.experts.74.w2", "model.layers.32.block_sparse_moe.experts.75.w2", "model.layers.32.block_sparse_moe.experts.76.w2", "model.layers.32.block_sparse_moe.experts.77.w2", "model.layers.32.block_sparse_moe.experts.78.w2", "model.layers.32.block_sparse_moe.experts.79.w2", "model.layers.32.block_sparse_moe.experts.80.w2", "model.layers.32.block_sparse_moe.experts.81.w2", "model.layers.32.block_sparse_moe.experts.82.w2", "model.layers.32.block_sparse_moe.experts.83.w2", "model.layers.32.block_sparse_moe.experts.84.w2", "model.layers.32.block_sparse_moe.experts.85.w2", "model.layers.32.block_sparse_moe.experts.86.w2", "model.layers.32.block_sparse_moe.experts.87.w2", "model.layers.32.block_sparse_moe.experts.88.w2", "model.layers.32.block_sparse_moe.experts.89.w2", "model.layers.32.block_sparse_moe.experts.90.w2", "model.layers.32.block_sparse_moe.experts.91.w2", "model.layers.32.block_sparse_moe.experts.92.w2", "model.layers.32.block_sparse_moe.experts.93.w2", "model.layers.32.block_sparse_moe.experts.94.w2", "model.layers.32.block_sparse_moe.experts.95.w2", "model.layers.32.block_sparse_moe.experts.96.w2", "model.layers.32.block_sparse_moe.experts.97.w2", "model.layers.32.block_sparse_moe.experts.98.w2", "model.layers.32.block_sparse_moe.experts.99.w2", "model.layers.32.block_sparse_moe.experts.100.w2", "model.layers.32.block_sparse_moe.experts.101.w2", "model.layers.32.block_sparse_moe.experts.102.w2", "model.layers.32.block_sparse_moe.experts.103.w2", "model.layers.32.block_sparse_moe.experts.104.w2", "model.layers.32.block_sparse_moe.experts.105.w2", "model.layers.32.block_sparse_moe.experts.106.w2", "model.layers.32.block_sparse_moe.experts.107.w2", "model.layers.32.block_sparse_moe.experts.108.w2", "model.layers.32.block_sparse_moe.experts.109.w2", "model.layers.32.block_sparse_moe.experts.110.w2", "model.layers.32.block_sparse_moe.experts.111.w2", "model.layers.32.block_sparse_moe.experts.112.w2", "model.layers.32.block_sparse_moe.experts.113.w2", "model.layers.32.block_sparse_moe.experts.114.w2", "model.layers.32.block_sparse_moe.experts.115.w2", "model.layers.32.block_sparse_moe.experts.116.w2", "model.layers.32.block_sparse_moe.experts.117.w2", "model.layers.32.block_sparse_moe.experts.118.w2", "model.layers.32.block_sparse_moe.experts.119.w2", "model.layers.32.block_sparse_moe.experts.120.w2", "model.layers.32.block_sparse_moe.experts.121.w2", "model.layers.32.block_sparse_moe.experts.122.w2", "model.layers.32.block_sparse_moe.experts.123.w2", "model.layers.32.block_sparse_moe.experts.124.w2", "model.layers.32.block_sparse_moe.experts.125.w2", "model.layers.32.block_sparse_moe.experts.126.w2", "model.layers.32.block_sparse_moe.experts.127.w2", "model.layers.32.block_sparse_moe.experts.128.w2", "model.layers.32.block_sparse_moe.experts.129.w2", "model.layers.32.block_sparse_moe.experts.130.w2", "model.layers.32.block_sparse_moe.experts.131.w2", "model.layers.32.block_sparse_moe.experts.132.w2", "model.layers.32.block_sparse_moe.experts.133.w2", "model.layers.32.block_sparse_moe.experts.134.w2", "model.layers.32.block_sparse_moe.experts.135.w2", "model.layers.32.block_sparse_moe.experts.136.w2", "model.layers.32.block_sparse_moe.experts.137.w2", "model.layers.32.block_sparse_moe.experts.138.w2", "model.layers.32.block_sparse_moe.experts.139.w2", "model.layers.32.block_sparse_moe.experts.140.w2", "model.layers.32.block_sparse_moe.experts.141.w2", "model.layers.32.block_sparse_moe.experts.142.w2", "model.layers.32.block_sparse_moe.experts.143.w2", "model.layers.32.block_sparse_moe.experts.144.w2", "model.layers.32.block_sparse_moe.experts.145.w2", "model.layers.32.block_sparse_moe.experts.146.w2", "model.layers.32.block_sparse_moe.experts.147.w2", "model.layers.32.block_sparse_moe.experts.148.w2", "model.layers.32.block_sparse_moe.experts.149.w2", "model.layers.32.block_sparse_moe.experts.150.w2", "model.layers.32.block_sparse_moe.experts.151.w2", "model.layers.32.block_sparse_moe.experts.152.w2", "model.layers.32.block_sparse_moe.experts.153.w2", "model.layers.32.block_sparse_moe.experts.154.w2", "model.layers.32.block_sparse_moe.experts.155.w2", "model.layers.32.block_sparse_moe.experts.156.w2", "model.layers.32.block_sparse_moe.experts.157.w2", "model.layers.32.block_sparse_moe.experts.158.w2", "model.layers.32.block_sparse_moe.experts.159.w2", "model.layers.32.block_sparse_moe.experts.160.w2", "model.layers.32.block_sparse_moe.experts.161.w2", "model.layers.32.block_sparse_moe.experts.162.w2", "model.layers.32.block_sparse_moe.experts.163.w2", "model.layers.32.block_sparse_moe.experts.164.w2", "model.layers.32.block_sparse_moe.experts.165.w2", "model.layers.32.block_sparse_moe.experts.166.w2", "model.layers.32.block_sparse_moe.experts.167.w2", "model.layers.32.block_sparse_moe.experts.168.w2", "model.layers.32.block_sparse_moe.experts.169.w2", "model.layers.32.block_sparse_moe.experts.170.w2", "model.layers.32.block_sparse_moe.experts.171.w2", "model.layers.32.block_sparse_moe.experts.172.w2", "model.layers.32.block_sparse_moe.experts.173.w2", "model.layers.32.block_sparse_moe.experts.174.w2", "model.layers.32.block_sparse_moe.experts.175.w2", "model.layers.32.block_sparse_moe.experts.176.w2", "model.layers.32.block_sparse_moe.experts.177.w2", "model.layers.32.block_sparse_moe.experts.178.w2", "model.layers.32.block_sparse_moe.experts.179.w2", "model.layers.32.block_sparse_moe.experts.180.w2", "model.layers.32.block_sparse_moe.experts.181.w2", "model.layers.32.block_sparse_moe.experts.182.w2", "model.layers.32.block_sparse_moe.experts.183.w2", "model.layers.32.block_sparse_moe.experts.184.w2", "model.layers.32.block_sparse_moe.experts.185.w2", "model.layers.32.block_sparse_moe.experts.186.w2", "model.layers.32.block_sparse_moe.experts.187.w2", "model.layers.32.block_sparse_moe.experts.188.w2", "model.layers.32.block_sparse_moe.experts.189.w2", "model.layers.32.block_sparse_moe.experts.190.w2", "model.layers.32.block_sparse_moe.experts.191.w2", "model.layers.32.block_sparse_moe.experts.192.w2", "model.layers.32.block_sparse_moe.experts.193.w2", "model.layers.32.block_sparse_moe.experts.194.w2", "model.layers.32.block_sparse_moe.experts.195.w2", "model.layers.32.block_sparse_moe.experts.196.w2", "model.layers.32.block_sparse_moe.experts.197.w2", "model.layers.32.block_sparse_moe.experts.198.w2", "model.layers.32.block_sparse_moe.experts.199.w2", "model.layers.32.block_sparse_moe.experts.200.w2", "model.layers.32.block_sparse_moe.experts.201.w2", "model.layers.32.block_sparse_moe.experts.202.w2", "model.layers.32.block_sparse_moe.experts.203.w2", "model.layers.32.block_sparse_moe.experts.204.w2", "model.layers.32.block_sparse_moe.experts.205.w2", "model.layers.32.block_sparse_moe.experts.206.w2", "model.layers.32.block_sparse_moe.experts.207.w2", "model.layers.32.block_sparse_moe.experts.208.w2", "model.layers.32.block_sparse_moe.experts.209.w2", "model.layers.32.block_sparse_moe.experts.210.w2", "model.layers.32.block_sparse_moe.experts.211.w2", "model.layers.32.block_sparse_moe.experts.212.w2", "model.layers.32.block_sparse_moe.experts.213.w2", "model.layers.32.block_sparse_moe.experts.214.w2", "model.layers.32.block_sparse_moe.experts.215.w2", "model.layers.32.block_sparse_moe.experts.216.w2", "model.layers.32.block_sparse_moe.experts.217.w2", "model.layers.32.block_sparse_moe.experts.218.w2", "model.layers.32.block_sparse_moe.experts.219.w2", "model.layers.32.block_sparse_moe.experts.220.w2", "model.layers.32.block_sparse_moe.experts.221.w2", "model.layers.32.block_sparse_moe.experts.222.w2", "model.layers.32.block_sparse_moe.experts.223.w2", "model.layers.32.block_sparse_moe.experts.224.w2", "model.layers.32.block_sparse_moe.experts.225.w2", "model.layers.32.block_sparse_moe.experts.226.w2", "model.layers.32.block_sparse_moe.experts.227.w2", "model.layers.32.block_sparse_moe.experts.228.w2", "model.layers.32.block_sparse_moe.experts.229.w2", "model.layers.32.block_sparse_moe.experts.230.w2", "model.layers.32.block_sparse_moe.experts.231.w2", "model.layers.32.block_sparse_moe.experts.232.w2", "model.layers.32.block_sparse_moe.experts.233.w2", "model.layers.32.block_sparse_moe.experts.234.w2", "model.layers.32.block_sparse_moe.experts.235.w2", "model.layers.32.block_sparse_moe.experts.236.w2", "model.layers.32.block_sparse_moe.experts.237.w2", "model.layers.32.block_sparse_moe.experts.238.w2", "model.layers.32.block_sparse_moe.experts.239.w2", "model.layers.32.block_sparse_moe.experts.240.w2", "model.layers.32.block_sparse_moe.experts.241.w2", "model.layers.32.block_sparse_moe.experts.242.w2", "model.layers.32.block_sparse_moe.experts.243.w2", "model.layers.32.block_sparse_moe.experts.244.w2", "model.layers.32.block_sparse_moe.experts.245.w2", "model.layers.32.block_sparse_moe.experts.246.w2", "model.layers.32.block_sparse_moe.experts.247.w2", "model.layers.32.block_sparse_moe.experts.248.w2", "model.layers.32.block_sparse_moe.experts.249.w2", "model.layers.32.block_sparse_moe.experts.250.w2", "model.layers.32.block_sparse_moe.experts.251.w2", "model.layers.32.block_sparse_moe.experts.252.w2", "model.layers.32.block_sparse_moe.experts.253.w2", "model.layers.32.block_sparse_moe.experts.254.w2", "model.layers.32.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0006429757922887802, "dbits": 1207959552 } ] }, { "idx": 165, "layers": [ "model.layers.33.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0020331889390945435, "dbits": 18874368 } ] }, { "idx": 166, "layers": [ "model.layers.33.self_attn.k_proj", "model.layers.33.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0031895659863949155, "dbits": 6291456 } ] }, { "idx": 167, "layers": [ "model.layers.33.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0019637688994407765, "dbits": 18874368 } ] }, { "idx": 168, "layers": [ "model.layers.33.block_sparse_moe.experts.0.w1", "model.layers.33.block_sparse_moe.experts.1.w1", "model.layers.33.block_sparse_moe.experts.2.w1", "model.layers.33.block_sparse_moe.experts.3.w1", "model.layers.33.block_sparse_moe.experts.4.w1", "model.layers.33.block_sparse_moe.experts.5.w1", "model.layers.33.block_sparse_moe.experts.6.w1", "model.layers.33.block_sparse_moe.experts.7.w1", "model.layers.33.block_sparse_moe.experts.8.w1", "model.layers.33.block_sparse_moe.experts.9.w1", "model.layers.33.block_sparse_moe.experts.10.w1", "model.layers.33.block_sparse_moe.experts.11.w1", "model.layers.33.block_sparse_moe.experts.12.w1", "model.layers.33.block_sparse_moe.experts.13.w1", "model.layers.33.block_sparse_moe.experts.14.w1", "model.layers.33.block_sparse_moe.experts.15.w1", "model.layers.33.block_sparse_moe.experts.16.w1", "model.layers.33.block_sparse_moe.experts.17.w1", "model.layers.33.block_sparse_moe.experts.18.w1", "model.layers.33.block_sparse_moe.experts.19.w1", "model.layers.33.block_sparse_moe.experts.20.w1", "model.layers.33.block_sparse_moe.experts.21.w1", "model.layers.33.block_sparse_moe.experts.22.w1", "model.layers.33.block_sparse_moe.experts.23.w1", "model.layers.33.block_sparse_moe.experts.24.w1", "model.layers.33.block_sparse_moe.experts.25.w1", "model.layers.33.block_sparse_moe.experts.26.w1", "model.layers.33.block_sparse_moe.experts.27.w1", "model.layers.33.block_sparse_moe.experts.28.w1", "model.layers.33.block_sparse_moe.experts.29.w1", "model.layers.33.block_sparse_moe.experts.30.w1", "model.layers.33.block_sparse_moe.experts.31.w1", "model.layers.33.block_sparse_moe.experts.32.w1", "model.layers.33.block_sparse_moe.experts.33.w1", "model.layers.33.block_sparse_moe.experts.34.w1", "model.layers.33.block_sparse_moe.experts.35.w1", "model.layers.33.block_sparse_moe.experts.36.w1", "model.layers.33.block_sparse_moe.experts.37.w1", "model.layers.33.block_sparse_moe.experts.38.w1", "model.layers.33.block_sparse_moe.experts.39.w1", "model.layers.33.block_sparse_moe.experts.40.w1", "model.layers.33.block_sparse_moe.experts.41.w1", "model.layers.33.block_sparse_moe.experts.42.w1", "model.layers.33.block_sparse_moe.experts.43.w1", "model.layers.33.block_sparse_moe.experts.44.w1", "model.layers.33.block_sparse_moe.experts.45.w1", "model.layers.33.block_sparse_moe.experts.46.w1", "model.layers.33.block_sparse_moe.experts.47.w1", "model.layers.33.block_sparse_moe.experts.48.w1", "model.layers.33.block_sparse_moe.experts.49.w1", "model.layers.33.block_sparse_moe.experts.50.w1", "model.layers.33.block_sparse_moe.experts.51.w1", "model.layers.33.block_sparse_moe.experts.52.w1", "model.layers.33.block_sparse_moe.experts.53.w1", "model.layers.33.block_sparse_moe.experts.54.w1", "model.layers.33.block_sparse_moe.experts.55.w1", "model.layers.33.block_sparse_moe.experts.56.w1", "model.layers.33.block_sparse_moe.experts.57.w1", "model.layers.33.block_sparse_moe.experts.58.w1", "model.layers.33.block_sparse_moe.experts.59.w1", "model.layers.33.block_sparse_moe.experts.60.w1", "model.layers.33.block_sparse_moe.experts.61.w1", "model.layers.33.block_sparse_moe.experts.62.w1", "model.layers.33.block_sparse_moe.experts.63.w1", "model.layers.33.block_sparse_moe.experts.64.w1", "model.layers.33.block_sparse_moe.experts.65.w1", "model.layers.33.block_sparse_moe.experts.66.w1", "model.layers.33.block_sparse_moe.experts.67.w1", "model.layers.33.block_sparse_moe.experts.68.w1", "model.layers.33.block_sparse_moe.experts.69.w1", "model.layers.33.block_sparse_moe.experts.70.w1", "model.layers.33.block_sparse_moe.experts.71.w1", "model.layers.33.block_sparse_moe.experts.72.w1", "model.layers.33.block_sparse_moe.experts.73.w1", "model.layers.33.block_sparse_moe.experts.74.w1", "model.layers.33.block_sparse_moe.experts.75.w1", "model.layers.33.block_sparse_moe.experts.76.w1", "model.layers.33.block_sparse_moe.experts.77.w1", "model.layers.33.block_sparse_moe.experts.78.w1", "model.layers.33.block_sparse_moe.experts.79.w1", "model.layers.33.block_sparse_moe.experts.80.w1", "model.layers.33.block_sparse_moe.experts.81.w1", "model.layers.33.block_sparse_moe.experts.82.w1", "model.layers.33.block_sparse_moe.experts.83.w1", "model.layers.33.block_sparse_moe.experts.84.w1", "model.layers.33.block_sparse_moe.experts.85.w1", "model.layers.33.block_sparse_moe.experts.86.w1", "model.layers.33.block_sparse_moe.experts.87.w1", "model.layers.33.block_sparse_moe.experts.88.w1", "model.layers.33.block_sparse_moe.experts.89.w1", "model.layers.33.block_sparse_moe.experts.90.w1", "model.layers.33.block_sparse_moe.experts.91.w1", "model.layers.33.block_sparse_moe.experts.92.w1", "model.layers.33.block_sparse_moe.experts.93.w1", "model.layers.33.block_sparse_moe.experts.94.w1", "model.layers.33.block_sparse_moe.experts.95.w1", "model.layers.33.block_sparse_moe.experts.96.w1", "model.layers.33.block_sparse_moe.experts.97.w1", "model.layers.33.block_sparse_moe.experts.98.w1", "model.layers.33.block_sparse_moe.experts.99.w1", "model.layers.33.block_sparse_moe.experts.100.w1", "model.layers.33.block_sparse_moe.experts.101.w1", "model.layers.33.block_sparse_moe.experts.102.w1", "model.layers.33.block_sparse_moe.experts.103.w1", "model.layers.33.block_sparse_moe.experts.104.w1", "model.layers.33.block_sparse_moe.experts.105.w1", "model.layers.33.block_sparse_moe.experts.106.w1", "model.layers.33.block_sparse_moe.experts.107.w1", "model.layers.33.block_sparse_moe.experts.108.w1", "model.layers.33.block_sparse_moe.experts.109.w1", "model.layers.33.block_sparse_moe.experts.110.w1", "model.layers.33.block_sparse_moe.experts.111.w1", "model.layers.33.block_sparse_moe.experts.112.w1", "model.layers.33.block_sparse_moe.experts.113.w1", "model.layers.33.block_sparse_moe.experts.114.w1", "model.layers.33.block_sparse_moe.experts.115.w1", "model.layers.33.block_sparse_moe.experts.116.w1", "model.layers.33.block_sparse_moe.experts.117.w1", "model.layers.33.block_sparse_moe.experts.118.w1", "model.layers.33.block_sparse_moe.experts.119.w1", "model.layers.33.block_sparse_moe.experts.120.w1", "model.layers.33.block_sparse_moe.experts.121.w1", "model.layers.33.block_sparse_moe.experts.122.w1", "model.layers.33.block_sparse_moe.experts.123.w1", "model.layers.33.block_sparse_moe.experts.124.w1", "model.layers.33.block_sparse_moe.experts.125.w1", "model.layers.33.block_sparse_moe.experts.126.w1", "model.layers.33.block_sparse_moe.experts.127.w1", "model.layers.33.block_sparse_moe.experts.128.w1", "model.layers.33.block_sparse_moe.experts.129.w1", "model.layers.33.block_sparse_moe.experts.130.w1", "model.layers.33.block_sparse_moe.experts.131.w1", "model.layers.33.block_sparse_moe.experts.132.w1", "model.layers.33.block_sparse_moe.experts.133.w1", "model.layers.33.block_sparse_moe.experts.134.w1", "model.layers.33.block_sparse_moe.experts.135.w1", "model.layers.33.block_sparse_moe.experts.136.w1", "model.layers.33.block_sparse_moe.experts.137.w1", "model.layers.33.block_sparse_moe.experts.138.w1", "model.layers.33.block_sparse_moe.experts.139.w1", "model.layers.33.block_sparse_moe.experts.140.w1", "model.layers.33.block_sparse_moe.experts.141.w1", "model.layers.33.block_sparse_moe.experts.142.w1", "model.layers.33.block_sparse_moe.experts.143.w1", "model.layers.33.block_sparse_moe.experts.144.w1", "model.layers.33.block_sparse_moe.experts.145.w1", "model.layers.33.block_sparse_moe.experts.146.w1", "model.layers.33.block_sparse_moe.experts.147.w1", "model.layers.33.block_sparse_moe.experts.148.w1", "model.layers.33.block_sparse_moe.experts.149.w1", "model.layers.33.block_sparse_moe.experts.150.w1", "model.layers.33.block_sparse_moe.experts.151.w1", "model.layers.33.block_sparse_moe.experts.152.w1", "model.layers.33.block_sparse_moe.experts.153.w1", "model.layers.33.block_sparse_moe.experts.154.w1", "model.layers.33.block_sparse_moe.experts.155.w1", "model.layers.33.block_sparse_moe.experts.156.w1", "model.layers.33.block_sparse_moe.experts.157.w1", "model.layers.33.block_sparse_moe.experts.158.w1", "model.layers.33.block_sparse_moe.experts.159.w1", "model.layers.33.block_sparse_moe.experts.160.w1", "model.layers.33.block_sparse_moe.experts.161.w1", "model.layers.33.block_sparse_moe.experts.162.w1", "model.layers.33.block_sparse_moe.experts.163.w1", "model.layers.33.block_sparse_moe.experts.164.w1", "model.layers.33.block_sparse_moe.experts.165.w1", "model.layers.33.block_sparse_moe.experts.166.w1", "model.layers.33.block_sparse_moe.experts.167.w1", "model.layers.33.block_sparse_moe.experts.168.w1", "model.layers.33.block_sparse_moe.experts.169.w1", "model.layers.33.block_sparse_moe.experts.170.w1", "model.layers.33.block_sparse_moe.experts.171.w1", "model.layers.33.block_sparse_moe.experts.172.w1", "model.layers.33.block_sparse_moe.experts.173.w1", "model.layers.33.block_sparse_moe.experts.174.w1", "model.layers.33.block_sparse_moe.experts.175.w1", "model.layers.33.block_sparse_moe.experts.176.w1", "model.layers.33.block_sparse_moe.experts.177.w1", "model.layers.33.block_sparse_moe.experts.178.w1", "model.layers.33.block_sparse_moe.experts.179.w1", "model.layers.33.block_sparse_moe.experts.180.w1", "model.layers.33.block_sparse_moe.experts.181.w1", "model.layers.33.block_sparse_moe.experts.182.w1", "model.layers.33.block_sparse_moe.experts.183.w1", "model.layers.33.block_sparse_moe.experts.184.w1", "model.layers.33.block_sparse_moe.experts.185.w1", "model.layers.33.block_sparse_moe.experts.186.w1", "model.layers.33.block_sparse_moe.experts.187.w1", "model.layers.33.block_sparse_moe.experts.188.w1", "model.layers.33.block_sparse_moe.experts.189.w1", "model.layers.33.block_sparse_moe.experts.190.w1", "model.layers.33.block_sparse_moe.experts.191.w1", "model.layers.33.block_sparse_moe.experts.192.w1", "model.layers.33.block_sparse_moe.experts.193.w1", "model.layers.33.block_sparse_moe.experts.194.w1", "model.layers.33.block_sparse_moe.experts.195.w1", "model.layers.33.block_sparse_moe.experts.196.w1", "model.layers.33.block_sparse_moe.experts.197.w1", "model.layers.33.block_sparse_moe.experts.198.w1", "model.layers.33.block_sparse_moe.experts.199.w1", "model.layers.33.block_sparse_moe.experts.200.w1", "model.layers.33.block_sparse_moe.experts.201.w1", "model.layers.33.block_sparse_moe.experts.202.w1", "model.layers.33.block_sparse_moe.experts.203.w1", "model.layers.33.block_sparse_moe.experts.204.w1", "model.layers.33.block_sparse_moe.experts.205.w1", "model.layers.33.block_sparse_moe.experts.206.w1", "model.layers.33.block_sparse_moe.experts.207.w1", "model.layers.33.block_sparse_moe.experts.208.w1", "model.layers.33.block_sparse_moe.experts.209.w1", "model.layers.33.block_sparse_moe.experts.210.w1", "model.layers.33.block_sparse_moe.experts.211.w1", "model.layers.33.block_sparse_moe.experts.212.w1", "model.layers.33.block_sparse_moe.experts.213.w1", "model.layers.33.block_sparse_moe.experts.214.w1", "model.layers.33.block_sparse_moe.experts.215.w1", "model.layers.33.block_sparse_moe.experts.216.w1", "model.layers.33.block_sparse_moe.experts.217.w1", "model.layers.33.block_sparse_moe.experts.218.w1", "model.layers.33.block_sparse_moe.experts.219.w1", "model.layers.33.block_sparse_moe.experts.220.w1", "model.layers.33.block_sparse_moe.experts.221.w1", "model.layers.33.block_sparse_moe.experts.222.w1", "model.layers.33.block_sparse_moe.experts.223.w1", "model.layers.33.block_sparse_moe.experts.224.w1", "model.layers.33.block_sparse_moe.experts.225.w1", "model.layers.33.block_sparse_moe.experts.226.w1", "model.layers.33.block_sparse_moe.experts.227.w1", "model.layers.33.block_sparse_moe.experts.228.w1", "model.layers.33.block_sparse_moe.experts.229.w1", "model.layers.33.block_sparse_moe.experts.230.w1", "model.layers.33.block_sparse_moe.experts.231.w1", "model.layers.33.block_sparse_moe.experts.232.w1", "model.layers.33.block_sparse_moe.experts.233.w1", "model.layers.33.block_sparse_moe.experts.234.w1", "model.layers.33.block_sparse_moe.experts.235.w1", "model.layers.33.block_sparse_moe.experts.236.w1", "model.layers.33.block_sparse_moe.experts.237.w1", "model.layers.33.block_sparse_moe.experts.238.w1", "model.layers.33.block_sparse_moe.experts.239.w1", "model.layers.33.block_sparse_moe.experts.240.w1", "model.layers.33.block_sparse_moe.experts.241.w1", "model.layers.33.block_sparse_moe.experts.242.w1", "model.layers.33.block_sparse_moe.experts.243.w1", "model.layers.33.block_sparse_moe.experts.244.w1", "model.layers.33.block_sparse_moe.experts.245.w1", "model.layers.33.block_sparse_moe.experts.246.w1", "model.layers.33.block_sparse_moe.experts.247.w1", "model.layers.33.block_sparse_moe.experts.248.w1", "model.layers.33.block_sparse_moe.experts.249.w1", "model.layers.33.block_sparse_moe.experts.250.w1", "model.layers.33.block_sparse_moe.experts.251.w1", "model.layers.33.block_sparse_moe.experts.252.w1", "model.layers.33.block_sparse_moe.experts.253.w1", "model.layers.33.block_sparse_moe.experts.254.w1", "model.layers.33.block_sparse_moe.experts.255.w1", "model.layers.33.block_sparse_moe.experts.0.w3", "model.layers.33.block_sparse_moe.experts.1.w3", "model.layers.33.block_sparse_moe.experts.2.w3", "model.layers.33.block_sparse_moe.experts.3.w3", "model.layers.33.block_sparse_moe.experts.4.w3", "model.layers.33.block_sparse_moe.experts.5.w3", "model.layers.33.block_sparse_moe.experts.6.w3", "model.layers.33.block_sparse_moe.experts.7.w3", "model.layers.33.block_sparse_moe.experts.8.w3", "model.layers.33.block_sparse_moe.experts.9.w3", "model.layers.33.block_sparse_moe.experts.10.w3", "model.layers.33.block_sparse_moe.experts.11.w3", "model.layers.33.block_sparse_moe.experts.12.w3", "model.layers.33.block_sparse_moe.experts.13.w3", "model.layers.33.block_sparse_moe.experts.14.w3", "model.layers.33.block_sparse_moe.experts.15.w3", "model.layers.33.block_sparse_moe.experts.16.w3", "model.layers.33.block_sparse_moe.experts.17.w3", "model.layers.33.block_sparse_moe.experts.18.w3", "model.layers.33.block_sparse_moe.experts.19.w3", "model.layers.33.block_sparse_moe.experts.20.w3", "model.layers.33.block_sparse_moe.experts.21.w3", "model.layers.33.block_sparse_moe.experts.22.w3", "model.layers.33.block_sparse_moe.experts.23.w3", "model.layers.33.block_sparse_moe.experts.24.w3", "model.layers.33.block_sparse_moe.experts.25.w3", "model.layers.33.block_sparse_moe.experts.26.w3", "model.layers.33.block_sparse_moe.experts.27.w3", "model.layers.33.block_sparse_moe.experts.28.w3", "model.layers.33.block_sparse_moe.experts.29.w3", "model.layers.33.block_sparse_moe.experts.30.w3", "model.layers.33.block_sparse_moe.experts.31.w3", "model.layers.33.block_sparse_moe.experts.32.w3", "model.layers.33.block_sparse_moe.experts.33.w3", "model.layers.33.block_sparse_moe.experts.34.w3", "model.layers.33.block_sparse_moe.experts.35.w3", "model.layers.33.block_sparse_moe.experts.36.w3", "model.layers.33.block_sparse_moe.experts.37.w3", "model.layers.33.block_sparse_moe.experts.38.w3", "model.layers.33.block_sparse_moe.experts.39.w3", "model.layers.33.block_sparse_moe.experts.40.w3", "model.layers.33.block_sparse_moe.experts.41.w3", "model.layers.33.block_sparse_moe.experts.42.w3", "model.layers.33.block_sparse_moe.experts.43.w3", "model.layers.33.block_sparse_moe.experts.44.w3", "model.layers.33.block_sparse_moe.experts.45.w3", "model.layers.33.block_sparse_moe.experts.46.w3", "model.layers.33.block_sparse_moe.experts.47.w3", "model.layers.33.block_sparse_moe.experts.48.w3", "model.layers.33.block_sparse_moe.experts.49.w3", "model.layers.33.block_sparse_moe.experts.50.w3", "model.layers.33.block_sparse_moe.experts.51.w3", "model.layers.33.block_sparse_moe.experts.52.w3", "model.layers.33.block_sparse_moe.experts.53.w3", "model.layers.33.block_sparse_moe.experts.54.w3", "model.layers.33.block_sparse_moe.experts.55.w3", "model.layers.33.block_sparse_moe.experts.56.w3", "model.layers.33.block_sparse_moe.experts.57.w3", "model.layers.33.block_sparse_moe.experts.58.w3", "model.layers.33.block_sparse_moe.experts.59.w3", "model.layers.33.block_sparse_moe.experts.60.w3", "model.layers.33.block_sparse_moe.experts.61.w3", "model.layers.33.block_sparse_moe.experts.62.w3", "model.layers.33.block_sparse_moe.experts.63.w3", "model.layers.33.block_sparse_moe.experts.64.w3", "model.layers.33.block_sparse_moe.experts.65.w3", "model.layers.33.block_sparse_moe.experts.66.w3", "model.layers.33.block_sparse_moe.experts.67.w3", "model.layers.33.block_sparse_moe.experts.68.w3", "model.layers.33.block_sparse_moe.experts.69.w3", "model.layers.33.block_sparse_moe.experts.70.w3", "model.layers.33.block_sparse_moe.experts.71.w3", "model.layers.33.block_sparse_moe.experts.72.w3", "model.layers.33.block_sparse_moe.experts.73.w3", "model.layers.33.block_sparse_moe.experts.74.w3", "model.layers.33.block_sparse_moe.experts.75.w3", "model.layers.33.block_sparse_moe.experts.76.w3", "model.layers.33.block_sparse_moe.experts.77.w3", "model.layers.33.block_sparse_moe.experts.78.w3", "model.layers.33.block_sparse_moe.experts.79.w3", "model.layers.33.block_sparse_moe.experts.80.w3", "model.layers.33.block_sparse_moe.experts.81.w3", "model.layers.33.block_sparse_moe.experts.82.w3", "model.layers.33.block_sparse_moe.experts.83.w3", "model.layers.33.block_sparse_moe.experts.84.w3", "model.layers.33.block_sparse_moe.experts.85.w3", "model.layers.33.block_sparse_moe.experts.86.w3", "model.layers.33.block_sparse_moe.experts.87.w3", "model.layers.33.block_sparse_moe.experts.88.w3", "model.layers.33.block_sparse_moe.experts.89.w3", "model.layers.33.block_sparse_moe.experts.90.w3", "model.layers.33.block_sparse_moe.experts.91.w3", "model.layers.33.block_sparse_moe.experts.92.w3", "model.layers.33.block_sparse_moe.experts.93.w3", "model.layers.33.block_sparse_moe.experts.94.w3", "model.layers.33.block_sparse_moe.experts.95.w3", "model.layers.33.block_sparse_moe.experts.96.w3", "model.layers.33.block_sparse_moe.experts.97.w3", "model.layers.33.block_sparse_moe.experts.98.w3", "model.layers.33.block_sparse_moe.experts.99.w3", "model.layers.33.block_sparse_moe.experts.100.w3", "model.layers.33.block_sparse_moe.experts.101.w3", "model.layers.33.block_sparse_moe.experts.102.w3", "model.layers.33.block_sparse_moe.experts.103.w3", "model.layers.33.block_sparse_moe.experts.104.w3", "model.layers.33.block_sparse_moe.experts.105.w3", "model.layers.33.block_sparse_moe.experts.106.w3", "model.layers.33.block_sparse_moe.experts.107.w3", "model.layers.33.block_sparse_moe.experts.108.w3", "model.layers.33.block_sparse_moe.experts.109.w3", "model.layers.33.block_sparse_moe.experts.110.w3", "model.layers.33.block_sparse_moe.experts.111.w3", "model.layers.33.block_sparse_moe.experts.112.w3", "model.layers.33.block_sparse_moe.experts.113.w3", "model.layers.33.block_sparse_moe.experts.114.w3", "model.layers.33.block_sparse_moe.experts.115.w3", "model.layers.33.block_sparse_moe.experts.116.w3", "model.layers.33.block_sparse_moe.experts.117.w3", "model.layers.33.block_sparse_moe.experts.118.w3", "model.layers.33.block_sparse_moe.experts.119.w3", "model.layers.33.block_sparse_moe.experts.120.w3", "model.layers.33.block_sparse_moe.experts.121.w3", "model.layers.33.block_sparse_moe.experts.122.w3", "model.layers.33.block_sparse_moe.experts.123.w3", "model.layers.33.block_sparse_moe.experts.124.w3", "model.layers.33.block_sparse_moe.experts.125.w3", "model.layers.33.block_sparse_moe.experts.126.w3", "model.layers.33.block_sparse_moe.experts.127.w3", "model.layers.33.block_sparse_moe.experts.128.w3", "model.layers.33.block_sparse_moe.experts.129.w3", "model.layers.33.block_sparse_moe.experts.130.w3", "model.layers.33.block_sparse_moe.experts.131.w3", "model.layers.33.block_sparse_moe.experts.132.w3", "model.layers.33.block_sparse_moe.experts.133.w3", "model.layers.33.block_sparse_moe.experts.134.w3", "model.layers.33.block_sparse_moe.experts.135.w3", "model.layers.33.block_sparse_moe.experts.136.w3", "model.layers.33.block_sparse_moe.experts.137.w3", "model.layers.33.block_sparse_moe.experts.138.w3", "model.layers.33.block_sparse_moe.experts.139.w3", "model.layers.33.block_sparse_moe.experts.140.w3", "model.layers.33.block_sparse_moe.experts.141.w3", "model.layers.33.block_sparse_moe.experts.142.w3", "model.layers.33.block_sparse_moe.experts.143.w3", "model.layers.33.block_sparse_moe.experts.144.w3", "model.layers.33.block_sparse_moe.experts.145.w3", "model.layers.33.block_sparse_moe.experts.146.w3", "model.layers.33.block_sparse_moe.experts.147.w3", "model.layers.33.block_sparse_moe.experts.148.w3", "model.layers.33.block_sparse_moe.experts.149.w3", "model.layers.33.block_sparse_moe.experts.150.w3", "model.layers.33.block_sparse_moe.experts.151.w3", "model.layers.33.block_sparse_moe.experts.152.w3", "model.layers.33.block_sparse_moe.experts.153.w3", "model.layers.33.block_sparse_moe.experts.154.w3", "model.layers.33.block_sparse_moe.experts.155.w3", "model.layers.33.block_sparse_moe.experts.156.w3", "model.layers.33.block_sparse_moe.experts.157.w3", "model.layers.33.block_sparse_moe.experts.158.w3", "model.layers.33.block_sparse_moe.experts.159.w3", "model.layers.33.block_sparse_moe.experts.160.w3", "model.layers.33.block_sparse_moe.experts.161.w3", "model.layers.33.block_sparse_moe.experts.162.w3", "model.layers.33.block_sparse_moe.experts.163.w3", "model.layers.33.block_sparse_moe.experts.164.w3", "model.layers.33.block_sparse_moe.experts.165.w3", "model.layers.33.block_sparse_moe.experts.166.w3", "model.layers.33.block_sparse_moe.experts.167.w3", "model.layers.33.block_sparse_moe.experts.168.w3", "model.layers.33.block_sparse_moe.experts.169.w3", "model.layers.33.block_sparse_moe.experts.170.w3", "model.layers.33.block_sparse_moe.experts.171.w3", "model.layers.33.block_sparse_moe.experts.172.w3", "model.layers.33.block_sparse_moe.experts.173.w3", "model.layers.33.block_sparse_moe.experts.174.w3", "model.layers.33.block_sparse_moe.experts.175.w3", "model.layers.33.block_sparse_moe.experts.176.w3", "model.layers.33.block_sparse_moe.experts.177.w3", "model.layers.33.block_sparse_moe.experts.178.w3", "model.layers.33.block_sparse_moe.experts.179.w3", "model.layers.33.block_sparse_moe.experts.180.w3", "model.layers.33.block_sparse_moe.experts.181.w3", "model.layers.33.block_sparse_moe.experts.182.w3", "model.layers.33.block_sparse_moe.experts.183.w3", "model.layers.33.block_sparse_moe.experts.184.w3", "model.layers.33.block_sparse_moe.experts.185.w3", "model.layers.33.block_sparse_moe.experts.186.w3", "model.layers.33.block_sparse_moe.experts.187.w3", "model.layers.33.block_sparse_moe.experts.188.w3", "model.layers.33.block_sparse_moe.experts.189.w3", "model.layers.33.block_sparse_moe.experts.190.w3", "model.layers.33.block_sparse_moe.experts.191.w3", "model.layers.33.block_sparse_moe.experts.192.w3", "model.layers.33.block_sparse_moe.experts.193.w3", "model.layers.33.block_sparse_moe.experts.194.w3", "model.layers.33.block_sparse_moe.experts.195.w3", "model.layers.33.block_sparse_moe.experts.196.w3", "model.layers.33.block_sparse_moe.experts.197.w3", "model.layers.33.block_sparse_moe.experts.198.w3", "model.layers.33.block_sparse_moe.experts.199.w3", "model.layers.33.block_sparse_moe.experts.200.w3", "model.layers.33.block_sparse_moe.experts.201.w3", "model.layers.33.block_sparse_moe.experts.202.w3", "model.layers.33.block_sparse_moe.experts.203.w3", "model.layers.33.block_sparse_moe.experts.204.w3", "model.layers.33.block_sparse_moe.experts.205.w3", "model.layers.33.block_sparse_moe.experts.206.w3", "model.layers.33.block_sparse_moe.experts.207.w3", "model.layers.33.block_sparse_moe.experts.208.w3", "model.layers.33.block_sparse_moe.experts.209.w3", "model.layers.33.block_sparse_moe.experts.210.w3", "model.layers.33.block_sparse_moe.experts.211.w3", "model.layers.33.block_sparse_moe.experts.212.w3", "model.layers.33.block_sparse_moe.experts.213.w3", "model.layers.33.block_sparse_moe.experts.214.w3", "model.layers.33.block_sparse_moe.experts.215.w3", "model.layers.33.block_sparse_moe.experts.216.w3", "model.layers.33.block_sparse_moe.experts.217.w3", "model.layers.33.block_sparse_moe.experts.218.w3", "model.layers.33.block_sparse_moe.experts.219.w3", "model.layers.33.block_sparse_moe.experts.220.w3", "model.layers.33.block_sparse_moe.experts.221.w3", "model.layers.33.block_sparse_moe.experts.222.w3", "model.layers.33.block_sparse_moe.experts.223.w3", "model.layers.33.block_sparse_moe.experts.224.w3", "model.layers.33.block_sparse_moe.experts.225.w3", "model.layers.33.block_sparse_moe.experts.226.w3", "model.layers.33.block_sparse_moe.experts.227.w3", "model.layers.33.block_sparse_moe.experts.228.w3", "model.layers.33.block_sparse_moe.experts.229.w3", "model.layers.33.block_sparse_moe.experts.230.w3", "model.layers.33.block_sparse_moe.experts.231.w3", "model.layers.33.block_sparse_moe.experts.232.w3", "model.layers.33.block_sparse_moe.experts.233.w3", "model.layers.33.block_sparse_moe.experts.234.w3", "model.layers.33.block_sparse_moe.experts.235.w3", "model.layers.33.block_sparse_moe.experts.236.w3", "model.layers.33.block_sparse_moe.experts.237.w3", "model.layers.33.block_sparse_moe.experts.238.w3", "model.layers.33.block_sparse_moe.experts.239.w3", "model.layers.33.block_sparse_moe.experts.240.w3", "model.layers.33.block_sparse_moe.experts.241.w3", "model.layers.33.block_sparse_moe.experts.242.w3", "model.layers.33.block_sparse_moe.experts.243.w3", "model.layers.33.block_sparse_moe.experts.244.w3", "model.layers.33.block_sparse_moe.experts.245.w3", "model.layers.33.block_sparse_moe.experts.246.w3", "model.layers.33.block_sparse_moe.experts.247.w3", "model.layers.33.block_sparse_moe.experts.248.w3", "model.layers.33.block_sparse_moe.experts.249.w3", "model.layers.33.block_sparse_moe.experts.250.w3", "model.layers.33.block_sparse_moe.experts.251.w3", "model.layers.33.block_sparse_moe.experts.252.w3", "model.layers.33.block_sparse_moe.experts.253.w3", "model.layers.33.block_sparse_moe.experts.254.w3", "model.layers.33.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.00022610723972321667, "dbits": 2415919104 } ] }, { "idx": 169, "layers": [ "model.layers.33.block_sparse_moe.experts.0.w2", "model.layers.33.block_sparse_moe.experts.1.w2", "model.layers.33.block_sparse_moe.experts.2.w2", "model.layers.33.block_sparse_moe.experts.3.w2", "model.layers.33.block_sparse_moe.experts.4.w2", "model.layers.33.block_sparse_moe.experts.5.w2", "model.layers.33.block_sparse_moe.experts.6.w2", "model.layers.33.block_sparse_moe.experts.7.w2", "model.layers.33.block_sparse_moe.experts.8.w2", "model.layers.33.block_sparse_moe.experts.9.w2", "model.layers.33.block_sparse_moe.experts.10.w2", "model.layers.33.block_sparse_moe.experts.11.w2", "model.layers.33.block_sparse_moe.experts.12.w2", "model.layers.33.block_sparse_moe.experts.13.w2", "model.layers.33.block_sparse_moe.experts.14.w2", "model.layers.33.block_sparse_moe.experts.15.w2", "model.layers.33.block_sparse_moe.experts.16.w2", "model.layers.33.block_sparse_moe.experts.17.w2", "model.layers.33.block_sparse_moe.experts.18.w2", "model.layers.33.block_sparse_moe.experts.19.w2", "model.layers.33.block_sparse_moe.experts.20.w2", "model.layers.33.block_sparse_moe.experts.21.w2", "model.layers.33.block_sparse_moe.experts.22.w2", "model.layers.33.block_sparse_moe.experts.23.w2", "model.layers.33.block_sparse_moe.experts.24.w2", "model.layers.33.block_sparse_moe.experts.25.w2", "model.layers.33.block_sparse_moe.experts.26.w2", "model.layers.33.block_sparse_moe.experts.27.w2", "model.layers.33.block_sparse_moe.experts.28.w2", "model.layers.33.block_sparse_moe.experts.29.w2", "model.layers.33.block_sparse_moe.experts.30.w2", "model.layers.33.block_sparse_moe.experts.31.w2", "model.layers.33.block_sparse_moe.experts.32.w2", "model.layers.33.block_sparse_moe.experts.33.w2", "model.layers.33.block_sparse_moe.experts.34.w2", "model.layers.33.block_sparse_moe.experts.35.w2", "model.layers.33.block_sparse_moe.experts.36.w2", "model.layers.33.block_sparse_moe.experts.37.w2", "model.layers.33.block_sparse_moe.experts.38.w2", "model.layers.33.block_sparse_moe.experts.39.w2", "model.layers.33.block_sparse_moe.experts.40.w2", "model.layers.33.block_sparse_moe.experts.41.w2", "model.layers.33.block_sparse_moe.experts.42.w2", "model.layers.33.block_sparse_moe.experts.43.w2", "model.layers.33.block_sparse_moe.experts.44.w2", "model.layers.33.block_sparse_moe.experts.45.w2", "model.layers.33.block_sparse_moe.experts.46.w2", "model.layers.33.block_sparse_moe.experts.47.w2", "model.layers.33.block_sparse_moe.experts.48.w2", "model.layers.33.block_sparse_moe.experts.49.w2", "model.layers.33.block_sparse_moe.experts.50.w2", "model.layers.33.block_sparse_moe.experts.51.w2", "model.layers.33.block_sparse_moe.experts.52.w2", "model.layers.33.block_sparse_moe.experts.53.w2", "model.layers.33.block_sparse_moe.experts.54.w2", "model.layers.33.block_sparse_moe.experts.55.w2", "model.layers.33.block_sparse_moe.experts.56.w2", "model.layers.33.block_sparse_moe.experts.57.w2", "model.layers.33.block_sparse_moe.experts.58.w2", "model.layers.33.block_sparse_moe.experts.59.w2", "model.layers.33.block_sparse_moe.experts.60.w2", "model.layers.33.block_sparse_moe.experts.61.w2", "model.layers.33.block_sparse_moe.experts.62.w2", "model.layers.33.block_sparse_moe.experts.63.w2", "model.layers.33.block_sparse_moe.experts.64.w2", "model.layers.33.block_sparse_moe.experts.65.w2", "model.layers.33.block_sparse_moe.experts.66.w2", "model.layers.33.block_sparse_moe.experts.67.w2", "model.layers.33.block_sparse_moe.experts.68.w2", "model.layers.33.block_sparse_moe.experts.69.w2", "model.layers.33.block_sparse_moe.experts.70.w2", "model.layers.33.block_sparse_moe.experts.71.w2", "model.layers.33.block_sparse_moe.experts.72.w2", "model.layers.33.block_sparse_moe.experts.73.w2", "model.layers.33.block_sparse_moe.experts.74.w2", "model.layers.33.block_sparse_moe.experts.75.w2", "model.layers.33.block_sparse_moe.experts.76.w2", "model.layers.33.block_sparse_moe.experts.77.w2", "model.layers.33.block_sparse_moe.experts.78.w2", "model.layers.33.block_sparse_moe.experts.79.w2", "model.layers.33.block_sparse_moe.experts.80.w2", "model.layers.33.block_sparse_moe.experts.81.w2", "model.layers.33.block_sparse_moe.experts.82.w2", "model.layers.33.block_sparse_moe.experts.83.w2", "model.layers.33.block_sparse_moe.experts.84.w2", "model.layers.33.block_sparse_moe.experts.85.w2", "model.layers.33.block_sparse_moe.experts.86.w2", "model.layers.33.block_sparse_moe.experts.87.w2", "model.layers.33.block_sparse_moe.experts.88.w2", "model.layers.33.block_sparse_moe.experts.89.w2", "model.layers.33.block_sparse_moe.experts.90.w2", "model.layers.33.block_sparse_moe.experts.91.w2", "model.layers.33.block_sparse_moe.experts.92.w2", "model.layers.33.block_sparse_moe.experts.93.w2", "model.layers.33.block_sparse_moe.experts.94.w2", "model.layers.33.block_sparse_moe.experts.95.w2", "model.layers.33.block_sparse_moe.experts.96.w2", "model.layers.33.block_sparse_moe.experts.97.w2", "model.layers.33.block_sparse_moe.experts.98.w2", "model.layers.33.block_sparse_moe.experts.99.w2", "model.layers.33.block_sparse_moe.experts.100.w2", "model.layers.33.block_sparse_moe.experts.101.w2", "model.layers.33.block_sparse_moe.experts.102.w2", "model.layers.33.block_sparse_moe.experts.103.w2", "model.layers.33.block_sparse_moe.experts.104.w2", "model.layers.33.block_sparse_moe.experts.105.w2", "model.layers.33.block_sparse_moe.experts.106.w2", "model.layers.33.block_sparse_moe.experts.107.w2", "model.layers.33.block_sparse_moe.experts.108.w2", "model.layers.33.block_sparse_moe.experts.109.w2", "model.layers.33.block_sparse_moe.experts.110.w2", "model.layers.33.block_sparse_moe.experts.111.w2", "model.layers.33.block_sparse_moe.experts.112.w2", "model.layers.33.block_sparse_moe.experts.113.w2", "model.layers.33.block_sparse_moe.experts.114.w2", "model.layers.33.block_sparse_moe.experts.115.w2", "model.layers.33.block_sparse_moe.experts.116.w2", "model.layers.33.block_sparse_moe.experts.117.w2", "model.layers.33.block_sparse_moe.experts.118.w2", "model.layers.33.block_sparse_moe.experts.119.w2", "model.layers.33.block_sparse_moe.experts.120.w2", "model.layers.33.block_sparse_moe.experts.121.w2", "model.layers.33.block_sparse_moe.experts.122.w2", "model.layers.33.block_sparse_moe.experts.123.w2", "model.layers.33.block_sparse_moe.experts.124.w2", "model.layers.33.block_sparse_moe.experts.125.w2", "model.layers.33.block_sparse_moe.experts.126.w2", "model.layers.33.block_sparse_moe.experts.127.w2", "model.layers.33.block_sparse_moe.experts.128.w2", "model.layers.33.block_sparse_moe.experts.129.w2", "model.layers.33.block_sparse_moe.experts.130.w2", "model.layers.33.block_sparse_moe.experts.131.w2", "model.layers.33.block_sparse_moe.experts.132.w2", "model.layers.33.block_sparse_moe.experts.133.w2", "model.layers.33.block_sparse_moe.experts.134.w2", "model.layers.33.block_sparse_moe.experts.135.w2", "model.layers.33.block_sparse_moe.experts.136.w2", "model.layers.33.block_sparse_moe.experts.137.w2", "model.layers.33.block_sparse_moe.experts.138.w2", "model.layers.33.block_sparse_moe.experts.139.w2", "model.layers.33.block_sparse_moe.experts.140.w2", "model.layers.33.block_sparse_moe.experts.141.w2", "model.layers.33.block_sparse_moe.experts.142.w2", "model.layers.33.block_sparse_moe.experts.143.w2", "model.layers.33.block_sparse_moe.experts.144.w2", "model.layers.33.block_sparse_moe.experts.145.w2", "model.layers.33.block_sparse_moe.experts.146.w2", "model.layers.33.block_sparse_moe.experts.147.w2", "model.layers.33.block_sparse_moe.experts.148.w2", "model.layers.33.block_sparse_moe.experts.149.w2", "model.layers.33.block_sparse_moe.experts.150.w2", "model.layers.33.block_sparse_moe.experts.151.w2", "model.layers.33.block_sparse_moe.experts.152.w2", "model.layers.33.block_sparse_moe.experts.153.w2", "model.layers.33.block_sparse_moe.experts.154.w2", "model.layers.33.block_sparse_moe.experts.155.w2", "model.layers.33.block_sparse_moe.experts.156.w2", "model.layers.33.block_sparse_moe.experts.157.w2", "model.layers.33.block_sparse_moe.experts.158.w2", "model.layers.33.block_sparse_moe.experts.159.w2", "model.layers.33.block_sparse_moe.experts.160.w2", "model.layers.33.block_sparse_moe.experts.161.w2", "model.layers.33.block_sparse_moe.experts.162.w2", "model.layers.33.block_sparse_moe.experts.163.w2", "model.layers.33.block_sparse_moe.experts.164.w2", "model.layers.33.block_sparse_moe.experts.165.w2", "model.layers.33.block_sparse_moe.experts.166.w2", "model.layers.33.block_sparse_moe.experts.167.w2", "model.layers.33.block_sparse_moe.experts.168.w2", "model.layers.33.block_sparse_moe.experts.169.w2", "model.layers.33.block_sparse_moe.experts.170.w2", "model.layers.33.block_sparse_moe.experts.171.w2", "model.layers.33.block_sparse_moe.experts.172.w2", "model.layers.33.block_sparse_moe.experts.173.w2", "model.layers.33.block_sparse_moe.experts.174.w2", "model.layers.33.block_sparse_moe.experts.175.w2", "model.layers.33.block_sparse_moe.experts.176.w2", "model.layers.33.block_sparse_moe.experts.177.w2", "model.layers.33.block_sparse_moe.experts.178.w2", "model.layers.33.block_sparse_moe.experts.179.w2", "model.layers.33.block_sparse_moe.experts.180.w2", "model.layers.33.block_sparse_moe.experts.181.w2", "model.layers.33.block_sparse_moe.experts.182.w2", "model.layers.33.block_sparse_moe.experts.183.w2", "model.layers.33.block_sparse_moe.experts.184.w2", "model.layers.33.block_sparse_moe.experts.185.w2", "model.layers.33.block_sparse_moe.experts.186.w2", "model.layers.33.block_sparse_moe.experts.187.w2", "model.layers.33.block_sparse_moe.experts.188.w2", "model.layers.33.block_sparse_moe.experts.189.w2", "model.layers.33.block_sparse_moe.experts.190.w2", "model.layers.33.block_sparse_moe.experts.191.w2", "model.layers.33.block_sparse_moe.experts.192.w2", "model.layers.33.block_sparse_moe.experts.193.w2", "model.layers.33.block_sparse_moe.experts.194.w2", "model.layers.33.block_sparse_moe.experts.195.w2", "model.layers.33.block_sparse_moe.experts.196.w2", "model.layers.33.block_sparse_moe.experts.197.w2", "model.layers.33.block_sparse_moe.experts.198.w2", "model.layers.33.block_sparse_moe.experts.199.w2", "model.layers.33.block_sparse_moe.experts.200.w2", "model.layers.33.block_sparse_moe.experts.201.w2", "model.layers.33.block_sparse_moe.experts.202.w2", "model.layers.33.block_sparse_moe.experts.203.w2", "model.layers.33.block_sparse_moe.experts.204.w2", "model.layers.33.block_sparse_moe.experts.205.w2", "model.layers.33.block_sparse_moe.experts.206.w2", "model.layers.33.block_sparse_moe.experts.207.w2", "model.layers.33.block_sparse_moe.experts.208.w2", "model.layers.33.block_sparse_moe.experts.209.w2", "model.layers.33.block_sparse_moe.experts.210.w2", "model.layers.33.block_sparse_moe.experts.211.w2", "model.layers.33.block_sparse_moe.experts.212.w2", "model.layers.33.block_sparse_moe.experts.213.w2", "model.layers.33.block_sparse_moe.experts.214.w2", "model.layers.33.block_sparse_moe.experts.215.w2", "model.layers.33.block_sparse_moe.experts.216.w2", "model.layers.33.block_sparse_moe.experts.217.w2", "model.layers.33.block_sparse_moe.experts.218.w2", "model.layers.33.block_sparse_moe.experts.219.w2", "model.layers.33.block_sparse_moe.experts.220.w2", "model.layers.33.block_sparse_moe.experts.221.w2", "model.layers.33.block_sparse_moe.experts.222.w2", "model.layers.33.block_sparse_moe.experts.223.w2", "model.layers.33.block_sparse_moe.experts.224.w2", "model.layers.33.block_sparse_moe.experts.225.w2", "model.layers.33.block_sparse_moe.experts.226.w2", "model.layers.33.block_sparse_moe.experts.227.w2", "model.layers.33.block_sparse_moe.experts.228.w2", "model.layers.33.block_sparse_moe.experts.229.w2", "model.layers.33.block_sparse_moe.experts.230.w2", "model.layers.33.block_sparse_moe.experts.231.w2", "model.layers.33.block_sparse_moe.experts.232.w2", "model.layers.33.block_sparse_moe.experts.233.w2", "model.layers.33.block_sparse_moe.experts.234.w2", "model.layers.33.block_sparse_moe.experts.235.w2", "model.layers.33.block_sparse_moe.experts.236.w2", "model.layers.33.block_sparse_moe.experts.237.w2", "model.layers.33.block_sparse_moe.experts.238.w2", "model.layers.33.block_sparse_moe.experts.239.w2", "model.layers.33.block_sparse_moe.experts.240.w2", "model.layers.33.block_sparse_moe.experts.241.w2", "model.layers.33.block_sparse_moe.experts.242.w2", "model.layers.33.block_sparse_moe.experts.243.w2", "model.layers.33.block_sparse_moe.experts.244.w2", "model.layers.33.block_sparse_moe.experts.245.w2", "model.layers.33.block_sparse_moe.experts.246.w2", "model.layers.33.block_sparse_moe.experts.247.w2", "model.layers.33.block_sparse_moe.experts.248.w2", "model.layers.33.block_sparse_moe.experts.249.w2", "model.layers.33.block_sparse_moe.experts.250.w2", "model.layers.33.block_sparse_moe.experts.251.w2", "model.layers.33.block_sparse_moe.experts.252.w2", "model.layers.33.block_sparse_moe.experts.253.w2", "model.layers.33.block_sparse_moe.experts.254.w2", "model.layers.33.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0009441103786230198, "dbits": 1207959552 } ] }, { "idx": 170, "layers": [ "model.layers.34.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0019503604620695336, "dbits": 18874368 } ] }, { "idx": 171, "layers": [ "model.layers.34.self_attn.k_proj", "model.layers.34.self_attn.v_proj" ], "candidates": [ { "dkld": -0.003720904514193546, "dbits": 6291456 } ] }, { "idx": 172, "layers": [ "model.layers.34.self_attn.o_proj" ], "candidates": [ { "dkld": 0.006703591719269764, "dbits": 18874368 } ] }, { "idx": 173, "layers": [ "model.layers.34.block_sparse_moe.experts.0.w1", "model.layers.34.block_sparse_moe.experts.1.w1", "model.layers.34.block_sparse_moe.experts.2.w1", "model.layers.34.block_sparse_moe.experts.3.w1", "model.layers.34.block_sparse_moe.experts.4.w1", "model.layers.34.block_sparse_moe.experts.5.w1", "model.layers.34.block_sparse_moe.experts.6.w1", "model.layers.34.block_sparse_moe.experts.7.w1", "model.layers.34.block_sparse_moe.experts.8.w1", "model.layers.34.block_sparse_moe.experts.9.w1", "model.layers.34.block_sparse_moe.experts.10.w1", "model.layers.34.block_sparse_moe.experts.11.w1", "model.layers.34.block_sparse_moe.experts.12.w1", "model.layers.34.block_sparse_moe.experts.13.w1", "model.layers.34.block_sparse_moe.experts.14.w1", "model.layers.34.block_sparse_moe.experts.15.w1", "model.layers.34.block_sparse_moe.experts.16.w1", "model.layers.34.block_sparse_moe.experts.17.w1", "model.layers.34.block_sparse_moe.experts.18.w1", "model.layers.34.block_sparse_moe.experts.19.w1", "model.layers.34.block_sparse_moe.experts.20.w1", "model.layers.34.block_sparse_moe.experts.21.w1", "model.layers.34.block_sparse_moe.experts.22.w1", "model.layers.34.block_sparse_moe.experts.23.w1", "model.layers.34.block_sparse_moe.experts.24.w1", "model.layers.34.block_sparse_moe.experts.25.w1", "model.layers.34.block_sparse_moe.experts.26.w1", "model.layers.34.block_sparse_moe.experts.27.w1", "model.layers.34.block_sparse_moe.experts.28.w1", "model.layers.34.block_sparse_moe.experts.29.w1", "model.layers.34.block_sparse_moe.experts.30.w1", "model.layers.34.block_sparse_moe.experts.31.w1", "model.layers.34.block_sparse_moe.experts.32.w1", "model.layers.34.block_sparse_moe.experts.33.w1", "model.layers.34.block_sparse_moe.experts.34.w1", "model.layers.34.block_sparse_moe.experts.35.w1", "model.layers.34.block_sparse_moe.experts.36.w1", "model.layers.34.block_sparse_moe.experts.37.w1", "model.layers.34.block_sparse_moe.experts.38.w1", "model.layers.34.block_sparse_moe.experts.39.w1", "model.layers.34.block_sparse_moe.experts.40.w1", "model.layers.34.block_sparse_moe.experts.41.w1", "model.layers.34.block_sparse_moe.experts.42.w1", "model.layers.34.block_sparse_moe.experts.43.w1", "model.layers.34.block_sparse_moe.experts.44.w1", "model.layers.34.block_sparse_moe.experts.45.w1", "model.layers.34.block_sparse_moe.experts.46.w1", "model.layers.34.block_sparse_moe.experts.47.w1", "model.layers.34.block_sparse_moe.experts.48.w1", "model.layers.34.block_sparse_moe.experts.49.w1", "model.layers.34.block_sparse_moe.experts.50.w1", "model.layers.34.block_sparse_moe.experts.51.w1", "model.layers.34.block_sparse_moe.experts.52.w1", "model.layers.34.block_sparse_moe.experts.53.w1", "model.layers.34.block_sparse_moe.experts.54.w1", "model.layers.34.block_sparse_moe.experts.55.w1", "model.layers.34.block_sparse_moe.experts.56.w1", "model.layers.34.block_sparse_moe.experts.57.w1", "model.layers.34.block_sparse_moe.experts.58.w1", "model.layers.34.block_sparse_moe.experts.59.w1", "model.layers.34.block_sparse_moe.experts.60.w1", "model.layers.34.block_sparse_moe.experts.61.w1", "model.layers.34.block_sparse_moe.experts.62.w1", "model.layers.34.block_sparse_moe.experts.63.w1", "model.layers.34.block_sparse_moe.experts.64.w1", "model.layers.34.block_sparse_moe.experts.65.w1", "model.layers.34.block_sparse_moe.experts.66.w1", "model.layers.34.block_sparse_moe.experts.67.w1", "model.layers.34.block_sparse_moe.experts.68.w1", "model.layers.34.block_sparse_moe.experts.69.w1", "model.layers.34.block_sparse_moe.experts.70.w1", "model.layers.34.block_sparse_moe.experts.71.w1", "model.layers.34.block_sparse_moe.experts.72.w1", "model.layers.34.block_sparse_moe.experts.73.w1", "model.layers.34.block_sparse_moe.experts.74.w1", "model.layers.34.block_sparse_moe.experts.75.w1", "model.layers.34.block_sparse_moe.experts.76.w1", "model.layers.34.block_sparse_moe.experts.77.w1", "model.layers.34.block_sparse_moe.experts.78.w1", "model.layers.34.block_sparse_moe.experts.79.w1", "model.layers.34.block_sparse_moe.experts.80.w1", "model.layers.34.block_sparse_moe.experts.81.w1", "model.layers.34.block_sparse_moe.experts.82.w1", "model.layers.34.block_sparse_moe.experts.83.w1", "model.layers.34.block_sparse_moe.experts.84.w1", "model.layers.34.block_sparse_moe.experts.85.w1", "model.layers.34.block_sparse_moe.experts.86.w1", "model.layers.34.block_sparse_moe.experts.87.w1", "model.layers.34.block_sparse_moe.experts.88.w1", "model.layers.34.block_sparse_moe.experts.89.w1", "model.layers.34.block_sparse_moe.experts.90.w1", "model.layers.34.block_sparse_moe.experts.91.w1", "model.layers.34.block_sparse_moe.experts.92.w1", "model.layers.34.block_sparse_moe.experts.93.w1", "model.layers.34.block_sparse_moe.experts.94.w1", "model.layers.34.block_sparse_moe.experts.95.w1", "model.layers.34.block_sparse_moe.experts.96.w1", "model.layers.34.block_sparse_moe.experts.97.w1", "model.layers.34.block_sparse_moe.experts.98.w1", "model.layers.34.block_sparse_moe.experts.99.w1", "model.layers.34.block_sparse_moe.experts.100.w1", "model.layers.34.block_sparse_moe.experts.101.w1", "model.layers.34.block_sparse_moe.experts.102.w1", "model.layers.34.block_sparse_moe.experts.103.w1", "model.layers.34.block_sparse_moe.experts.104.w1", "model.layers.34.block_sparse_moe.experts.105.w1", "model.layers.34.block_sparse_moe.experts.106.w1", "model.layers.34.block_sparse_moe.experts.107.w1", "model.layers.34.block_sparse_moe.experts.108.w1", "model.layers.34.block_sparse_moe.experts.109.w1", "model.layers.34.block_sparse_moe.experts.110.w1", "model.layers.34.block_sparse_moe.experts.111.w1", "model.layers.34.block_sparse_moe.experts.112.w1", "model.layers.34.block_sparse_moe.experts.113.w1", "model.layers.34.block_sparse_moe.experts.114.w1", "model.layers.34.block_sparse_moe.experts.115.w1", "model.layers.34.block_sparse_moe.experts.116.w1", "model.layers.34.block_sparse_moe.experts.117.w1", "model.layers.34.block_sparse_moe.experts.118.w1", "model.layers.34.block_sparse_moe.experts.119.w1", "model.layers.34.block_sparse_moe.experts.120.w1", "model.layers.34.block_sparse_moe.experts.121.w1", "model.layers.34.block_sparse_moe.experts.122.w1", "model.layers.34.block_sparse_moe.experts.123.w1", "model.layers.34.block_sparse_moe.experts.124.w1", "model.layers.34.block_sparse_moe.experts.125.w1", "model.layers.34.block_sparse_moe.experts.126.w1", "model.layers.34.block_sparse_moe.experts.127.w1", "model.layers.34.block_sparse_moe.experts.128.w1", "model.layers.34.block_sparse_moe.experts.129.w1", "model.layers.34.block_sparse_moe.experts.130.w1", "model.layers.34.block_sparse_moe.experts.131.w1", "model.layers.34.block_sparse_moe.experts.132.w1", "model.layers.34.block_sparse_moe.experts.133.w1", "model.layers.34.block_sparse_moe.experts.134.w1", "model.layers.34.block_sparse_moe.experts.135.w1", "model.layers.34.block_sparse_moe.experts.136.w1", "model.layers.34.block_sparse_moe.experts.137.w1", "model.layers.34.block_sparse_moe.experts.138.w1", "model.layers.34.block_sparse_moe.experts.139.w1", "model.layers.34.block_sparse_moe.experts.140.w1", "model.layers.34.block_sparse_moe.experts.141.w1", "model.layers.34.block_sparse_moe.experts.142.w1", "model.layers.34.block_sparse_moe.experts.143.w1", "model.layers.34.block_sparse_moe.experts.144.w1", "model.layers.34.block_sparse_moe.experts.145.w1", "model.layers.34.block_sparse_moe.experts.146.w1", "model.layers.34.block_sparse_moe.experts.147.w1", "model.layers.34.block_sparse_moe.experts.148.w1", "model.layers.34.block_sparse_moe.experts.149.w1", "model.layers.34.block_sparse_moe.experts.150.w1", "model.layers.34.block_sparse_moe.experts.151.w1", "model.layers.34.block_sparse_moe.experts.152.w1", "model.layers.34.block_sparse_moe.experts.153.w1", "model.layers.34.block_sparse_moe.experts.154.w1", "model.layers.34.block_sparse_moe.experts.155.w1", "model.layers.34.block_sparse_moe.experts.156.w1", "model.layers.34.block_sparse_moe.experts.157.w1", "model.layers.34.block_sparse_moe.experts.158.w1", "model.layers.34.block_sparse_moe.experts.159.w1", "model.layers.34.block_sparse_moe.experts.160.w1", "model.layers.34.block_sparse_moe.experts.161.w1", "model.layers.34.block_sparse_moe.experts.162.w1", "model.layers.34.block_sparse_moe.experts.163.w1", "model.layers.34.block_sparse_moe.experts.164.w1", "model.layers.34.block_sparse_moe.experts.165.w1", "model.layers.34.block_sparse_moe.experts.166.w1", "model.layers.34.block_sparse_moe.experts.167.w1", "model.layers.34.block_sparse_moe.experts.168.w1", "model.layers.34.block_sparse_moe.experts.169.w1", "model.layers.34.block_sparse_moe.experts.170.w1", "model.layers.34.block_sparse_moe.experts.171.w1", "model.layers.34.block_sparse_moe.experts.172.w1", "model.layers.34.block_sparse_moe.experts.173.w1", "model.layers.34.block_sparse_moe.experts.174.w1", "model.layers.34.block_sparse_moe.experts.175.w1", "model.layers.34.block_sparse_moe.experts.176.w1", "model.layers.34.block_sparse_moe.experts.177.w1", "model.layers.34.block_sparse_moe.experts.178.w1", "model.layers.34.block_sparse_moe.experts.179.w1", "model.layers.34.block_sparse_moe.experts.180.w1", "model.layers.34.block_sparse_moe.experts.181.w1", "model.layers.34.block_sparse_moe.experts.182.w1", "model.layers.34.block_sparse_moe.experts.183.w1", "model.layers.34.block_sparse_moe.experts.184.w1", "model.layers.34.block_sparse_moe.experts.185.w1", "model.layers.34.block_sparse_moe.experts.186.w1", "model.layers.34.block_sparse_moe.experts.187.w1", "model.layers.34.block_sparse_moe.experts.188.w1", "model.layers.34.block_sparse_moe.experts.189.w1", "model.layers.34.block_sparse_moe.experts.190.w1", "model.layers.34.block_sparse_moe.experts.191.w1", "model.layers.34.block_sparse_moe.experts.192.w1", "model.layers.34.block_sparse_moe.experts.193.w1", "model.layers.34.block_sparse_moe.experts.194.w1", "model.layers.34.block_sparse_moe.experts.195.w1", "model.layers.34.block_sparse_moe.experts.196.w1", "model.layers.34.block_sparse_moe.experts.197.w1", "model.layers.34.block_sparse_moe.experts.198.w1", "model.layers.34.block_sparse_moe.experts.199.w1", "model.layers.34.block_sparse_moe.experts.200.w1", "model.layers.34.block_sparse_moe.experts.201.w1", "model.layers.34.block_sparse_moe.experts.202.w1", "model.layers.34.block_sparse_moe.experts.203.w1", "model.layers.34.block_sparse_moe.experts.204.w1", "model.layers.34.block_sparse_moe.experts.205.w1", "model.layers.34.block_sparse_moe.experts.206.w1", "model.layers.34.block_sparse_moe.experts.207.w1", "model.layers.34.block_sparse_moe.experts.208.w1", "model.layers.34.block_sparse_moe.experts.209.w1", "model.layers.34.block_sparse_moe.experts.210.w1", "model.layers.34.block_sparse_moe.experts.211.w1", "model.layers.34.block_sparse_moe.experts.212.w1", "model.layers.34.block_sparse_moe.experts.213.w1", "model.layers.34.block_sparse_moe.experts.214.w1", "model.layers.34.block_sparse_moe.experts.215.w1", "model.layers.34.block_sparse_moe.experts.216.w1", "model.layers.34.block_sparse_moe.experts.217.w1", "model.layers.34.block_sparse_moe.experts.218.w1", "model.layers.34.block_sparse_moe.experts.219.w1", "model.layers.34.block_sparse_moe.experts.220.w1", "model.layers.34.block_sparse_moe.experts.221.w1", "model.layers.34.block_sparse_moe.experts.222.w1", "model.layers.34.block_sparse_moe.experts.223.w1", "model.layers.34.block_sparse_moe.experts.224.w1", "model.layers.34.block_sparse_moe.experts.225.w1", "model.layers.34.block_sparse_moe.experts.226.w1", "model.layers.34.block_sparse_moe.experts.227.w1", "model.layers.34.block_sparse_moe.experts.228.w1", "model.layers.34.block_sparse_moe.experts.229.w1", "model.layers.34.block_sparse_moe.experts.230.w1", "model.layers.34.block_sparse_moe.experts.231.w1", "model.layers.34.block_sparse_moe.experts.232.w1", "model.layers.34.block_sparse_moe.experts.233.w1", "model.layers.34.block_sparse_moe.experts.234.w1", "model.layers.34.block_sparse_moe.experts.235.w1", "model.layers.34.block_sparse_moe.experts.236.w1", "model.layers.34.block_sparse_moe.experts.237.w1", "model.layers.34.block_sparse_moe.experts.238.w1", "model.layers.34.block_sparse_moe.experts.239.w1", "model.layers.34.block_sparse_moe.experts.240.w1", "model.layers.34.block_sparse_moe.experts.241.w1", "model.layers.34.block_sparse_moe.experts.242.w1", "model.layers.34.block_sparse_moe.experts.243.w1", "model.layers.34.block_sparse_moe.experts.244.w1", "model.layers.34.block_sparse_moe.experts.245.w1", "model.layers.34.block_sparse_moe.experts.246.w1", "model.layers.34.block_sparse_moe.experts.247.w1", "model.layers.34.block_sparse_moe.experts.248.w1", "model.layers.34.block_sparse_moe.experts.249.w1", "model.layers.34.block_sparse_moe.experts.250.w1", "model.layers.34.block_sparse_moe.experts.251.w1", "model.layers.34.block_sparse_moe.experts.252.w1", "model.layers.34.block_sparse_moe.experts.253.w1", "model.layers.34.block_sparse_moe.experts.254.w1", "model.layers.34.block_sparse_moe.experts.255.w1", "model.layers.34.block_sparse_moe.experts.0.w3", "model.layers.34.block_sparse_moe.experts.1.w3", "model.layers.34.block_sparse_moe.experts.2.w3", "model.layers.34.block_sparse_moe.experts.3.w3", "model.layers.34.block_sparse_moe.experts.4.w3", "model.layers.34.block_sparse_moe.experts.5.w3", "model.layers.34.block_sparse_moe.experts.6.w3", "model.layers.34.block_sparse_moe.experts.7.w3", "model.layers.34.block_sparse_moe.experts.8.w3", "model.layers.34.block_sparse_moe.experts.9.w3", "model.layers.34.block_sparse_moe.experts.10.w3", "model.layers.34.block_sparse_moe.experts.11.w3", "model.layers.34.block_sparse_moe.experts.12.w3", "model.layers.34.block_sparse_moe.experts.13.w3", "model.layers.34.block_sparse_moe.experts.14.w3", "model.layers.34.block_sparse_moe.experts.15.w3", "model.layers.34.block_sparse_moe.experts.16.w3", "model.layers.34.block_sparse_moe.experts.17.w3", "model.layers.34.block_sparse_moe.experts.18.w3", "model.layers.34.block_sparse_moe.experts.19.w3", "model.layers.34.block_sparse_moe.experts.20.w3", "model.layers.34.block_sparse_moe.experts.21.w3", "model.layers.34.block_sparse_moe.experts.22.w3", "model.layers.34.block_sparse_moe.experts.23.w3", "model.layers.34.block_sparse_moe.experts.24.w3", "model.layers.34.block_sparse_moe.experts.25.w3", "model.layers.34.block_sparse_moe.experts.26.w3", "model.layers.34.block_sparse_moe.experts.27.w3", "model.layers.34.block_sparse_moe.experts.28.w3", "model.layers.34.block_sparse_moe.experts.29.w3", "model.layers.34.block_sparse_moe.experts.30.w3", "model.layers.34.block_sparse_moe.experts.31.w3", "model.layers.34.block_sparse_moe.experts.32.w3", "model.layers.34.block_sparse_moe.experts.33.w3", "model.layers.34.block_sparse_moe.experts.34.w3", "model.layers.34.block_sparse_moe.experts.35.w3", "model.layers.34.block_sparse_moe.experts.36.w3", "model.layers.34.block_sparse_moe.experts.37.w3", "model.layers.34.block_sparse_moe.experts.38.w3", "model.layers.34.block_sparse_moe.experts.39.w3", "model.layers.34.block_sparse_moe.experts.40.w3", "model.layers.34.block_sparse_moe.experts.41.w3", "model.layers.34.block_sparse_moe.experts.42.w3", "model.layers.34.block_sparse_moe.experts.43.w3", "model.layers.34.block_sparse_moe.experts.44.w3", "model.layers.34.block_sparse_moe.experts.45.w3", "model.layers.34.block_sparse_moe.experts.46.w3", "model.layers.34.block_sparse_moe.experts.47.w3", "model.layers.34.block_sparse_moe.experts.48.w3", "model.layers.34.block_sparse_moe.experts.49.w3", "model.layers.34.block_sparse_moe.experts.50.w3", "model.layers.34.block_sparse_moe.experts.51.w3", "model.layers.34.block_sparse_moe.experts.52.w3", "model.layers.34.block_sparse_moe.experts.53.w3", "model.layers.34.block_sparse_moe.experts.54.w3", "model.layers.34.block_sparse_moe.experts.55.w3", "model.layers.34.block_sparse_moe.experts.56.w3", "model.layers.34.block_sparse_moe.experts.57.w3", "model.layers.34.block_sparse_moe.experts.58.w3", "model.layers.34.block_sparse_moe.experts.59.w3", "model.layers.34.block_sparse_moe.experts.60.w3", "model.layers.34.block_sparse_moe.experts.61.w3", "model.layers.34.block_sparse_moe.experts.62.w3", "model.layers.34.block_sparse_moe.experts.63.w3", "model.layers.34.block_sparse_moe.experts.64.w3", "model.layers.34.block_sparse_moe.experts.65.w3", "model.layers.34.block_sparse_moe.experts.66.w3", "model.layers.34.block_sparse_moe.experts.67.w3", "model.layers.34.block_sparse_moe.experts.68.w3", "model.layers.34.block_sparse_moe.experts.69.w3", "model.layers.34.block_sparse_moe.experts.70.w3", "model.layers.34.block_sparse_moe.experts.71.w3", "model.layers.34.block_sparse_moe.experts.72.w3", "model.layers.34.block_sparse_moe.experts.73.w3", "model.layers.34.block_sparse_moe.experts.74.w3", "model.layers.34.block_sparse_moe.experts.75.w3", "model.layers.34.block_sparse_moe.experts.76.w3", "model.layers.34.block_sparse_moe.experts.77.w3", "model.layers.34.block_sparse_moe.experts.78.w3", "model.layers.34.block_sparse_moe.experts.79.w3", "model.layers.34.block_sparse_moe.experts.80.w3", "model.layers.34.block_sparse_moe.experts.81.w3", "model.layers.34.block_sparse_moe.experts.82.w3", "model.layers.34.block_sparse_moe.experts.83.w3", "model.layers.34.block_sparse_moe.experts.84.w3", "model.layers.34.block_sparse_moe.experts.85.w3", "model.layers.34.block_sparse_moe.experts.86.w3", "model.layers.34.block_sparse_moe.experts.87.w3", "model.layers.34.block_sparse_moe.experts.88.w3", "model.layers.34.block_sparse_moe.experts.89.w3", "model.layers.34.block_sparse_moe.experts.90.w3", "model.layers.34.block_sparse_moe.experts.91.w3", "model.layers.34.block_sparse_moe.experts.92.w3", "model.layers.34.block_sparse_moe.experts.93.w3", "model.layers.34.block_sparse_moe.experts.94.w3", "model.layers.34.block_sparse_moe.experts.95.w3", "model.layers.34.block_sparse_moe.experts.96.w3", "model.layers.34.block_sparse_moe.experts.97.w3", "model.layers.34.block_sparse_moe.experts.98.w3", "model.layers.34.block_sparse_moe.experts.99.w3", "model.layers.34.block_sparse_moe.experts.100.w3", "model.layers.34.block_sparse_moe.experts.101.w3", "model.layers.34.block_sparse_moe.experts.102.w3", "model.layers.34.block_sparse_moe.experts.103.w3", "model.layers.34.block_sparse_moe.experts.104.w3", "model.layers.34.block_sparse_moe.experts.105.w3", "model.layers.34.block_sparse_moe.experts.106.w3", "model.layers.34.block_sparse_moe.experts.107.w3", "model.layers.34.block_sparse_moe.experts.108.w3", "model.layers.34.block_sparse_moe.experts.109.w3", "model.layers.34.block_sparse_moe.experts.110.w3", "model.layers.34.block_sparse_moe.experts.111.w3", "model.layers.34.block_sparse_moe.experts.112.w3", "model.layers.34.block_sparse_moe.experts.113.w3", "model.layers.34.block_sparse_moe.experts.114.w3", "model.layers.34.block_sparse_moe.experts.115.w3", "model.layers.34.block_sparse_moe.experts.116.w3", "model.layers.34.block_sparse_moe.experts.117.w3", "model.layers.34.block_sparse_moe.experts.118.w3", "model.layers.34.block_sparse_moe.experts.119.w3", "model.layers.34.block_sparse_moe.experts.120.w3", "model.layers.34.block_sparse_moe.experts.121.w3", "model.layers.34.block_sparse_moe.experts.122.w3", "model.layers.34.block_sparse_moe.experts.123.w3", "model.layers.34.block_sparse_moe.experts.124.w3", "model.layers.34.block_sparse_moe.experts.125.w3", "model.layers.34.block_sparse_moe.experts.126.w3", "model.layers.34.block_sparse_moe.experts.127.w3", "model.layers.34.block_sparse_moe.experts.128.w3", "model.layers.34.block_sparse_moe.experts.129.w3", "model.layers.34.block_sparse_moe.experts.130.w3", "model.layers.34.block_sparse_moe.experts.131.w3", "model.layers.34.block_sparse_moe.experts.132.w3", "model.layers.34.block_sparse_moe.experts.133.w3", "model.layers.34.block_sparse_moe.experts.134.w3", "model.layers.34.block_sparse_moe.experts.135.w3", "model.layers.34.block_sparse_moe.experts.136.w3", "model.layers.34.block_sparse_moe.experts.137.w3", "model.layers.34.block_sparse_moe.experts.138.w3", "model.layers.34.block_sparse_moe.experts.139.w3", "model.layers.34.block_sparse_moe.experts.140.w3", "model.layers.34.block_sparse_moe.experts.141.w3", "model.layers.34.block_sparse_moe.experts.142.w3", "model.layers.34.block_sparse_moe.experts.143.w3", "model.layers.34.block_sparse_moe.experts.144.w3", "model.layers.34.block_sparse_moe.experts.145.w3", "model.layers.34.block_sparse_moe.experts.146.w3", "model.layers.34.block_sparse_moe.experts.147.w3", "model.layers.34.block_sparse_moe.experts.148.w3", "model.layers.34.block_sparse_moe.experts.149.w3", "model.layers.34.block_sparse_moe.experts.150.w3", "model.layers.34.block_sparse_moe.experts.151.w3", "model.layers.34.block_sparse_moe.experts.152.w3", "model.layers.34.block_sparse_moe.experts.153.w3", "model.layers.34.block_sparse_moe.experts.154.w3", "model.layers.34.block_sparse_moe.experts.155.w3", "model.layers.34.block_sparse_moe.experts.156.w3", "model.layers.34.block_sparse_moe.experts.157.w3", "model.layers.34.block_sparse_moe.experts.158.w3", "model.layers.34.block_sparse_moe.experts.159.w3", "model.layers.34.block_sparse_moe.experts.160.w3", "model.layers.34.block_sparse_moe.experts.161.w3", "model.layers.34.block_sparse_moe.experts.162.w3", "model.layers.34.block_sparse_moe.experts.163.w3", "model.layers.34.block_sparse_moe.experts.164.w3", "model.layers.34.block_sparse_moe.experts.165.w3", "model.layers.34.block_sparse_moe.experts.166.w3", "model.layers.34.block_sparse_moe.experts.167.w3", "model.layers.34.block_sparse_moe.experts.168.w3", "model.layers.34.block_sparse_moe.experts.169.w3", "model.layers.34.block_sparse_moe.experts.170.w3", "model.layers.34.block_sparse_moe.experts.171.w3", "model.layers.34.block_sparse_moe.experts.172.w3", "model.layers.34.block_sparse_moe.experts.173.w3", "model.layers.34.block_sparse_moe.experts.174.w3", "model.layers.34.block_sparse_moe.experts.175.w3", "model.layers.34.block_sparse_moe.experts.176.w3", "model.layers.34.block_sparse_moe.experts.177.w3", "model.layers.34.block_sparse_moe.experts.178.w3", "model.layers.34.block_sparse_moe.experts.179.w3", "model.layers.34.block_sparse_moe.experts.180.w3", "model.layers.34.block_sparse_moe.experts.181.w3", "model.layers.34.block_sparse_moe.experts.182.w3", "model.layers.34.block_sparse_moe.experts.183.w3", "model.layers.34.block_sparse_moe.experts.184.w3", "model.layers.34.block_sparse_moe.experts.185.w3", "model.layers.34.block_sparse_moe.experts.186.w3", "model.layers.34.block_sparse_moe.experts.187.w3", "model.layers.34.block_sparse_moe.experts.188.w3", "model.layers.34.block_sparse_moe.experts.189.w3", "model.layers.34.block_sparse_moe.experts.190.w3", "model.layers.34.block_sparse_moe.experts.191.w3", "model.layers.34.block_sparse_moe.experts.192.w3", "model.layers.34.block_sparse_moe.experts.193.w3", "model.layers.34.block_sparse_moe.experts.194.w3", "model.layers.34.block_sparse_moe.experts.195.w3", "model.layers.34.block_sparse_moe.experts.196.w3", "model.layers.34.block_sparse_moe.experts.197.w3", "model.layers.34.block_sparse_moe.experts.198.w3", "model.layers.34.block_sparse_moe.experts.199.w3", "model.layers.34.block_sparse_moe.experts.200.w3", "model.layers.34.block_sparse_moe.experts.201.w3", "model.layers.34.block_sparse_moe.experts.202.w3", "model.layers.34.block_sparse_moe.experts.203.w3", "model.layers.34.block_sparse_moe.experts.204.w3", "model.layers.34.block_sparse_moe.experts.205.w3", "model.layers.34.block_sparse_moe.experts.206.w3", "model.layers.34.block_sparse_moe.experts.207.w3", "model.layers.34.block_sparse_moe.experts.208.w3", "model.layers.34.block_sparse_moe.experts.209.w3", "model.layers.34.block_sparse_moe.experts.210.w3", "model.layers.34.block_sparse_moe.experts.211.w3", "model.layers.34.block_sparse_moe.experts.212.w3", "model.layers.34.block_sparse_moe.experts.213.w3", "model.layers.34.block_sparse_moe.experts.214.w3", "model.layers.34.block_sparse_moe.experts.215.w3", "model.layers.34.block_sparse_moe.experts.216.w3", "model.layers.34.block_sparse_moe.experts.217.w3", "model.layers.34.block_sparse_moe.experts.218.w3", "model.layers.34.block_sparse_moe.experts.219.w3", "model.layers.34.block_sparse_moe.experts.220.w3", "model.layers.34.block_sparse_moe.experts.221.w3", "model.layers.34.block_sparse_moe.experts.222.w3", "model.layers.34.block_sparse_moe.experts.223.w3", "model.layers.34.block_sparse_moe.experts.224.w3", "model.layers.34.block_sparse_moe.experts.225.w3", "model.layers.34.block_sparse_moe.experts.226.w3", "model.layers.34.block_sparse_moe.experts.227.w3", "model.layers.34.block_sparse_moe.experts.228.w3", "model.layers.34.block_sparse_moe.experts.229.w3", "model.layers.34.block_sparse_moe.experts.230.w3", "model.layers.34.block_sparse_moe.experts.231.w3", "model.layers.34.block_sparse_moe.experts.232.w3", "model.layers.34.block_sparse_moe.experts.233.w3", "model.layers.34.block_sparse_moe.experts.234.w3", "model.layers.34.block_sparse_moe.experts.235.w3", "model.layers.34.block_sparse_moe.experts.236.w3", "model.layers.34.block_sparse_moe.experts.237.w3", "model.layers.34.block_sparse_moe.experts.238.w3", "model.layers.34.block_sparse_moe.experts.239.w3", "model.layers.34.block_sparse_moe.experts.240.w3", "model.layers.34.block_sparse_moe.experts.241.w3", "model.layers.34.block_sparse_moe.experts.242.w3", "model.layers.34.block_sparse_moe.experts.243.w3", "model.layers.34.block_sparse_moe.experts.244.w3", "model.layers.34.block_sparse_moe.experts.245.w3", "model.layers.34.block_sparse_moe.experts.246.w3", "model.layers.34.block_sparse_moe.experts.247.w3", "model.layers.34.block_sparse_moe.experts.248.w3", "model.layers.34.block_sparse_moe.experts.249.w3", "model.layers.34.block_sparse_moe.experts.250.w3", "model.layers.34.block_sparse_moe.experts.251.w3", "model.layers.34.block_sparse_moe.experts.252.w3", "model.layers.34.block_sparse_moe.experts.253.w3", "model.layers.34.block_sparse_moe.experts.254.w3", "model.layers.34.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0007704831659793854, "dbits": 2415919104 } ] }, { "idx": 174, "layers": [ "model.layers.34.block_sparse_moe.experts.0.w2", "model.layers.34.block_sparse_moe.experts.1.w2", "model.layers.34.block_sparse_moe.experts.2.w2", "model.layers.34.block_sparse_moe.experts.3.w2", "model.layers.34.block_sparse_moe.experts.4.w2", "model.layers.34.block_sparse_moe.experts.5.w2", "model.layers.34.block_sparse_moe.experts.6.w2", "model.layers.34.block_sparse_moe.experts.7.w2", "model.layers.34.block_sparse_moe.experts.8.w2", "model.layers.34.block_sparse_moe.experts.9.w2", "model.layers.34.block_sparse_moe.experts.10.w2", "model.layers.34.block_sparse_moe.experts.11.w2", "model.layers.34.block_sparse_moe.experts.12.w2", "model.layers.34.block_sparse_moe.experts.13.w2", "model.layers.34.block_sparse_moe.experts.14.w2", "model.layers.34.block_sparse_moe.experts.15.w2", "model.layers.34.block_sparse_moe.experts.16.w2", "model.layers.34.block_sparse_moe.experts.17.w2", "model.layers.34.block_sparse_moe.experts.18.w2", "model.layers.34.block_sparse_moe.experts.19.w2", "model.layers.34.block_sparse_moe.experts.20.w2", "model.layers.34.block_sparse_moe.experts.21.w2", "model.layers.34.block_sparse_moe.experts.22.w2", "model.layers.34.block_sparse_moe.experts.23.w2", "model.layers.34.block_sparse_moe.experts.24.w2", "model.layers.34.block_sparse_moe.experts.25.w2", "model.layers.34.block_sparse_moe.experts.26.w2", "model.layers.34.block_sparse_moe.experts.27.w2", "model.layers.34.block_sparse_moe.experts.28.w2", "model.layers.34.block_sparse_moe.experts.29.w2", "model.layers.34.block_sparse_moe.experts.30.w2", "model.layers.34.block_sparse_moe.experts.31.w2", "model.layers.34.block_sparse_moe.experts.32.w2", "model.layers.34.block_sparse_moe.experts.33.w2", "model.layers.34.block_sparse_moe.experts.34.w2", "model.layers.34.block_sparse_moe.experts.35.w2", "model.layers.34.block_sparse_moe.experts.36.w2", "model.layers.34.block_sparse_moe.experts.37.w2", "model.layers.34.block_sparse_moe.experts.38.w2", "model.layers.34.block_sparse_moe.experts.39.w2", "model.layers.34.block_sparse_moe.experts.40.w2", "model.layers.34.block_sparse_moe.experts.41.w2", "model.layers.34.block_sparse_moe.experts.42.w2", "model.layers.34.block_sparse_moe.experts.43.w2", "model.layers.34.block_sparse_moe.experts.44.w2", "model.layers.34.block_sparse_moe.experts.45.w2", "model.layers.34.block_sparse_moe.experts.46.w2", "model.layers.34.block_sparse_moe.experts.47.w2", "model.layers.34.block_sparse_moe.experts.48.w2", "model.layers.34.block_sparse_moe.experts.49.w2", "model.layers.34.block_sparse_moe.experts.50.w2", "model.layers.34.block_sparse_moe.experts.51.w2", "model.layers.34.block_sparse_moe.experts.52.w2", "model.layers.34.block_sparse_moe.experts.53.w2", "model.layers.34.block_sparse_moe.experts.54.w2", "model.layers.34.block_sparse_moe.experts.55.w2", "model.layers.34.block_sparse_moe.experts.56.w2", "model.layers.34.block_sparse_moe.experts.57.w2", "model.layers.34.block_sparse_moe.experts.58.w2", "model.layers.34.block_sparse_moe.experts.59.w2", "model.layers.34.block_sparse_moe.experts.60.w2", "model.layers.34.block_sparse_moe.experts.61.w2", "model.layers.34.block_sparse_moe.experts.62.w2", "model.layers.34.block_sparse_moe.experts.63.w2", "model.layers.34.block_sparse_moe.experts.64.w2", "model.layers.34.block_sparse_moe.experts.65.w2", "model.layers.34.block_sparse_moe.experts.66.w2", "model.layers.34.block_sparse_moe.experts.67.w2", "model.layers.34.block_sparse_moe.experts.68.w2", "model.layers.34.block_sparse_moe.experts.69.w2", "model.layers.34.block_sparse_moe.experts.70.w2", "model.layers.34.block_sparse_moe.experts.71.w2", "model.layers.34.block_sparse_moe.experts.72.w2", "model.layers.34.block_sparse_moe.experts.73.w2", "model.layers.34.block_sparse_moe.experts.74.w2", "model.layers.34.block_sparse_moe.experts.75.w2", "model.layers.34.block_sparse_moe.experts.76.w2", "model.layers.34.block_sparse_moe.experts.77.w2", "model.layers.34.block_sparse_moe.experts.78.w2", "model.layers.34.block_sparse_moe.experts.79.w2", "model.layers.34.block_sparse_moe.experts.80.w2", "model.layers.34.block_sparse_moe.experts.81.w2", "model.layers.34.block_sparse_moe.experts.82.w2", "model.layers.34.block_sparse_moe.experts.83.w2", "model.layers.34.block_sparse_moe.experts.84.w2", "model.layers.34.block_sparse_moe.experts.85.w2", "model.layers.34.block_sparse_moe.experts.86.w2", "model.layers.34.block_sparse_moe.experts.87.w2", "model.layers.34.block_sparse_moe.experts.88.w2", "model.layers.34.block_sparse_moe.experts.89.w2", "model.layers.34.block_sparse_moe.experts.90.w2", "model.layers.34.block_sparse_moe.experts.91.w2", "model.layers.34.block_sparse_moe.experts.92.w2", "model.layers.34.block_sparse_moe.experts.93.w2", "model.layers.34.block_sparse_moe.experts.94.w2", "model.layers.34.block_sparse_moe.experts.95.w2", "model.layers.34.block_sparse_moe.experts.96.w2", "model.layers.34.block_sparse_moe.experts.97.w2", "model.layers.34.block_sparse_moe.experts.98.w2", "model.layers.34.block_sparse_moe.experts.99.w2", "model.layers.34.block_sparse_moe.experts.100.w2", "model.layers.34.block_sparse_moe.experts.101.w2", "model.layers.34.block_sparse_moe.experts.102.w2", "model.layers.34.block_sparse_moe.experts.103.w2", "model.layers.34.block_sparse_moe.experts.104.w2", "model.layers.34.block_sparse_moe.experts.105.w2", "model.layers.34.block_sparse_moe.experts.106.w2", "model.layers.34.block_sparse_moe.experts.107.w2", "model.layers.34.block_sparse_moe.experts.108.w2", "model.layers.34.block_sparse_moe.experts.109.w2", "model.layers.34.block_sparse_moe.experts.110.w2", "model.layers.34.block_sparse_moe.experts.111.w2", "model.layers.34.block_sparse_moe.experts.112.w2", "model.layers.34.block_sparse_moe.experts.113.w2", "model.layers.34.block_sparse_moe.experts.114.w2", "model.layers.34.block_sparse_moe.experts.115.w2", "model.layers.34.block_sparse_moe.experts.116.w2", "model.layers.34.block_sparse_moe.experts.117.w2", "model.layers.34.block_sparse_moe.experts.118.w2", "model.layers.34.block_sparse_moe.experts.119.w2", "model.layers.34.block_sparse_moe.experts.120.w2", "model.layers.34.block_sparse_moe.experts.121.w2", "model.layers.34.block_sparse_moe.experts.122.w2", "model.layers.34.block_sparse_moe.experts.123.w2", "model.layers.34.block_sparse_moe.experts.124.w2", "model.layers.34.block_sparse_moe.experts.125.w2", "model.layers.34.block_sparse_moe.experts.126.w2", "model.layers.34.block_sparse_moe.experts.127.w2", "model.layers.34.block_sparse_moe.experts.128.w2", "model.layers.34.block_sparse_moe.experts.129.w2", "model.layers.34.block_sparse_moe.experts.130.w2", "model.layers.34.block_sparse_moe.experts.131.w2", "model.layers.34.block_sparse_moe.experts.132.w2", "model.layers.34.block_sparse_moe.experts.133.w2", "model.layers.34.block_sparse_moe.experts.134.w2", "model.layers.34.block_sparse_moe.experts.135.w2", "model.layers.34.block_sparse_moe.experts.136.w2", "model.layers.34.block_sparse_moe.experts.137.w2", "model.layers.34.block_sparse_moe.experts.138.w2", "model.layers.34.block_sparse_moe.experts.139.w2", "model.layers.34.block_sparse_moe.experts.140.w2", "model.layers.34.block_sparse_moe.experts.141.w2", "model.layers.34.block_sparse_moe.experts.142.w2", "model.layers.34.block_sparse_moe.experts.143.w2", "model.layers.34.block_sparse_moe.experts.144.w2", "model.layers.34.block_sparse_moe.experts.145.w2", "model.layers.34.block_sparse_moe.experts.146.w2", "model.layers.34.block_sparse_moe.experts.147.w2", "model.layers.34.block_sparse_moe.experts.148.w2", "model.layers.34.block_sparse_moe.experts.149.w2", "model.layers.34.block_sparse_moe.experts.150.w2", "model.layers.34.block_sparse_moe.experts.151.w2", "model.layers.34.block_sparse_moe.experts.152.w2", "model.layers.34.block_sparse_moe.experts.153.w2", "model.layers.34.block_sparse_moe.experts.154.w2", "model.layers.34.block_sparse_moe.experts.155.w2", "model.layers.34.block_sparse_moe.experts.156.w2", "model.layers.34.block_sparse_moe.experts.157.w2", "model.layers.34.block_sparse_moe.experts.158.w2", "model.layers.34.block_sparse_moe.experts.159.w2", "model.layers.34.block_sparse_moe.experts.160.w2", "model.layers.34.block_sparse_moe.experts.161.w2", "model.layers.34.block_sparse_moe.experts.162.w2", "model.layers.34.block_sparse_moe.experts.163.w2", "model.layers.34.block_sparse_moe.experts.164.w2", "model.layers.34.block_sparse_moe.experts.165.w2", "model.layers.34.block_sparse_moe.experts.166.w2", "model.layers.34.block_sparse_moe.experts.167.w2", "model.layers.34.block_sparse_moe.experts.168.w2", "model.layers.34.block_sparse_moe.experts.169.w2", "model.layers.34.block_sparse_moe.experts.170.w2", "model.layers.34.block_sparse_moe.experts.171.w2", "model.layers.34.block_sparse_moe.experts.172.w2", "model.layers.34.block_sparse_moe.experts.173.w2", "model.layers.34.block_sparse_moe.experts.174.w2", "model.layers.34.block_sparse_moe.experts.175.w2", "model.layers.34.block_sparse_moe.experts.176.w2", "model.layers.34.block_sparse_moe.experts.177.w2", "model.layers.34.block_sparse_moe.experts.178.w2", "model.layers.34.block_sparse_moe.experts.179.w2", "model.layers.34.block_sparse_moe.experts.180.w2", "model.layers.34.block_sparse_moe.experts.181.w2", "model.layers.34.block_sparse_moe.experts.182.w2", "model.layers.34.block_sparse_moe.experts.183.w2", "model.layers.34.block_sparse_moe.experts.184.w2", "model.layers.34.block_sparse_moe.experts.185.w2", "model.layers.34.block_sparse_moe.experts.186.w2", "model.layers.34.block_sparse_moe.experts.187.w2", "model.layers.34.block_sparse_moe.experts.188.w2", "model.layers.34.block_sparse_moe.experts.189.w2", "model.layers.34.block_sparse_moe.experts.190.w2", "model.layers.34.block_sparse_moe.experts.191.w2", "model.layers.34.block_sparse_moe.experts.192.w2", "model.layers.34.block_sparse_moe.experts.193.w2", "model.layers.34.block_sparse_moe.experts.194.w2", "model.layers.34.block_sparse_moe.experts.195.w2", "model.layers.34.block_sparse_moe.experts.196.w2", "model.layers.34.block_sparse_moe.experts.197.w2", "model.layers.34.block_sparse_moe.experts.198.w2", "model.layers.34.block_sparse_moe.experts.199.w2", "model.layers.34.block_sparse_moe.experts.200.w2", "model.layers.34.block_sparse_moe.experts.201.w2", "model.layers.34.block_sparse_moe.experts.202.w2", "model.layers.34.block_sparse_moe.experts.203.w2", "model.layers.34.block_sparse_moe.experts.204.w2", "model.layers.34.block_sparse_moe.experts.205.w2", "model.layers.34.block_sparse_moe.experts.206.w2", "model.layers.34.block_sparse_moe.experts.207.w2", "model.layers.34.block_sparse_moe.experts.208.w2", "model.layers.34.block_sparse_moe.experts.209.w2", "model.layers.34.block_sparse_moe.experts.210.w2", "model.layers.34.block_sparse_moe.experts.211.w2", "model.layers.34.block_sparse_moe.experts.212.w2", "model.layers.34.block_sparse_moe.experts.213.w2", "model.layers.34.block_sparse_moe.experts.214.w2", "model.layers.34.block_sparse_moe.experts.215.w2", "model.layers.34.block_sparse_moe.experts.216.w2", "model.layers.34.block_sparse_moe.experts.217.w2", "model.layers.34.block_sparse_moe.experts.218.w2", "model.layers.34.block_sparse_moe.experts.219.w2", "model.layers.34.block_sparse_moe.experts.220.w2", "model.layers.34.block_sparse_moe.experts.221.w2", "model.layers.34.block_sparse_moe.experts.222.w2", "model.layers.34.block_sparse_moe.experts.223.w2", "model.layers.34.block_sparse_moe.experts.224.w2", "model.layers.34.block_sparse_moe.experts.225.w2", "model.layers.34.block_sparse_moe.experts.226.w2", "model.layers.34.block_sparse_moe.experts.227.w2", "model.layers.34.block_sparse_moe.experts.228.w2", "model.layers.34.block_sparse_moe.experts.229.w2", "model.layers.34.block_sparse_moe.experts.230.w2", "model.layers.34.block_sparse_moe.experts.231.w2", "model.layers.34.block_sparse_moe.experts.232.w2", "model.layers.34.block_sparse_moe.experts.233.w2", "model.layers.34.block_sparse_moe.experts.234.w2", "model.layers.34.block_sparse_moe.experts.235.w2", "model.layers.34.block_sparse_moe.experts.236.w2", "model.layers.34.block_sparse_moe.experts.237.w2", "model.layers.34.block_sparse_moe.experts.238.w2", "model.layers.34.block_sparse_moe.experts.239.w2", "model.layers.34.block_sparse_moe.experts.240.w2", "model.layers.34.block_sparse_moe.experts.241.w2", "model.layers.34.block_sparse_moe.experts.242.w2", "model.layers.34.block_sparse_moe.experts.243.w2", "model.layers.34.block_sparse_moe.experts.244.w2", "model.layers.34.block_sparse_moe.experts.245.w2", "model.layers.34.block_sparse_moe.experts.246.w2", "model.layers.34.block_sparse_moe.experts.247.w2", "model.layers.34.block_sparse_moe.experts.248.w2", "model.layers.34.block_sparse_moe.experts.249.w2", "model.layers.34.block_sparse_moe.experts.250.w2", "model.layers.34.block_sparse_moe.experts.251.w2", "model.layers.34.block_sparse_moe.experts.252.w2", "model.layers.34.block_sparse_moe.experts.253.w2", "model.layers.34.block_sparse_moe.experts.254.w2", "model.layers.34.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0002849336713552364, "dbits": 1207959552 } ] }, { "idx": 175, "layers": [ "model.layers.35.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0014840636402368435, "dbits": 18874368 } ] }, { "idx": 176, "layers": [ "model.layers.35.self_attn.k_proj", "model.layers.35.self_attn.v_proj" ], "candidates": [ { "dkld": 0.015123725309967972, "dbits": 6291456 } ] }, { "idx": 177, "layers": [ "model.layers.35.self_attn.o_proj" ], "candidates": [ { "dkld": -0.006342316046357155, "dbits": 18874368 } ] }, { "idx": 178, "layers": [ "model.layers.35.block_sparse_moe.experts.0.w1", "model.layers.35.block_sparse_moe.experts.1.w1", "model.layers.35.block_sparse_moe.experts.2.w1", "model.layers.35.block_sparse_moe.experts.3.w1", "model.layers.35.block_sparse_moe.experts.4.w1", "model.layers.35.block_sparse_moe.experts.5.w1", "model.layers.35.block_sparse_moe.experts.6.w1", "model.layers.35.block_sparse_moe.experts.7.w1", "model.layers.35.block_sparse_moe.experts.8.w1", "model.layers.35.block_sparse_moe.experts.9.w1", "model.layers.35.block_sparse_moe.experts.10.w1", "model.layers.35.block_sparse_moe.experts.11.w1", "model.layers.35.block_sparse_moe.experts.12.w1", "model.layers.35.block_sparse_moe.experts.13.w1", "model.layers.35.block_sparse_moe.experts.14.w1", "model.layers.35.block_sparse_moe.experts.15.w1", "model.layers.35.block_sparse_moe.experts.16.w1", "model.layers.35.block_sparse_moe.experts.17.w1", "model.layers.35.block_sparse_moe.experts.18.w1", "model.layers.35.block_sparse_moe.experts.19.w1", "model.layers.35.block_sparse_moe.experts.20.w1", "model.layers.35.block_sparse_moe.experts.21.w1", "model.layers.35.block_sparse_moe.experts.22.w1", "model.layers.35.block_sparse_moe.experts.23.w1", "model.layers.35.block_sparse_moe.experts.24.w1", "model.layers.35.block_sparse_moe.experts.25.w1", "model.layers.35.block_sparse_moe.experts.26.w1", "model.layers.35.block_sparse_moe.experts.27.w1", "model.layers.35.block_sparse_moe.experts.28.w1", "model.layers.35.block_sparse_moe.experts.29.w1", "model.layers.35.block_sparse_moe.experts.30.w1", "model.layers.35.block_sparse_moe.experts.31.w1", "model.layers.35.block_sparse_moe.experts.32.w1", "model.layers.35.block_sparse_moe.experts.33.w1", "model.layers.35.block_sparse_moe.experts.34.w1", "model.layers.35.block_sparse_moe.experts.35.w1", "model.layers.35.block_sparse_moe.experts.36.w1", "model.layers.35.block_sparse_moe.experts.37.w1", "model.layers.35.block_sparse_moe.experts.38.w1", "model.layers.35.block_sparse_moe.experts.39.w1", "model.layers.35.block_sparse_moe.experts.40.w1", "model.layers.35.block_sparse_moe.experts.41.w1", "model.layers.35.block_sparse_moe.experts.42.w1", "model.layers.35.block_sparse_moe.experts.43.w1", "model.layers.35.block_sparse_moe.experts.44.w1", "model.layers.35.block_sparse_moe.experts.45.w1", "model.layers.35.block_sparse_moe.experts.46.w1", "model.layers.35.block_sparse_moe.experts.47.w1", "model.layers.35.block_sparse_moe.experts.48.w1", "model.layers.35.block_sparse_moe.experts.49.w1", "model.layers.35.block_sparse_moe.experts.50.w1", "model.layers.35.block_sparse_moe.experts.51.w1", "model.layers.35.block_sparse_moe.experts.52.w1", "model.layers.35.block_sparse_moe.experts.53.w1", "model.layers.35.block_sparse_moe.experts.54.w1", "model.layers.35.block_sparse_moe.experts.55.w1", "model.layers.35.block_sparse_moe.experts.56.w1", "model.layers.35.block_sparse_moe.experts.57.w1", "model.layers.35.block_sparse_moe.experts.58.w1", "model.layers.35.block_sparse_moe.experts.59.w1", "model.layers.35.block_sparse_moe.experts.60.w1", "model.layers.35.block_sparse_moe.experts.61.w1", "model.layers.35.block_sparse_moe.experts.62.w1", "model.layers.35.block_sparse_moe.experts.63.w1", "model.layers.35.block_sparse_moe.experts.64.w1", "model.layers.35.block_sparse_moe.experts.65.w1", "model.layers.35.block_sparse_moe.experts.66.w1", "model.layers.35.block_sparse_moe.experts.67.w1", "model.layers.35.block_sparse_moe.experts.68.w1", "model.layers.35.block_sparse_moe.experts.69.w1", "model.layers.35.block_sparse_moe.experts.70.w1", "model.layers.35.block_sparse_moe.experts.71.w1", "model.layers.35.block_sparse_moe.experts.72.w1", "model.layers.35.block_sparse_moe.experts.73.w1", "model.layers.35.block_sparse_moe.experts.74.w1", "model.layers.35.block_sparse_moe.experts.75.w1", "model.layers.35.block_sparse_moe.experts.76.w1", "model.layers.35.block_sparse_moe.experts.77.w1", "model.layers.35.block_sparse_moe.experts.78.w1", "model.layers.35.block_sparse_moe.experts.79.w1", "model.layers.35.block_sparse_moe.experts.80.w1", "model.layers.35.block_sparse_moe.experts.81.w1", "model.layers.35.block_sparse_moe.experts.82.w1", "model.layers.35.block_sparse_moe.experts.83.w1", "model.layers.35.block_sparse_moe.experts.84.w1", "model.layers.35.block_sparse_moe.experts.85.w1", "model.layers.35.block_sparse_moe.experts.86.w1", "model.layers.35.block_sparse_moe.experts.87.w1", "model.layers.35.block_sparse_moe.experts.88.w1", "model.layers.35.block_sparse_moe.experts.89.w1", "model.layers.35.block_sparse_moe.experts.90.w1", "model.layers.35.block_sparse_moe.experts.91.w1", "model.layers.35.block_sparse_moe.experts.92.w1", "model.layers.35.block_sparse_moe.experts.93.w1", "model.layers.35.block_sparse_moe.experts.94.w1", "model.layers.35.block_sparse_moe.experts.95.w1", "model.layers.35.block_sparse_moe.experts.96.w1", "model.layers.35.block_sparse_moe.experts.97.w1", "model.layers.35.block_sparse_moe.experts.98.w1", "model.layers.35.block_sparse_moe.experts.99.w1", "model.layers.35.block_sparse_moe.experts.100.w1", "model.layers.35.block_sparse_moe.experts.101.w1", "model.layers.35.block_sparse_moe.experts.102.w1", "model.layers.35.block_sparse_moe.experts.103.w1", "model.layers.35.block_sparse_moe.experts.104.w1", "model.layers.35.block_sparse_moe.experts.105.w1", "model.layers.35.block_sparse_moe.experts.106.w1", "model.layers.35.block_sparse_moe.experts.107.w1", "model.layers.35.block_sparse_moe.experts.108.w1", "model.layers.35.block_sparse_moe.experts.109.w1", "model.layers.35.block_sparse_moe.experts.110.w1", "model.layers.35.block_sparse_moe.experts.111.w1", "model.layers.35.block_sparse_moe.experts.112.w1", "model.layers.35.block_sparse_moe.experts.113.w1", "model.layers.35.block_sparse_moe.experts.114.w1", "model.layers.35.block_sparse_moe.experts.115.w1", "model.layers.35.block_sparse_moe.experts.116.w1", "model.layers.35.block_sparse_moe.experts.117.w1", "model.layers.35.block_sparse_moe.experts.118.w1", "model.layers.35.block_sparse_moe.experts.119.w1", "model.layers.35.block_sparse_moe.experts.120.w1", "model.layers.35.block_sparse_moe.experts.121.w1", "model.layers.35.block_sparse_moe.experts.122.w1", "model.layers.35.block_sparse_moe.experts.123.w1", "model.layers.35.block_sparse_moe.experts.124.w1", "model.layers.35.block_sparse_moe.experts.125.w1", "model.layers.35.block_sparse_moe.experts.126.w1", "model.layers.35.block_sparse_moe.experts.127.w1", "model.layers.35.block_sparse_moe.experts.128.w1", "model.layers.35.block_sparse_moe.experts.129.w1", "model.layers.35.block_sparse_moe.experts.130.w1", "model.layers.35.block_sparse_moe.experts.131.w1", "model.layers.35.block_sparse_moe.experts.132.w1", "model.layers.35.block_sparse_moe.experts.133.w1", "model.layers.35.block_sparse_moe.experts.134.w1", "model.layers.35.block_sparse_moe.experts.135.w1", "model.layers.35.block_sparse_moe.experts.136.w1", "model.layers.35.block_sparse_moe.experts.137.w1", "model.layers.35.block_sparse_moe.experts.138.w1", "model.layers.35.block_sparse_moe.experts.139.w1", "model.layers.35.block_sparse_moe.experts.140.w1", "model.layers.35.block_sparse_moe.experts.141.w1", "model.layers.35.block_sparse_moe.experts.142.w1", "model.layers.35.block_sparse_moe.experts.143.w1", "model.layers.35.block_sparse_moe.experts.144.w1", "model.layers.35.block_sparse_moe.experts.145.w1", "model.layers.35.block_sparse_moe.experts.146.w1", "model.layers.35.block_sparse_moe.experts.147.w1", "model.layers.35.block_sparse_moe.experts.148.w1", "model.layers.35.block_sparse_moe.experts.149.w1", "model.layers.35.block_sparse_moe.experts.150.w1", "model.layers.35.block_sparse_moe.experts.151.w1", "model.layers.35.block_sparse_moe.experts.152.w1", "model.layers.35.block_sparse_moe.experts.153.w1", "model.layers.35.block_sparse_moe.experts.154.w1", "model.layers.35.block_sparse_moe.experts.155.w1", "model.layers.35.block_sparse_moe.experts.156.w1", "model.layers.35.block_sparse_moe.experts.157.w1", "model.layers.35.block_sparse_moe.experts.158.w1", "model.layers.35.block_sparse_moe.experts.159.w1", "model.layers.35.block_sparse_moe.experts.160.w1", "model.layers.35.block_sparse_moe.experts.161.w1", "model.layers.35.block_sparse_moe.experts.162.w1", "model.layers.35.block_sparse_moe.experts.163.w1", "model.layers.35.block_sparse_moe.experts.164.w1", "model.layers.35.block_sparse_moe.experts.165.w1", "model.layers.35.block_sparse_moe.experts.166.w1", "model.layers.35.block_sparse_moe.experts.167.w1", "model.layers.35.block_sparse_moe.experts.168.w1", "model.layers.35.block_sparse_moe.experts.169.w1", "model.layers.35.block_sparse_moe.experts.170.w1", "model.layers.35.block_sparse_moe.experts.171.w1", "model.layers.35.block_sparse_moe.experts.172.w1", "model.layers.35.block_sparse_moe.experts.173.w1", "model.layers.35.block_sparse_moe.experts.174.w1", "model.layers.35.block_sparse_moe.experts.175.w1", "model.layers.35.block_sparse_moe.experts.176.w1", "model.layers.35.block_sparse_moe.experts.177.w1", "model.layers.35.block_sparse_moe.experts.178.w1", "model.layers.35.block_sparse_moe.experts.179.w1", "model.layers.35.block_sparse_moe.experts.180.w1", "model.layers.35.block_sparse_moe.experts.181.w1", "model.layers.35.block_sparse_moe.experts.182.w1", "model.layers.35.block_sparse_moe.experts.183.w1", "model.layers.35.block_sparse_moe.experts.184.w1", "model.layers.35.block_sparse_moe.experts.185.w1", "model.layers.35.block_sparse_moe.experts.186.w1", "model.layers.35.block_sparse_moe.experts.187.w1", "model.layers.35.block_sparse_moe.experts.188.w1", "model.layers.35.block_sparse_moe.experts.189.w1", "model.layers.35.block_sparse_moe.experts.190.w1", "model.layers.35.block_sparse_moe.experts.191.w1", "model.layers.35.block_sparse_moe.experts.192.w1", "model.layers.35.block_sparse_moe.experts.193.w1", "model.layers.35.block_sparse_moe.experts.194.w1", "model.layers.35.block_sparse_moe.experts.195.w1", "model.layers.35.block_sparse_moe.experts.196.w1", "model.layers.35.block_sparse_moe.experts.197.w1", "model.layers.35.block_sparse_moe.experts.198.w1", "model.layers.35.block_sparse_moe.experts.199.w1", "model.layers.35.block_sparse_moe.experts.200.w1", "model.layers.35.block_sparse_moe.experts.201.w1", "model.layers.35.block_sparse_moe.experts.202.w1", "model.layers.35.block_sparse_moe.experts.203.w1", "model.layers.35.block_sparse_moe.experts.204.w1", "model.layers.35.block_sparse_moe.experts.205.w1", "model.layers.35.block_sparse_moe.experts.206.w1", "model.layers.35.block_sparse_moe.experts.207.w1", "model.layers.35.block_sparse_moe.experts.208.w1", "model.layers.35.block_sparse_moe.experts.209.w1", "model.layers.35.block_sparse_moe.experts.210.w1", "model.layers.35.block_sparse_moe.experts.211.w1", "model.layers.35.block_sparse_moe.experts.212.w1", "model.layers.35.block_sparse_moe.experts.213.w1", "model.layers.35.block_sparse_moe.experts.214.w1", "model.layers.35.block_sparse_moe.experts.215.w1", "model.layers.35.block_sparse_moe.experts.216.w1", "model.layers.35.block_sparse_moe.experts.217.w1", "model.layers.35.block_sparse_moe.experts.218.w1", "model.layers.35.block_sparse_moe.experts.219.w1", "model.layers.35.block_sparse_moe.experts.220.w1", "model.layers.35.block_sparse_moe.experts.221.w1", "model.layers.35.block_sparse_moe.experts.222.w1", "model.layers.35.block_sparse_moe.experts.223.w1", "model.layers.35.block_sparse_moe.experts.224.w1", "model.layers.35.block_sparse_moe.experts.225.w1", "model.layers.35.block_sparse_moe.experts.226.w1", "model.layers.35.block_sparse_moe.experts.227.w1", "model.layers.35.block_sparse_moe.experts.228.w1", "model.layers.35.block_sparse_moe.experts.229.w1", "model.layers.35.block_sparse_moe.experts.230.w1", "model.layers.35.block_sparse_moe.experts.231.w1", "model.layers.35.block_sparse_moe.experts.232.w1", "model.layers.35.block_sparse_moe.experts.233.w1", "model.layers.35.block_sparse_moe.experts.234.w1", "model.layers.35.block_sparse_moe.experts.235.w1", "model.layers.35.block_sparse_moe.experts.236.w1", "model.layers.35.block_sparse_moe.experts.237.w1", "model.layers.35.block_sparse_moe.experts.238.w1", "model.layers.35.block_sparse_moe.experts.239.w1", "model.layers.35.block_sparse_moe.experts.240.w1", "model.layers.35.block_sparse_moe.experts.241.w1", "model.layers.35.block_sparse_moe.experts.242.w1", "model.layers.35.block_sparse_moe.experts.243.w1", "model.layers.35.block_sparse_moe.experts.244.w1", "model.layers.35.block_sparse_moe.experts.245.w1", "model.layers.35.block_sparse_moe.experts.246.w1", "model.layers.35.block_sparse_moe.experts.247.w1", "model.layers.35.block_sparse_moe.experts.248.w1", "model.layers.35.block_sparse_moe.experts.249.w1", "model.layers.35.block_sparse_moe.experts.250.w1", "model.layers.35.block_sparse_moe.experts.251.w1", "model.layers.35.block_sparse_moe.experts.252.w1", "model.layers.35.block_sparse_moe.experts.253.w1", "model.layers.35.block_sparse_moe.experts.254.w1", "model.layers.35.block_sparse_moe.experts.255.w1", "model.layers.35.block_sparse_moe.experts.0.w3", "model.layers.35.block_sparse_moe.experts.1.w3", "model.layers.35.block_sparse_moe.experts.2.w3", "model.layers.35.block_sparse_moe.experts.3.w3", "model.layers.35.block_sparse_moe.experts.4.w3", "model.layers.35.block_sparse_moe.experts.5.w3", "model.layers.35.block_sparse_moe.experts.6.w3", "model.layers.35.block_sparse_moe.experts.7.w3", "model.layers.35.block_sparse_moe.experts.8.w3", "model.layers.35.block_sparse_moe.experts.9.w3", "model.layers.35.block_sparse_moe.experts.10.w3", "model.layers.35.block_sparse_moe.experts.11.w3", "model.layers.35.block_sparse_moe.experts.12.w3", "model.layers.35.block_sparse_moe.experts.13.w3", "model.layers.35.block_sparse_moe.experts.14.w3", "model.layers.35.block_sparse_moe.experts.15.w3", "model.layers.35.block_sparse_moe.experts.16.w3", "model.layers.35.block_sparse_moe.experts.17.w3", "model.layers.35.block_sparse_moe.experts.18.w3", "model.layers.35.block_sparse_moe.experts.19.w3", "model.layers.35.block_sparse_moe.experts.20.w3", "model.layers.35.block_sparse_moe.experts.21.w3", "model.layers.35.block_sparse_moe.experts.22.w3", "model.layers.35.block_sparse_moe.experts.23.w3", "model.layers.35.block_sparse_moe.experts.24.w3", "model.layers.35.block_sparse_moe.experts.25.w3", "model.layers.35.block_sparse_moe.experts.26.w3", "model.layers.35.block_sparse_moe.experts.27.w3", "model.layers.35.block_sparse_moe.experts.28.w3", "model.layers.35.block_sparse_moe.experts.29.w3", "model.layers.35.block_sparse_moe.experts.30.w3", "model.layers.35.block_sparse_moe.experts.31.w3", "model.layers.35.block_sparse_moe.experts.32.w3", "model.layers.35.block_sparse_moe.experts.33.w3", "model.layers.35.block_sparse_moe.experts.34.w3", "model.layers.35.block_sparse_moe.experts.35.w3", "model.layers.35.block_sparse_moe.experts.36.w3", "model.layers.35.block_sparse_moe.experts.37.w3", "model.layers.35.block_sparse_moe.experts.38.w3", "model.layers.35.block_sparse_moe.experts.39.w3", "model.layers.35.block_sparse_moe.experts.40.w3", "model.layers.35.block_sparse_moe.experts.41.w3", "model.layers.35.block_sparse_moe.experts.42.w3", "model.layers.35.block_sparse_moe.experts.43.w3", "model.layers.35.block_sparse_moe.experts.44.w3", "model.layers.35.block_sparse_moe.experts.45.w3", "model.layers.35.block_sparse_moe.experts.46.w3", "model.layers.35.block_sparse_moe.experts.47.w3", "model.layers.35.block_sparse_moe.experts.48.w3", "model.layers.35.block_sparse_moe.experts.49.w3", "model.layers.35.block_sparse_moe.experts.50.w3", "model.layers.35.block_sparse_moe.experts.51.w3", "model.layers.35.block_sparse_moe.experts.52.w3", "model.layers.35.block_sparse_moe.experts.53.w3", "model.layers.35.block_sparse_moe.experts.54.w3", "model.layers.35.block_sparse_moe.experts.55.w3", "model.layers.35.block_sparse_moe.experts.56.w3", "model.layers.35.block_sparse_moe.experts.57.w3", "model.layers.35.block_sparse_moe.experts.58.w3", "model.layers.35.block_sparse_moe.experts.59.w3", "model.layers.35.block_sparse_moe.experts.60.w3", "model.layers.35.block_sparse_moe.experts.61.w3", "model.layers.35.block_sparse_moe.experts.62.w3", "model.layers.35.block_sparse_moe.experts.63.w3", "model.layers.35.block_sparse_moe.experts.64.w3", "model.layers.35.block_sparse_moe.experts.65.w3", "model.layers.35.block_sparse_moe.experts.66.w3", "model.layers.35.block_sparse_moe.experts.67.w3", "model.layers.35.block_sparse_moe.experts.68.w3", "model.layers.35.block_sparse_moe.experts.69.w3", "model.layers.35.block_sparse_moe.experts.70.w3", "model.layers.35.block_sparse_moe.experts.71.w3", "model.layers.35.block_sparse_moe.experts.72.w3", "model.layers.35.block_sparse_moe.experts.73.w3", "model.layers.35.block_sparse_moe.experts.74.w3", "model.layers.35.block_sparse_moe.experts.75.w3", "model.layers.35.block_sparse_moe.experts.76.w3", "model.layers.35.block_sparse_moe.experts.77.w3", "model.layers.35.block_sparse_moe.experts.78.w3", "model.layers.35.block_sparse_moe.experts.79.w3", "model.layers.35.block_sparse_moe.experts.80.w3", "model.layers.35.block_sparse_moe.experts.81.w3", "model.layers.35.block_sparse_moe.experts.82.w3", "model.layers.35.block_sparse_moe.experts.83.w3", "model.layers.35.block_sparse_moe.experts.84.w3", "model.layers.35.block_sparse_moe.experts.85.w3", "model.layers.35.block_sparse_moe.experts.86.w3", "model.layers.35.block_sparse_moe.experts.87.w3", "model.layers.35.block_sparse_moe.experts.88.w3", "model.layers.35.block_sparse_moe.experts.89.w3", "model.layers.35.block_sparse_moe.experts.90.w3", "model.layers.35.block_sparse_moe.experts.91.w3", "model.layers.35.block_sparse_moe.experts.92.w3", "model.layers.35.block_sparse_moe.experts.93.w3", "model.layers.35.block_sparse_moe.experts.94.w3", "model.layers.35.block_sparse_moe.experts.95.w3", "model.layers.35.block_sparse_moe.experts.96.w3", "model.layers.35.block_sparse_moe.experts.97.w3", "model.layers.35.block_sparse_moe.experts.98.w3", "model.layers.35.block_sparse_moe.experts.99.w3", "model.layers.35.block_sparse_moe.experts.100.w3", "model.layers.35.block_sparse_moe.experts.101.w3", "model.layers.35.block_sparse_moe.experts.102.w3", "model.layers.35.block_sparse_moe.experts.103.w3", "model.layers.35.block_sparse_moe.experts.104.w3", "model.layers.35.block_sparse_moe.experts.105.w3", "model.layers.35.block_sparse_moe.experts.106.w3", "model.layers.35.block_sparse_moe.experts.107.w3", "model.layers.35.block_sparse_moe.experts.108.w3", "model.layers.35.block_sparse_moe.experts.109.w3", "model.layers.35.block_sparse_moe.experts.110.w3", "model.layers.35.block_sparse_moe.experts.111.w3", "model.layers.35.block_sparse_moe.experts.112.w3", "model.layers.35.block_sparse_moe.experts.113.w3", "model.layers.35.block_sparse_moe.experts.114.w3", "model.layers.35.block_sparse_moe.experts.115.w3", "model.layers.35.block_sparse_moe.experts.116.w3", "model.layers.35.block_sparse_moe.experts.117.w3", "model.layers.35.block_sparse_moe.experts.118.w3", "model.layers.35.block_sparse_moe.experts.119.w3", "model.layers.35.block_sparse_moe.experts.120.w3", "model.layers.35.block_sparse_moe.experts.121.w3", "model.layers.35.block_sparse_moe.experts.122.w3", "model.layers.35.block_sparse_moe.experts.123.w3", "model.layers.35.block_sparse_moe.experts.124.w3", "model.layers.35.block_sparse_moe.experts.125.w3", "model.layers.35.block_sparse_moe.experts.126.w3", "model.layers.35.block_sparse_moe.experts.127.w3", "model.layers.35.block_sparse_moe.experts.128.w3", "model.layers.35.block_sparse_moe.experts.129.w3", "model.layers.35.block_sparse_moe.experts.130.w3", "model.layers.35.block_sparse_moe.experts.131.w3", "model.layers.35.block_sparse_moe.experts.132.w3", "model.layers.35.block_sparse_moe.experts.133.w3", "model.layers.35.block_sparse_moe.experts.134.w3", "model.layers.35.block_sparse_moe.experts.135.w3", "model.layers.35.block_sparse_moe.experts.136.w3", "model.layers.35.block_sparse_moe.experts.137.w3", "model.layers.35.block_sparse_moe.experts.138.w3", "model.layers.35.block_sparse_moe.experts.139.w3", "model.layers.35.block_sparse_moe.experts.140.w3", "model.layers.35.block_sparse_moe.experts.141.w3", "model.layers.35.block_sparse_moe.experts.142.w3", "model.layers.35.block_sparse_moe.experts.143.w3", "model.layers.35.block_sparse_moe.experts.144.w3", "model.layers.35.block_sparse_moe.experts.145.w3", "model.layers.35.block_sparse_moe.experts.146.w3", "model.layers.35.block_sparse_moe.experts.147.w3", "model.layers.35.block_sparse_moe.experts.148.w3", "model.layers.35.block_sparse_moe.experts.149.w3", "model.layers.35.block_sparse_moe.experts.150.w3", "model.layers.35.block_sparse_moe.experts.151.w3", "model.layers.35.block_sparse_moe.experts.152.w3", "model.layers.35.block_sparse_moe.experts.153.w3", "model.layers.35.block_sparse_moe.experts.154.w3", "model.layers.35.block_sparse_moe.experts.155.w3", "model.layers.35.block_sparse_moe.experts.156.w3", "model.layers.35.block_sparse_moe.experts.157.w3", "model.layers.35.block_sparse_moe.experts.158.w3", "model.layers.35.block_sparse_moe.experts.159.w3", "model.layers.35.block_sparse_moe.experts.160.w3", "model.layers.35.block_sparse_moe.experts.161.w3", "model.layers.35.block_sparse_moe.experts.162.w3", "model.layers.35.block_sparse_moe.experts.163.w3", "model.layers.35.block_sparse_moe.experts.164.w3", "model.layers.35.block_sparse_moe.experts.165.w3", "model.layers.35.block_sparse_moe.experts.166.w3", "model.layers.35.block_sparse_moe.experts.167.w3", "model.layers.35.block_sparse_moe.experts.168.w3", "model.layers.35.block_sparse_moe.experts.169.w3", "model.layers.35.block_sparse_moe.experts.170.w3", "model.layers.35.block_sparse_moe.experts.171.w3", "model.layers.35.block_sparse_moe.experts.172.w3", "model.layers.35.block_sparse_moe.experts.173.w3", "model.layers.35.block_sparse_moe.experts.174.w3", "model.layers.35.block_sparse_moe.experts.175.w3", "model.layers.35.block_sparse_moe.experts.176.w3", "model.layers.35.block_sparse_moe.experts.177.w3", "model.layers.35.block_sparse_moe.experts.178.w3", "model.layers.35.block_sparse_moe.experts.179.w3", "model.layers.35.block_sparse_moe.experts.180.w3", "model.layers.35.block_sparse_moe.experts.181.w3", "model.layers.35.block_sparse_moe.experts.182.w3", "model.layers.35.block_sparse_moe.experts.183.w3", "model.layers.35.block_sparse_moe.experts.184.w3", "model.layers.35.block_sparse_moe.experts.185.w3", "model.layers.35.block_sparse_moe.experts.186.w3", "model.layers.35.block_sparse_moe.experts.187.w3", "model.layers.35.block_sparse_moe.experts.188.w3", "model.layers.35.block_sparse_moe.experts.189.w3", "model.layers.35.block_sparse_moe.experts.190.w3", "model.layers.35.block_sparse_moe.experts.191.w3", "model.layers.35.block_sparse_moe.experts.192.w3", "model.layers.35.block_sparse_moe.experts.193.w3", "model.layers.35.block_sparse_moe.experts.194.w3", "model.layers.35.block_sparse_moe.experts.195.w3", "model.layers.35.block_sparse_moe.experts.196.w3", "model.layers.35.block_sparse_moe.experts.197.w3", "model.layers.35.block_sparse_moe.experts.198.w3", "model.layers.35.block_sparse_moe.experts.199.w3", "model.layers.35.block_sparse_moe.experts.200.w3", "model.layers.35.block_sparse_moe.experts.201.w3", "model.layers.35.block_sparse_moe.experts.202.w3", "model.layers.35.block_sparse_moe.experts.203.w3", "model.layers.35.block_sparse_moe.experts.204.w3", "model.layers.35.block_sparse_moe.experts.205.w3", "model.layers.35.block_sparse_moe.experts.206.w3", "model.layers.35.block_sparse_moe.experts.207.w3", "model.layers.35.block_sparse_moe.experts.208.w3", "model.layers.35.block_sparse_moe.experts.209.w3", "model.layers.35.block_sparse_moe.experts.210.w3", "model.layers.35.block_sparse_moe.experts.211.w3", "model.layers.35.block_sparse_moe.experts.212.w3", "model.layers.35.block_sparse_moe.experts.213.w3", "model.layers.35.block_sparse_moe.experts.214.w3", "model.layers.35.block_sparse_moe.experts.215.w3", "model.layers.35.block_sparse_moe.experts.216.w3", "model.layers.35.block_sparse_moe.experts.217.w3", "model.layers.35.block_sparse_moe.experts.218.w3", "model.layers.35.block_sparse_moe.experts.219.w3", "model.layers.35.block_sparse_moe.experts.220.w3", "model.layers.35.block_sparse_moe.experts.221.w3", "model.layers.35.block_sparse_moe.experts.222.w3", "model.layers.35.block_sparse_moe.experts.223.w3", "model.layers.35.block_sparse_moe.experts.224.w3", "model.layers.35.block_sparse_moe.experts.225.w3", "model.layers.35.block_sparse_moe.experts.226.w3", "model.layers.35.block_sparse_moe.experts.227.w3", "model.layers.35.block_sparse_moe.experts.228.w3", "model.layers.35.block_sparse_moe.experts.229.w3", "model.layers.35.block_sparse_moe.experts.230.w3", "model.layers.35.block_sparse_moe.experts.231.w3", "model.layers.35.block_sparse_moe.experts.232.w3", "model.layers.35.block_sparse_moe.experts.233.w3", "model.layers.35.block_sparse_moe.experts.234.w3", "model.layers.35.block_sparse_moe.experts.235.w3", "model.layers.35.block_sparse_moe.experts.236.w3", "model.layers.35.block_sparse_moe.experts.237.w3", "model.layers.35.block_sparse_moe.experts.238.w3", "model.layers.35.block_sparse_moe.experts.239.w3", "model.layers.35.block_sparse_moe.experts.240.w3", "model.layers.35.block_sparse_moe.experts.241.w3", "model.layers.35.block_sparse_moe.experts.242.w3", "model.layers.35.block_sparse_moe.experts.243.w3", "model.layers.35.block_sparse_moe.experts.244.w3", "model.layers.35.block_sparse_moe.experts.245.w3", "model.layers.35.block_sparse_moe.experts.246.w3", "model.layers.35.block_sparse_moe.experts.247.w3", "model.layers.35.block_sparse_moe.experts.248.w3", "model.layers.35.block_sparse_moe.experts.249.w3", "model.layers.35.block_sparse_moe.experts.250.w3", "model.layers.35.block_sparse_moe.experts.251.w3", "model.layers.35.block_sparse_moe.experts.252.w3", "model.layers.35.block_sparse_moe.experts.253.w3", "model.layers.35.block_sparse_moe.experts.254.w3", "model.layers.35.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.000919315218925465, "dbits": 2415919104 } ] }, { "idx": 179, "layers": [ "model.layers.35.block_sparse_moe.experts.0.w2", "model.layers.35.block_sparse_moe.experts.1.w2", "model.layers.35.block_sparse_moe.experts.2.w2", "model.layers.35.block_sparse_moe.experts.3.w2", "model.layers.35.block_sparse_moe.experts.4.w2", "model.layers.35.block_sparse_moe.experts.5.w2", "model.layers.35.block_sparse_moe.experts.6.w2", "model.layers.35.block_sparse_moe.experts.7.w2", "model.layers.35.block_sparse_moe.experts.8.w2", "model.layers.35.block_sparse_moe.experts.9.w2", "model.layers.35.block_sparse_moe.experts.10.w2", "model.layers.35.block_sparse_moe.experts.11.w2", "model.layers.35.block_sparse_moe.experts.12.w2", "model.layers.35.block_sparse_moe.experts.13.w2", "model.layers.35.block_sparse_moe.experts.14.w2", "model.layers.35.block_sparse_moe.experts.15.w2", "model.layers.35.block_sparse_moe.experts.16.w2", "model.layers.35.block_sparse_moe.experts.17.w2", "model.layers.35.block_sparse_moe.experts.18.w2", "model.layers.35.block_sparse_moe.experts.19.w2", "model.layers.35.block_sparse_moe.experts.20.w2", "model.layers.35.block_sparse_moe.experts.21.w2", "model.layers.35.block_sparse_moe.experts.22.w2", "model.layers.35.block_sparse_moe.experts.23.w2", "model.layers.35.block_sparse_moe.experts.24.w2", "model.layers.35.block_sparse_moe.experts.25.w2", "model.layers.35.block_sparse_moe.experts.26.w2", "model.layers.35.block_sparse_moe.experts.27.w2", "model.layers.35.block_sparse_moe.experts.28.w2", "model.layers.35.block_sparse_moe.experts.29.w2", "model.layers.35.block_sparse_moe.experts.30.w2", "model.layers.35.block_sparse_moe.experts.31.w2", "model.layers.35.block_sparse_moe.experts.32.w2", "model.layers.35.block_sparse_moe.experts.33.w2", "model.layers.35.block_sparse_moe.experts.34.w2", "model.layers.35.block_sparse_moe.experts.35.w2", "model.layers.35.block_sparse_moe.experts.36.w2", "model.layers.35.block_sparse_moe.experts.37.w2", "model.layers.35.block_sparse_moe.experts.38.w2", "model.layers.35.block_sparse_moe.experts.39.w2", "model.layers.35.block_sparse_moe.experts.40.w2", "model.layers.35.block_sparse_moe.experts.41.w2", "model.layers.35.block_sparse_moe.experts.42.w2", "model.layers.35.block_sparse_moe.experts.43.w2", "model.layers.35.block_sparse_moe.experts.44.w2", "model.layers.35.block_sparse_moe.experts.45.w2", "model.layers.35.block_sparse_moe.experts.46.w2", "model.layers.35.block_sparse_moe.experts.47.w2", "model.layers.35.block_sparse_moe.experts.48.w2", "model.layers.35.block_sparse_moe.experts.49.w2", "model.layers.35.block_sparse_moe.experts.50.w2", "model.layers.35.block_sparse_moe.experts.51.w2", "model.layers.35.block_sparse_moe.experts.52.w2", "model.layers.35.block_sparse_moe.experts.53.w2", "model.layers.35.block_sparse_moe.experts.54.w2", "model.layers.35.block_sparse_moe.experts.55.w2", "model.layers.35.block_sparse_moe.experts.56.w2", "model.layers.35.block_sparse_moe.experts.57.w2", "model.layers.35.block_sparse_moe.experts.58.w2", "model.layers.35.block_sparse_moe.experts.59.w2", "model.layers.35.block_sparse_moe.experts.60.w2", "model.layers.35.block_sparse_moe.experts.61.w2", "model.layers.35.block_sparse_moe.experts.62.w2", "model.layers.35.block_sparse_moe.experts.63.w2", "model.layers.35.block_sparse_moe.experts.64.w2", "model.layers.35.block_sparse_moe.experts.65.w2", "model.layers.35.block_sparse_moe.experts.66.w2", "model.layers.35.block_sparse_moe.experts.67.w2", "model.layers.35.block_sparse_moe.experts.68.w2", "model.layers.35.block_sparse_moe.experts.69.w2", "model.layers.35.block_sparse_moe.experts.70.w2", "model.layers.35.block_sparse_moe.experts.71.w2", "model.layers.35.block_sparse_moe.experts.72.w2", "model.layers.35.block_sparse_moe.experts.73.w2", "model.layers.35.block_sparse_moe.experts.74.w2", "model.layers.35.block_sparse_moe.experts.75.w2", "model.layers.35.block_sparse_moe.experts.76.w2", "model.layers.35.block_sparse_moe.experts.77.w2", "model.layers.35.block_sparse_moe.experts.78.w2", "model.layers.35.block_sparse_moe.experts.79.w2", "model.layers.35.block_sparse_moe.experts.80.w2", "model.layers.35.block_sparse_moe.experts.81.w2", "model.layers.35.block_sparse_moe.experts.82.w2", "model.layers.35.block_sparse_moe.experts.83.w2", "model.layers.35.block_sparse_moe.experts.84.w2", "model.layers.35.block_sparse_moe.experts.85.w2", "model.layers.35.block_sparse_moe.experts.86.w2", "model.layers.35.block_sparse_moe.experts.87.w2", "model.layers.35.block_sparse_moe.experts.88.w2", "model.layers.35.block_sparse_moe.experts.89.w2", "model.layers.35.block_sparse_moe.experts.90.w2", "model.layers.35.block_sparse_moe.experts.91.w2", "model.layers.35.block_sparse_moe.experts.92.w2", "model.layers.35.block_sparse_moe.experts.93.w2", "model.layers.35.block_sparse_moe.experts.94.w2", "model.layers.35.block_sparse_moe.experts.95.w2", "model.layers.35.block_sparse_moe.experts.96.w2", "model.layers.35.block_sparse_moe.experts.97.w2", "model.layers.35.block_sparse_moe.experts.98.w2", "model.layers.35.block_sparse_moe.experts.99.w2", "model.layers.35.block_sparse_moe.experts.100.w2", "model.layers.35.block_sparse_moe.experts.101.w2", "model.layers.35.block_sparse_moe.experts.102.w2", "model.layers.35.block_sparse_moe.experts.103.w2", "model.layers.35.block_sparse_moe.experts.104.w2", "model.layers.35.block_sparse_moe.experts.105.w2", "model.layers.35.block_sparse_moe.experts.106.w2", "model.layers.35.block_sparse_moe.experts.107.w2", "model.layers.35.block_sparse_moe.experts.108.w2", "model.layers.35.block_sparse_moe.experts.109.w2", "model.layers.35.block_sparse_moe.experts.110.w2", "model.layers.35.block_sparse_moe.experts.111.w2", "model.layers.35.block_sparse_moe.experts.112.w2", "model.layers.35.block_sparse_moe.experts.113.w2", "model.layers.35.block_sparse_moe.experts.114.w2", "model.layers.35.block_sparse_moe.experts.115.w2", "model.layers.35.block_sparse_moe.experts.116.w2", "model.layers.35.block_sparse_moe.experts.117.w2", "model.layers.35.block_sparse_moe.experts.118.w2", "model.layers.35.block_sparse_moe.experts.119.w2", "model.layers.35.block_sparse_moe.experts.120.w2", "model.layers.35.block_sparse_moe.experts.121.w2", "model.layers.35.block_sparse_moe.experts.122.w2", "model.layers.35.block_sparse_moe.experts.123.w2", "model.layers.35.block_sparse_moe.experts.124.w2", "model.layers.35.block_sparse_moe.experts.125.w2", "model.layers.35.block_sparse_moe.experts.126.w2", "model.layers.35.block_sparse_moe.experts.127.w2", "model.layers.35.block_sparse_moe.experts.128.w2", "model.layers.35.block_sparse_moe.experts.129.w2", "model.layers.35.block_sparse_moe.experts.130.w2", "model.layers.35.block_sparse_moe.experts.131.w2", "model.layers.35.block_sparse_moe.experts.132.w2", "model.layers.35.block_sparse_moe.experts.133.w2", "model.layers.35.block_sparse_moe.experts.134.w2", "model.layers.35.block_sparse_moe.experts.135.w2", "model.layers.35.block_sparse_moe.experts.136.w2", "model.layers.35.block_sparse_moe.experts.137.w2", "model.layers.35.block_sparse_moe.experts.138.w2", "model.layers.35.block_sparse_moe.experts.139.w2", "model.layers.35.block_sparse_moe.experts.140.w2", "model.layers.35.block_sparse_moe.experts.141.w2", "model.layers.35.block_sparse_moe.experts.142.w2", "model.layers.35.block_sparse_moe.experts.143.w2", "model.layers.35.block_sparse_moe.experts.144.w2", "model.layers.35.block_sparse_moe.experts.145.w2", "model.layers.35.block_sparse_moe.experts.146.w2", "model.layers.35.block_sparse_moe.experts.147.w2", "model.layers.35.block_sparse_moe.experts.148.w2", "model.layers.35.block_sparse_moe.experts.149.w2", "model.layers.35.block_sparse_moe.experts.150.w2", "model.layers.35.block_sparse_moe.experts.151.w2", "model.layers.35.block_sparse_moe.experts.152.w2", "model.layers.35.block_sparse_moe.experts.153.w2", "model.layers.35.block_sparse_moe.experts.154.w2", "model.layers.35.block_sparse_moe.experts.155.w2", "model.layers.35.block_sparse_moe.experts.156.w2", "model.layers.35.block_sparse_moe.experts.157.w2", "model.layers.35.block_sparse_moe.experts.158.w2", "model.layers.35.block_sparse_moe.experts.159.w2", "model.layers.35.block_sparse_moe.experts.160.w2", "model.layers.35.block_sparse_moe.experts.161.w2", "model.layers.35.block_sparse_moe.experts.162.w2", "model.layers.35.block_sparse_moe.experts.163.w2", "model.layers.35.block_sparse_moe.experts.164.w2", "model.layers.35.block_sparse_moe.experts.165.w2", "model.layers.35.block_sparse_moe.experts.166.w2", "model.layers.35.block_sparse_moe.experts.167.w2", "model.layers.35.block_sparse_moe.experts.168.w2", "model.layers.35.block_sparse_moe.experts.169.w2", "model.layers.35.block_sparse_moe.experts.170.w2", "model.layers.35.block_sparse_moe.experts.171.w2", "model.layers.35.block_sparse_moe.experts.172.w2", "model.layers.35.block_sparse_moe.experts.173.w2", "model.layers.35.block_sparse_moe.experts.174.w2", "model.layers.35.block_sparse_moe.experts.175.w2", "model.layers.35.block_sparse_moe.experts.176.w2", "model.layers.35.block_sparse_moe.experts.177.w2", "model.layers.35.block_sparse_moe.experts.178.w2", "model.layers.35.block_sparse_moe.experts.179.w2", "model.layers.35.block_sparse_moe.experts.180.w2", "model.layers.35.block_sparse_moe.experts.181.w2", "model.layers.35.block_sparse_moe.experts.182.w2", "model.layers.35.block_sparse_moe.experts.183.w2", "model.layers.35.block_sparse_moe.experts.184.w2", "model.layers.35.block_sparse_moe.experts.185.w2", "model.layers.35.block_sparse_moe.experts.186.w2", "model.layers.35.block_sparse_moe.experts.187.w2", "model.layers.35.block_sparse_moe.experts.188.w2", "model.layers.35.block_sparse_moe.experts.189.w2", "model.layers.35.block_sparse_moe.experts.190.w2", "model.layers.35.block_sparse_moe.experts.191.w2", "model.layers.35.block_sparse_moe.experts.192.w2", "model.layers.35.block_sparse_moe.experts.193.w2", "model.layers.35.block_sparse_moe.experts.194.w2", "model.layers.35.block_sparse_moe.experts.195.w2", "model.layers.35.block_sparse_moe.experts.196.w2", "model.layers.35.block_sparse_moe.experts.197.w2", "model.layers.35.block_sparse_moe.experts.198.w2", "model.layers.35.block_sparse_moe.experts.199.w2", "model.layers.35.block_sparse_moe.experts.200.w2", "model.layers.35.block_sparse_moe.experts.201.w2", "model.layers.35.block_sparse_moe.experts.202.w2", "model.layers.35.block_sparse_moe.experts.203.w2", "model.layers.35.block_sparse_moe.experts.204.w2", "model.layers.35.block_sparse_moe.experts.205.w2", "model.layers.35.block_sparse_moe.experts.206.w2", "model.layers.35.block_sparse_moe.experts.207.w2", "model.layers.35.block_sparse_moe.experts.208.w2", "model.layers.35.block_sparse_moe.experts.209.w2", "model.layers.35.block_sparse_moe.experts.210.w2", "model.layers.35.block_sparse_moe.experts.211.w2", "model.layers.35.block_sparse_moe.experts.212.w2", "model.layers.35.block_sparse_moe.experts.213.w2", "model.layers.35.block_sparse_moe.experts.214.w2", "model.layers.35.block_sparse_moe.experts.215.w2", "model.layers.35.block_sparse_moe.experts.216.w2", "model.layers.35.block_sparse_moe.experts.217.w2", "model.layers.35.block_sparse_moe.experts.218.w2", "model.layers.35.block_sparse_moe.experts.219.w2", "model.layers.35.block_sparse_moe.experts.220.w2", "model.layers.35.block_sparse_moe.experts.221.w2", "model.layers.35.block_sparse_moe.experts.222.w2", "model.layers.35.block_sparse_moe.experts.223.w2", "model.layers.35.block_sparse_moe.experts.224.w2", "model.layers.35.block_sparse_moe.experts.225.w2", "model.layers.35.block_sparse_moe.experts.226.w2", "model.layers.35.block_sparse_moe.experts.227.w2", "model.layers.35.block_sparse_moe.experts.228.w2", "model.layers.35.block_sparse_moe.experts.229.w2", "model.layers.35.block_sparse_moe.experts.230.w2", "model.layers.35.block_sparse_moe.experts.231.w2", "model.layers.35.block_sparse_moe.experts.232.w2", "model.layers.35.block_sparse_moe.experts.233.w2", "model.layers.35.block_sparse_moe.experts.234.w2", "model.layers.35.block_sparse_moe.experts.235.w2", "model.layers.35.block_sparse_moe.experts.236.w2", "model.layers.35.block_sparse_moe.experts.237.w2", "model.layers.35.block_sparse_moe.experts.238.w2", "model.layers.35.block_sparse_moe.experts.239.w2", "model.layers.35.block_sparse_moe.experts.240.w2", "model.layers.35.block_sparse_moe.experts.241.w2", "model.layers.35.block_sparse_moe.experts.242.w2", "model.layers.35.block_sparse_moe.experts.243.w2", "model.layers.35.block_sparse_moe.experts.244.w2", "model.layers.35.block_sparse_moe.experts.245.w2", "model.layers.35.block_sparse_moe.experts.246.w2", "model.layers.35.block_sparse_moe.experts.247.w2", "model.layers.35.block_sparse_moe.experts.248.w2", "model.layers.35.block_sparse_moe.experts.249.w2", "model.layers.35.block_sparse_moe.experts.250.w2", "model.layers.35.block_sparse_moe.experts.251.w2", "model.layers.35.block_sparse_moe.experts.252.w2", "model.layers.35.block_sparse_moe.experts.253.w2", "model.layers.35.block_sparse_moe.experts.254.w2", "model.layers.35.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 7.843896746634327e-05, "dbits": 1207959552 } ] }, { "idx": 180, "layers": [ "model.layers.36.self_attn.q_proj" ], "candidates": [ { "dkld": -0.002364363521337509, "dbits": 18874368 } ] }, { "idx": 181, "layers": [ "model.layers.36.self_attn.k_proj", "model.layers.36.self_attn.v_proj" ], "candidates": [ { "dkld": -0.007723036035895359, "dbits": 6291456 } ] }, { "idx": 182, "layers": [ "model.layers.36.self_attn.o_proj" ], "candidates": [ { "dkld": -0.005692108348011982, "dbits": 18874368 } ] }, { "idx": 183, "layers": [ "model.layers.36.block_sparse_moe.experts.0.w1", "model.layers.36.block_sparse_moe.experts.1.w1", "model.layers.36.block_sparse_moe.experts.2.w1", "model.layers.36.block_sparse_moe.experts.3.w1", "model.layers.36.block_sparse_moe.experts.4.w1", "model.layers.36.block_sparse_moe.experts.5.w1", "model.layers.36.block_sparse_moe.experts.6.w1", "model.layers.36.block_sparse_moe.experts.7.w1", "model.layers.36.block_sparse_moe.experts.8.w1", "model.layers.36.block_sparse_moe.experts.9.w1", "model.layers.36.block_sparse_moe.experts.10.w1", "model.layers.36.block_sparse_moe.experts.11.w1", "model.layers.36.block_sparse_moe.experts.12.w1", "model.layers.36.block_sparse_moe.experts.13.w1", "model.layers.36.block_sparse_moe.experts.14.w1", "model.layers.36.block_sparse_moe.experts.15.w1", "model.layers.36.block_sparse_moe.experts.16.w1", "model.layers.36.block_sparse_moe.experts.17.w1", "model.layers.36.block_sparse_moe.experts.18.w1", "model.layers.36.block_sparse_moe.experts.19.w1", "model.layers.36.block_sparse_moe.experts.20.w1", "model.layers.36.block_sparse_moe.experts.21.w1", "model.layers.36.block_sparse_moe.experts.22.w1", "model.layers.36.block_sparse_moe.experts.23.w1", "model.layers.36.block_sparse_moe.experts.24.w1", "model.layers.36.block_sparse_moe.experts.25.w1", "model.layers.36.block_sparse_moe.experts.26.w1", "model.layers.36.block_sparse_moe.experts.27.w1", "model.layers.36.block_sparse_moe.experts.28.w1", "model.layers.36.block_sparse_moe.experts.29.w1", "model.layers.36.block_sparse_moe.experts.30.w1", "model.layers.36.block_sparse_moe.experts.31.w1", "model.layers.36.block_sparse_moe.experts.32.w1", "model.layers.36.block_sparse_moe.experts.33.w1", "model.layers.36.block_sparse_moe.experts.34.w1", "model.layers.36.block_sparse_moe.experts.35.w1", "model.layers.36.block_sparse_moe.experts.36.w1", "model.layers.36.block_sparse_moe.experts.37.w1", "model.layers.36.block_sparse_moe.experts.38.w1", "model.layers.36.block_sparse_moe.experts.39.w1", "model.layers.36.block_sparse_moe.experts.40.w1", "model.layers.36.block_sparse_moe.experts.41.w1", "model.layers.36.block_sparse_moe.experts.42.w1", "model.layers.36.block_sparse_moe.experts.43.w1", "model.layers.36.block_sparse_moe.experts.44.w1", "model.layers.36.block_sparse_moe.experts.45.w1", "model.layers.36.block_sparse_moe.experts.46.w1", "model.layers.36.block_sparse_moe.experts.47.w1", "model.layers.36.block_sparse_moe.experts.48.w1", "model.layers.36.block_sparse_moe.experts.49.w1", "model.layers.36.block_sparse_moe.experts.50.w1", "model.layers.36.block_sparse_moe.experts.51.w1", "model.layers.36.block_sparse_moe.experts.52.w1", "model.layers.36.block_sparse_moe.experts.53.w1", "model.layers.36.block_sparse_moe.experts.54.w1", "model.layers.36.block_sparse_moe.experts.55.w1", "model.layers.36.block_sparse_moe.experts.56.w1", "model.layers.36.block_sparse_moe.experts.57.w1", "model.layers.36.block_sparse_moe.experts.58.w1", "model.layers.36.block_sparse_moe.experts.59.w1", "model.layers.36.block_sparse_moe.experts.60.w1", "model.layers.36.block_sparse_moe.experts.61.w1", "model.layers.36.block_sparse_moe.experts.62.w1", "model.layers.36.block_sparse_moe.experts.63.w1", "model.layers.36.block_sparse_moe.experts.64.w1", "model.layers.36.block_sparse_moe.experts.65.w1", "model.layers.36.block_sparse_moe.experts.66.w1", "model.layers.36.block_sparse_moe.experts.67.w1", "model.layers.36.block_sparse_moe.experts.68.w1", "model.layers.36.block_sparse_moe.experts.69.w1", "model.layers.36.block_sparse_moe.experts.70.w1", "model.layers.36.block_sparse_moe.experts.71.w1", "model.layers.36.block_sparse_moe.experts.72.w1", "model.layers.36.block_sparse_moe.experts.73.w1", "model.layers.36.block_sparse_moe.experts.74.w1", "model.layers.36.block_sparse_moe.experts.75.w1", "model.layers.36.block_sparse_moe.experts.76.w1", "model.layers.36.block_sparse_moe.experts.77.w1", "model.layers.36.block_sparse_moe.experts.78.w1", "model.layers.36.block_sparse_moe.experts.79.w1", "model.layers.36.block_sparse_moe.experts.80.w1", "model.layers.36.block_sparse_moe.experts.81.w1", "model.layers.36.block_sparse_moe.experts.82.w1", "model.layers.36.block_sparse_moe.experts.83.w1", "model.layers.36.block_sparse_moe.experts.84.w1", "model.layers.36.block_sparse_moe.experts.85.w1", "model.layers.36.block_sparse_moe.experts.86.w1", "model.layers.36.block_sparse_moe.experts.87.w1", "model.layers.36.block_sparse_moe.experts.88.w1", "model.layers.36.block_sparse_moe.experts.89.w1", "model.layers.36.block_sparse_moe.experts.90.w1", "model.layers.36.block_sparse_moe.experts.91.w1", "model.layers.36.block_sparse_moe.experts.92.w1", "model.layers.36.block_sparse_moe.experts.93.w1", "model.layers.36.block_sparse_moe.experts.94.w1", "model.layers.36.block_sparse_moe.experts.95.w1", "model.layers.36.block_sparse_moe.experts.96.w1", "model.layers.36.block_sparse_moe.experts.97.w1", "model.layers.36.block_sparse_moe.experts.98.w1", "model.layers.36.block_sparse_moe.experts.99.w1", "model.layers.36.block_sparse_moe.experts.100.w1", "model.layers.36.block_sparse_moe.experts.101.w1", "model.layers.36.block_sparse_moe.experts.102.w1", "model.layers.36.block_sparse_moe.experts.103.w1", "model.layers.36.block_sparse_moe.experts.104.w1", "model.layers.36.block_sparse_moe.experts.105.w1", "model.layers.36.block_sparse_moe.experts.106.w1", "model.layers.36.block_sparse_moe.experts.107.w1", "model.layers.36.block_sparse_moe.experts.108.w1", "model.layers.36.block_sparse_moe.experts.109.w1", "model.layers.36.block_sparse_moe.experts.110.w1", "model.layers.36.block_sparse_moe.experts.111.w1", "model.layers.36.block_sparse_moe.experts.112.w1", "model.layers.36.block_sparse_moe.experts.113.w1", "model.layers.36.block_sparse_moe.experts.114.w1", "model.layers.36.block_sparse_moe.experts.115.w1", "model.layers.36.block_sparse_moe.experts.116.w1", "model.layers.36.block_sparse_moe.experts.117.w1", "model.layers.36.block_sparse_moe.experts.118.w1", "model.layers.36.block_sparse_moe.experts.119.w1", "model.layers.36.block_sparse_moe.experts.120.w1", "model.layers.36.block_sparse_moe.experts.121.w1", "model.layers.36.block_sparse_moe.experts.122.w1", "model.layers.36.block_sparse_moe.experts.123.w1", "model.layers.36.block_sparse_moe.experts.124.w1", "model.layers.36.block_sparse_moe.experts.125.w1", "model.layers.36.block_sparse_moe.experts.126.w1", "model.layers.36.block_sparse_moe.experts.127.w1", "model.layers.36.block_sparse_moe.experts.128.w1", "model.layers.36.block_sparse_moe.experts.129.w1", "model.layers.36.block_sparse_moe.experts.130.w1", "model.layers.36.block_sparse_moe.experts.131.w1", "model.layers.36.block_sparse_moe.experts.132.w1", "model.layers.36.block_sparse_moe.experts.133.w1", "model.layers.36.block_sparse_moe.experts.134.w1", "model.layers.36.block_sparse_moe.experts.135.w1", "model.layers.36.block_sparse_moe.experts.136.w1", "model.layers.36.block_sparse_moe.experts.137.w1", "model.layers.36.block_sparse_moe.experts.138.w1", "model.layers.36.block_sparse_moe.experts.139.w1", "model.layers.36.block_sparse_moe.experts.140.w1", "model.layers.36.block_sparse_moe.experts.141.w1", "model.layers.36.block_sparse_moe.experts.142.w1", "model.layers.36.block_sparse_moe.experts.143.w1", "model.layers.36.block_sparse_moe.experts.144.w1", "model.layers.36.block_sparse_moe.experts.145.w1", "model.layers.36.block_sparse_moe.experts.146.w1", "model.layers.36.block_sparse_moe.experts.147.w1", "model.layers.36.block_sparse_moe.experts.148.w1", "model.layers.36.block_sparse_moe.experts.149.w1", "model.layers.36.block_sparse_moe.experts.150.w1", "model.layers.36.block_sparse_moe.experts.151.w1", "model.layers.36.block_sparse_moe.experts.152.w1", "model.layers.36.block_sparse_moe.experts.153.w1", "model.layers.36.block_sparse_moe.experts.154.w1", "model.layers.36.block_sparse_moe.experts.155.w1", "model.layers.36.block_sparse_moe.experts.156.w1", "model.layers.36.block_sparse_moe.experts.157.w1", "model.layers.36.block_sparse_moe.experts.158.w1", "model.layers.36.block_sparse_moe.experts.159.w1", "model.layers.36.block_sparse_moe.experts.160.w1", "model.layers.36.block_sparse_moe.experts.161.w1", "model.layers.36.block_sparse_moe.experts.162.w1", "model.layers.36.block_sparse_moe.experts.163.w1", "model.layers.36.block_sparse_moe.experts.164.w1", "model.layers.36.block_sparse_moe.experts.165.w1", "model.layers.36.block_sparse_moe.experts.166.w1", "model.layers.36.block_sparse_moe.experts.167.w1", "model.layers.36.block_sparse_moe.experts.168.w1", "model.layers.36.block_sparse_moe.experts.169.w1", "model.layers.36.block_sparse_moe.experts.170.w1", "model.layers.36.block_sparse_moe.experts.171.w1", "model.layers.36.block_sparse_moe.experts.172.w1", "model.layers.36.block_sparse_moe.experts.173.w1", "model.layers.36.block_sparse_moe.experts.174.w1", "model.layers.36.block_sparse_moe.experts.175.w1", "model.layers.36.block_sparse_moe.experts.176.w1", "model.layers.36.block_sparse_moe.experts.177.w1", "model.layers.36.block_sparse_moe.experts.178.w1", "model.layers.36.block_sparse_moe.experts.179.w1", "model.layers.36.block_sparse_moe.experts.180.w1", "model.layers.36.block_sparse_moe.experts.181.w1", "model.layers.36.block_sparse_moe.experts.182.w1", "model.layers.36.block_sparse_moe.experts.183.w1", "model.layers.36.block_sparse_moe.experts.184.w1", "model.layers.36.block_sparse_moe.experts.185.w1", "model.layers.36.block_sparse_moe.experts.186.w1", "model.layers.36.block_sparse_moe.experts.187.w1", "model.layers.36.block_sparse_moe.experts.188.w1", "model.layers.36.block_sparse_moe.experts.189.w1", "model.layers.36.block_sparse_moe.experts.190.w1", "model.layers.36.block_sparse_moe.experts.191.w1", "model.layers.36.block_sparse_moe.experts.192.w1", "model.layers.36.block_sparse_moe.experts.193.w1", "model.layers.36.block_sparse_moe.experts.194.w1", "model.layers.36.block_sparse_moe.experts.195.w1", "model.layers.36.block_sparse_moe.experts.196.w1", "model.layers.36.block_sparse_moe.experts.197.w1", "model.layers.36.block_sparse_moe.experts.198.w1", "model.layers.36.block_sparse_moe.experts.199.w1", "model.layers.36.block_sparse_moe.experts.200.w1", "model.layers.36.block_sparse_moe.experts.201.w1", "model.layers.36.block_sparse_moe.experts.202.w1", "model.layers.36.block_sparse_moe.experts.203.w1", "model.layers.36.block_sparse_moe.experts.204.w1", "model.layers.36.block_sparse_moe.experts.205.w1", "model.layers.36.block_sparse_moe.experts.206.w1", "model.layers.36.block_sparse_moe.experts.207.w1", "model.layers.36.block_sparse_moe.experts.208.w1", "model.layers.36.block_sparse_moe.experts.209.w1", "model.layers.36.block_sparse_moe.experts.210.w1", "model.layers.36.block_sparse_moe.experts.211.w1", "model.layers.36.block_sparse_moe.experts.212.w1", "model.layers.36.block_sparse_moe.experts.213.w1", "model.layers.36.block_sparse_moe.experts.214.w1", "model.layers.36.block_sparse_moe.experts.215.w1", "model.layers.36.block_sparse_moe.experts.216.w1", "model.layers.36.block_sparse_moe.experts.217.w1", "model.layers.36.block_sparse_moe.experts.218.w1", "model.layers.36.block_sparse_moe.experts.219.w1", "model.layers.36.block_sparse_moe.experts.220.w1", "model.layers.36.block_sparse_moe.experts.221.w1", "model.layers.36.block_sparse_moe.experts.222.w1", "model.layers.36.block_sparse_moe.experts.223.w1", "model.layers.36.block_sparse_moe.experts.224.w1", "model.layers.36.block_sparse_moe.experts.225.w1", "model.layers.36.block_sparse_moe.experts.226.w1", "model.layers.36.block_sparse_moe.experts.227.w1", "model.layers.36.block_sparse_moe.experts.228.w1", "model.layers.36.block_sparse_moe.experts.229.w1", "model.layers.36.block_sparse_moe.experts.230.w1", "model.layers.36.block_sparse_moe.experts.231.w1", "model.layers.36.block_sparse_moe.experts.232.w1", "model.layers.36.block_sparse_moe.experts.233.w1", "model.layers.36.block_sparse_moe.experts.234.w1", "model.layers.36.block_sparse_moe.experts.235.w1", "model.layers.36.block_sparse_moe.experts.236.w1", "model.layers.36.block_sparse_moe.experts.237.w1", "model.layers.36.block_sparse_moe.experts.238.w1", "model.layers.36.block_sparse_moe.experts.239.w1", "model.layers.36.block_sparse_moe.experts.240.w1", "model.layers.36.block_sparse_moe.experts.241.w1", "model.layers.36.block_sparse_moe.experts.242.w1", "model.layers.36.block_sparse_moe.experts.243.w1", "model.layers.36.block_sparse_moe.experts.244.w1", "model.layers.36.block_sparse_moe.experts.245.w1", "model.layers.36.block_sparse_moe.experts.246.w1", "model.layers.36.block_sparse_moe.experts.247.w1", "model.layers.36.block_sparse_moe.experts.248.w1", "model.layers.36.block_sparse_moe.experts.249.w1", "model.layers.36.block_sparse_moe.experts.250.w1", "model.layers.36.block_sparse_moe.experts.251.w1", "model.layers.36.block_sparse_moe.experts.252.w1", "model.layers.36.block_sparse_moe.experts.253.w1", "model.layers.36.block_sparse_moe.experts.254.w1", "model.layers.36.block_sparse_moe.experts.255.w1", "model.layers.36.block_sparse_moe.experts.0.w3", "model.layers.36.block_sparse_moe.experts.1.w3", "model.layers.36.block_sparse_moe.experts.2.w3", "model.layers.36.block_sparse_moe.experts.3.w3", "model.layers.36.block_sparse_moe.experts.4.w3", "model.layers.36.block_sparse_moe.experts.5.w3", "model.layers.36.block_sparse_moe.experts.6.w3", "model.layers.36.block_sparse_moe.experts.7.w3", "model.layers.36.block_sparse_moe.experts.8.w3", "model.layers.36.block_sparse_moe.experts.9.w3", "model.layers.36.block_sparse_moe.experts.10.w3", "model.layers.36.block_sparse_moe.experts.11.w3", "model.layers.36.block_sparse_moe.experts.12.w3", "model.layers.36.block_sparse_moe.experts.13.w3", "model.layers.36.block_sparse_moe.experts.14.w3", "model.layers.36.block_sparse_moe.experts.15.w3", "model.layers.36.block_sparse_moe.experts.16.w3", "model.layers.36.block_sparse_moe.experts.17.w3", "model.layers.36.block_sparse_moe.experts.18.w3", "model.layers.36.block_sparse_moe.experts.19.w3", "model.layers.36.block_sparse_moe.experts.20.w3", "model.layers.36.block_sparse_moe.experts.21.w3", "model.layers.36.block_sparse_moe.experts.22.w3", "model.layers.36.block_sparse_moe.experts.23.w3", "model.layers.36.block_sparse_moe.experts.24.w3", "model.layers.36.block_sparse_moe.experts.25.w3", "model.layers.36.block_sparse_moe.experts.26.w3", "model.layers.36.block_sparse_moe.experts.27.w3", "model.layers.36.block_sparse_moe.experts.28.w3", "model.layers.36.block_sparse_moe.experts.29.w3", "model.layers.36.block_sparse_moe.experts.30.w3", "model.layers.36.block_sparse_moe.experts.31.w3", "model.layers.36.block_sparse_moe.experts.32.w3", "model.layers.36.block_sparse_moe.experts.33.w3", "model.layers.36.block_sparse_moe.experts.34.w3", "model.layers.36.block_sparse_moe.experts.35.w3", "model.layers.36.block_sparse_moe.experts.36.w3", "model.layers.36.block_sparse_moe.experts.37.w3", "model.layers.36.block_sparse_moe.experts.38.w3", "model.layers.36.block_sparse_moe.experts.39.w3", "model.layers.36.block_sparse_moe.experts.40.w3", "model.layers.36.block_sparse_moe.experts.41.w3", "model.layers.36.block_sparse_moe.experts.42.w3", "model.layers.36.block_sparse_moe.experts.43.w3", "model.layers.36.block_sparse_moe.experts.44.w3", "model.layers.36.block_sparse_moe.experts.45.w3", "model.layers.36.block_sparse_moe.experts.46.w3", "model.layers.36.block_sparse_moe.experts.47.w3", "model.layers.36.block_sparse_moe.experts.48.w3", "model.layers.36.block_sparse_moe.experts.49.w3", "model.layers.36.block_sparse_moe.experts.50.w3", "model.layers.36.block_sparse_moe.experts.51.w3", "model.layers.36.block_sparse_moe.experts.52.w3", "model.layers.36.block_sparse_moe.experts.53.w3", "model.layers.36.block_sparse_moe.experts.54.w3", "model.layers.36.block_sparse_moe.experts.55.w3", "model.layers.36.block_sparse_moe.experts.56.w3", "model.layers.36.block_sparse_moe.experts.57.w3", "model.layers.36.block_sparse_moe.experts.58.w3", "model.layers.36.block_sparse_moe.experts.59.w3", "model.layers.36.block_sparse_moe.experts.60.w3", "model.layers.36.block_sparse_moe.experts.61.w3", "model.layers.36.block_sparse_moe.experts.62.w3", "model.layers.36.block_sparse_moe.experts.63.w3", "model.layers.36.block_sparse_moe.experts.64.w3", "model.layers.36.block_sparse_moe.experts.65.w3", "model.layers.36.block_sparse_moe.experts.66.w3", "model.layers.36.block_sparse_moe.experts.67.w3", "model.layers.36.block_sparse_moe.experts.68.w3", "model.layers.36.block_sparse_moe.experts.69.w3", "model.layers.36.block_sparse_moe.experts.70.w3", "model.layers.36.block_sparse_moe.experts.71.w3", "model.layers.36.block_sparse_moe.experts.72.w3", "model.layers.36.block_sparse_moe.experts.73.w3", "model.layers.36.block_sparse_moe.experts.74.w3", "model.layers.36.block_sparse_moe.experts.75.w3", "model.layers.36.block_sparse_moe.experts.76.w3", "model.layers.36.block_sparse_moe.experts.77.w3", "model.layers.36.block_sparse_moe.experts.78.w3", "model.layers.36.block_sparse_moe.experts.79.w3", "model.layers.36.block_sparse_moe.experts.80.w3", "model.layers.36.block_sparse_moe.experts.81.w3", "model.layers.36.block_sparse_moe.experts.82.w3", "model.layers.36.block_sparse_moe.experts.83.w3", "model.layers.36.block_sparse_moe.experts.84.w3", "model.layers.36.block_sparse_moe.experts.85.w3", "model.layers.36.block_sparse_moe.experts.86.w3", "model.layers.36.block_sparse_moe.experts.87.w3", "model.layers.36.block_sparse_moe.experts.88.w3", "model.layers.36.block_sparse_moe.experts.89.w3", "model.layers.36.block_sparse_moe.experts.90.w3", "model.layers.36.block_sparse_moe.experts.91.w3", "model.layers.36.block_sparse_moe.experts.92.w3", "model.layers.36.block_sparse_moe.experts.93.w3", "model.layers.36.block_sparse_moe.experts.94.w3", "model.layers.36.block_sparse_moe.experts.95.w3", "model.layers.36.block_sparse_moe.experts.96.w3", "model.layers.36.block_sparse_moe.experts.97.w3", "model.layers.36.block_sparse_moe.experts.98.w3", "model.layers.36.block_sparse_moe.experts.99.w3", "model.layers.36.block_sparse_moe.experts.100.w3", "model.layers.36.block_sparse_moe.experts.101.w3", "model.layers.36.block_sparse_moe.experts.102.w3", "model.layers.36.block_sparse_moe.experts.103.w3", "model.layers.36.block_sparse_moe.experts.104.w3", "model.layers.36.block_sparse_moe.experts.105.w3", "model.layers.36.block_sparse_moe.experts.106.w3", "model.layers.36.block_sparse_moe.experts.107.w3", "model.layers.36.block_sparse_moe.experts.108.w3", "model.layers.36.block_sparse_moe.experts.109.w3", "model.layers.36.block_sparse_moe.experts.110.w3", "model.layers.36.block_sparse_moe.experts.111.w3", "model.layers.36.block_sparse_moe.experts.112.w3", "model.layers.36.block_sparse_moe.experts.113.w3", "model.layers.36.block_sparse_moe.experts.114.w3", "model.layers.36.block_sparse_moe.experts.115.w3", "model.layers.36.block_sparse_moe.experts.116.w3", "model.layers.36.block_sparse_moe.experts.117.w3", "model.layers.36.block_sparse_moe.experts.118.w3", "model.layers.36.block_sparse_moe.experts.119.w3", "model.layers.36.block_sparse_moe.experts.120.w3", "model.layers.36.block_sparse_moe.experts.121.w3", "model.layers.36.block_sparse_moe.experts.122.w3", "model.layers.36.block_sparse_moe.experts.123.w3", "model.layers.36.block_sparse_moe.experts.124.w3", "model.layers.36.block_sparse_moe.experts.125.w3", "model.layers.36.block_sparse_moe.experts.126.w3", "model.layers.36.block_sparse_moe.experts.127.w3", "model.layers.36.block_sparse_moe.experts.128.w3", "model.layers.36.block_sparse_moe.experts.129.w3", "model.layers.36.block_sparse_moe.experts.130.w3", "model.layers.36.block_sparse_moe.experts.131.w3", "model.layers.36.block_sparse_moe.experts.132.w3", "model.layers.36.block_sparse_moe.experts.133.w3", "model.layers.36.block_sparse_moe.experts.134.w3", "model.layers.36.block_sparse_moe.experts.135.w3", "model.layers.36.block_sparse_moe.experts.136.w3", "model.layers.36.block_sparse_moe.experts.137.w3", "model.layers.36.block_sparse_moe.experts.138.w3", "model.layers.36.block_sparse_moe.experts.139.w3", "model.layers.36.block_sparse_moe.experts.140.w3", "model.layers.36.block_sparse_moe.experts.141.w3", "model.layers.36.block_sparse_moe.experts.142.w3", "model.layers.36.block_sparse_moe.experts.143.w3", "model.layers.36.block_sparse_moe.experts.144.w3", "model.layers.36.block_sparse_moe.experts.145.w3", "model.layers.36.block_sparse_moe.experts.146.w3", "model.layers.36.block_sparse_moe.experts.147.w3", "model.layers.36.block_sparse_moe.experts.148.w3", "model.layers.36.block_sparse_moe.experts.149.w3", "model.layers.36.block_sparse_moe.experts.150.w3", "model.layers.36.block_sparse_moe.experts.151.w3", "model.layers.36.block_sparse_moe.experts.152.w3", "model.layers.36.block_sparse_moe.experts.153.w3", "model.layers.36.block_sparse_moe.experts.154.w3", "model.layers.36.block_sparse_moe.experts.155.w3", "model.layers.36.block_sparse_moe.experts.156.w3", "model.layers.36.block_sparse_moe.experts.157.w3", "model.layers.36.block_sparse_moe.experts.158.w3", "model.layers.36.block_sparse_moe.experts.159.w3", "model.layers.36.block_sparse_moe.experts.160.w3", "model.layers.36.block_sparse_moe.experts.161.w3", "model.layers.36.block_sparse_moe.experts.162.w3", "model.layers.36.block_sparse_moe.experts.163.w3", "model.layers.36.block_sparse_moe.experts.164.w3", "model.layers.36.block_sparse_moe.experts.165.w3", "model.layers.36.block_sparse_moe.experts.166.w3", "model.layers.36.block_sparse_moe.experts.167.w3", "model.layers.36.block_sparse_moe.experts.168.w3", "model.layers.36.block_sparse_moe.experts.169.w3", "model.layers.36.block_sparse_moe.experts.170.w3", "model.layers.36.block_sparse_moe.experts.171.w3", "model.layers.36.block_sparse_moe.experts.172.w3", "model.layers.36.block_sparse_moe.experts.173.w3", "model.layers.36.block_sparse_moe.experts.174.w3", "model.layers.36.block_sparse_moe.experts.175.w3", "model.layers.36.block_sparse_moe.experts.176.w3", "model.layers.36.block_sparse_moe.experts.177.w3", "model.layers.36.block_sparse_moe.experts.178.w3", "model.layers.36.block_sparse_moe.experts.179.w3", "model.layers.36.block_sparse_moe.experts.180.w3", "model.layers.36.block_sparse_moe.experts.181.w3", "model.layers.36.block_sparse_moe.experts.182.w3", "model.layers.36.block_sparse_moe.experts.183.w3", "model.layers.36.block_sparse_moe.experts.184.w3", "model.layers.36.block_sparse_moe.experts.185.w3", "model.layers.36.block_sparse_moe.experts.186.w3", "model.layers.36.block_sparse_moe.experts.187.w3", "model.layers.36.block_sparse_moe.experts.188.w3", "model.layers.36.block_sparse_moe.experts.189.w3", "model.layers.36.block_sparse_moe.experts.190.w3", "model.layers.36.block_sparse_moe.experts.191.w3", "model.layers.36.block_sparse_moe.experts.192.w3", "model.layers.36.block_sparse_moe.experts.193.w3", "model.layers.36.block_sparse_moe.experts.194.w3", "model.layers.36.block_sparse_moe.experts.195.w3", "model.layers.36.block_sparse_moe.experts.196.w3", "model.layers.36.block_sparse_moe.experts.197.w3", "model.layers.36.block_sparse_moe.experts.198.w3", "model.layers.36.block_sparse_moe.experts.199.w3", "model.layers.36.block_sparse_moe.experts.200.w3", "model.layers.36.block_sparse_moe.experts.201.w3", "model.layers.36.block_sparse_moe.experts.202.w3", "model.layers.36.block_sparse_moe.experts.203.w3", "model.layers.36.block_sparse_moe.experts.204.w3", "model.layers.36.block_sparse_moe.experts.205.w3", "model.layers.36.block_sparse_moe.experts.206.w3", "model.layers.36.block_sparse_moe.experts.207.w3", "model.layers.36.block_sparse_moe.experts.208.w3", "model.layers.36.block_sparse_moe.experts.209.w3", "model.layers.36.block_sparse_moe.experts.210.w3", "model.layers.36.block_sparse_moe.experts.211.w3", "model.layers.36.block_sparse_moe.experts.212.w3", "model.layers.36.block_sparse_moe.experts.213.w3", "model.layers.36.block_sparse_moe.experts.214.w3", "model.layers.36.block_sparse_moe.experts.215.w3", "model.layers.36.block_sparse_moe.experts.216.w3", "model.layers.36.block_sparse_moe.experts.217.w3", "model.layers.36.block_sparse_moe.experts.218.w3", "model.layers.36.block_sparse_moe.experts.219.w3", "model.layers.36.block_sparse_moe.experts.220.w3", "model.layers.36.block_sparse_moe.experts.221.w3", "model.layers.36.block_sparse_moe.experts.222.w3", "model.layers.36.block_sparse_moe.experts.223.w3", "model.layers.36.block_sparse_moe.experts.224.w3", "model.layers.36.block_sparse_moe.experts.225.w3", "model.layers.36.block_sparse_moe.experts.226.w3", "model.layers.36.block_sparse_moe.experts.227.w3", "model.layers.36.block_sparse_moe.experts.228.w3", "model.layers.36.block_sparse_moe.experts.229.w3", "model.layers.36.block_sparse_moe.experts.230.w3", "model.layers.36.block_sparse_moe.experts.231.w3", "model.layers.36.block_sparse_moe.experts.232.w3", "model.layers.36.block_sparse_moe.experts.233.w3", "model.layers.36.block_sparse_moe.experts.234.w3", "model.layers.36.block_sparse_moe.experts.235.w3", "model.layers.36.block_sparse_moe.experts.236.w3", "model.layers.36.block_sparse_moe.experts.237.w3", "model.layers.36.block_sparse_moe.experts.238.w3", "model.layers.36.block_sparse_moe.experts.239.w3", "model.layers.36.block_sparse_moe.experts.240.w3", "model.layers.36.block_sparse_moe.experts.241.w3", "model.layers.36.block_sparse_moe.experts.242.w3", "model.layers.36.block_sparse_moe.experts.243.w3", "model.layers.36.block_sparse_moe.experts.244.w3", "model.layers.36.block_sparse_moe.experts.245.w3", "model.layers.36.block_sparse_moe.experts.246.w3", "model.layers.36.block_sparse_moe.experts.247.w3", "model.layers.36.block_sparse_moe.experts.248.w3", "model.layers.36.block_sparse_moe.experts.249.w3", "model.layers.36.block_sparse_moe.experts.250.w3", "model.layers.36.block_sparse_moe.experts.251.w3", "model.layers.36.block_sparse_moe.experts.252.w3", "model.layers.36.block_sparse_moe.experts.253.w3", "model.layers.36.block_sparse_moe.experts.254.w3", "model.layers.36.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.001267579197883617, "dbits": 2415919104 } ] }, { "idx": 184, "layers": [ "model.layers.36.block_sparse_moe.experts.0.w2", "model.layers.36.block_sparse_moe.experts.1.w2", "model.layers.36.block_sparse_moe.experts.2.w2", "model.layers.36.block_sparse_moe.experts.3.w2", "model.layers.36.block_sparse_moe.experts.4.w2", "model.layers.36.block_sparse_moe.experts.5.w2", "model.layers.36.block_sparse_moe.experts.6.w2", "model.layers.36.block_sparse_moe.experts.7.w2", "model.layers.36.block_sparse_moe.experts.8.w2", "model.layers.36.block_sparse_moe.experts.9.w2", "model.layers.36.block_sparse_moe.experts.10.w2", "model.layers.36.block_sparse_moe.experts.11.w2", "model.layers.36.block_sparse_moe.experts.12.w2", "model.layers.36.block_sparse_moe.experts.13.w2", "model.layers.36.block_sparse_moe.experts.14.w2", "model.layers.36.block_sparse_moe.experts.15.w2", "model.layers.36.block_sparse_moe.experts.16.w2", "model.layers.36.block_sparse_moe.experts.17.w2", "model.layers.36.block_sparse_moe.experts.18.w2", "model.layers.36.block_sparse_moe.experts.19.w2", "model.layers.36.block_sparse_moe.experts.20.w2", "model.layers.36.block_sparse_moe.experts.21.w2", "model.layers.36.block_sparse_moe.experts.22.w2", "model.layers.36.block_sparse_moe.experts.23.w2", "model.layers.36.block_sparse_moe.experts.24.w2", "model.layers.36.block_sparse_moe.experts.25.w2", "model.layers.36.block_sparse_moe.experts.26.w2", "model.layers.36.block_sparse_moe.experts.27.w2", "model.layers.36.block_sparse_moe.experts.28.w2", "model.layers.36.block_sparse_moe.experts.29.w2", "model.layers.36.block_sparse_moe.experts.30.w2", "model.layers.36.block_sparse_moe.experts.31.w2", "model.layers.36.block_sparse_moe.experts.32.w2", "model.layers.36.block_sparse_moe.experts.33.w2", "model.layers.36.block_sparse_moe.experts.34.w2", "model.layers.36.block_sparse_moe.experts.35.w2", "model.layers.36.block_sparse_moe.experts.36.w2", "model.layers.36.block_sparse_moe.experts.37.w2", "model.layers.36.block_sparse_moe.experts.38.w2", "model.layers.36.block_sparse_moe.experts.39.w2", "model.layers.36.block_sparse_moe.experts.40.w2", "model.layers.36.block_sparse_moe.experts.41.w2", "model.layers.36.block_sparse_moe.experts.42.w2", "model.layers.36.block_sparse_moe.experts.43.w2", "model.layers.36.block_sparse_moe.experts.44.w2", "model.layers.36.block_sparse_moe.experts.45.w2", "model.layers.36.block_sparse_moe.experts.46.w2", "model.layers.36.block_sparse_moe.experts.47.w2", "model.layers.36.block_sparse_moe.experts.48.w2", "model.layers.36.block_sparse_moe.experts.49.w2", "model.layers.36.block_sparse_moe.experts.50.w2", "model.layers.36.block_sparse_moe.experts.51.w2", "model.layers.36.block_sparse_moe.experts.52.w2", "model.layers.36.block_sparse_moe.experts.53.w2", "model.layers.36.block_sparse_moe.experts.54.w2", "model.layers.36.block_sparse_moe.experts.55.w2", "model.layers.36.block_sparse_moe.experts.56.w2", "model.layers.36.block_sparse_moe.experts.57.w2", "model.layers.36.block_sparse_moe.experts.58.w2", "model.layers.36.block_sparse_moe.experts.59.w2", "model.layers.36.block_sparse_moe.experts.60.w2", "model.layers.36.block_sparse_moe.experts.61.w2", "model.layers.36.block_sparse_moe.experts.62.w2", "model.layers.36.block_sparse_moe.experts.63.w2", "model.layers.36.block_sparse_moe.experts.64.w2", "model.layers.36.block_sparse_moe.experts.65.w2", "model.layers.36.block_sparse_moe.experts.66.w2", "model.layers.36.block_sparse_moe.experts.67.w2", "model.layers.36.block_sparse_moe.experts.68.w2", "model.layers.36.block_sparse_moe.experts.69.w2", "model.layers.36.block_sparse_moe.experts.70.w2", "model.layers.36.block_sparse_moe.experts.71.w2", "model.layers.36.block_sparse_moe.experts.72.w2", "model.layers.36.block_sparse_moe.experts.73.w2", "model.layers.36.block_sparse_moe.experts.74.w2", "model.layers.36.block_sparse_moe.experts.75.w2", "model.layers.36.block_sparse_moe.experts.76.w2", "model.layers.36.block_sparse_moe.experts.77.w2", "model.layers.36.block_sparse_moe.experts.78.w2", "model.layers.36.block_sparse_moe.experts.79.w2", "model.layers.36.block_sparse_moe.experts.80.w2", "model.layers.36.block_sparse_moe.experts.81.w2", "model.layers.36.block_sparse_moe.experts.82.w2", "model.layers.36.block_sparse_moe.experts.83.w2", "model.layers.36.block_sparse_moe.experts.84.w2", "model.layers.36.block_sparse_moe.experts.85.w2", "model.layers.36.block_sparse_moe.experts.86.w2", "model.layers.36.block_sparse_moe.experts.87.w2", "model.layers.36.block_sparse_moe.experts.88.w2", "model.layers.36.block_sparse_moe.experts.89.w2", "model.layers.36.block_sparse_moe.experts.90.w2", "model.layers.36.block_sparse_moe.experts.91.w2", "model.layers.36.block_sparse_moe.experts.92.w2", "model.layers.36.block_sparse_moe.experts.93.w2", "model.layers.36.block_sparse_moe.experts.94.w2", "model.layers.36.block_sparse_moe.experts.95.w2", "model.layers.36.block_sparse_moe.experts.96.w2", "model.layers.36.block_sparse_moe.experts.97.w2", "model.layers.36.block_sparse_moe.experts.98.w2", "model.layers.36.block_sparse_moe.experts.99.w2", "model.layers.36.block_sparse_moe.experts.100.w2", "model.layers.36.block_sparse_moe.experts.101.w2", "model.layers.36.block_sparse_moe.experts.102.w2", "model.layers.36.block_sparse_moe.experts.103.w2", "model.layers.36.block_sparse_moe.experts.104.w2", "model.layers.36.block_sparse_moe.experts.105.w2", "model.layers.36.block_sparse_moe.experts.106.w2", "model.layers.36.block_sparse_moe.experts.107.w2", "model.layers.36.block_sparse_moe.experts.108.w2", "model.layers.36.block_sparse_moe.experts.109.w2", "model.layers.36.block_sparse_moe.experts.110.w2", "model.layers.36.block_sparse_moe.experts.111.w2", "model.layers.36.block_sparse_moe.experts.112.w2", "model.layers.36.block_sparse_moe.experts.113.w2", "model.layers.36.block_sparse_moe.experts.114.w2", "model.layers.36.block_sparse_moe.experts.115.w2", "model.layers.36.block_sparse_moe.experts.116.w2", "model.layers.36.block_sparse_moe.experts.117.w2", "model.layers.36.block_sparse_moe.experts.118.w2", "model.layers.36.block_sparse_moe.experts.119.w2", "model.layers.36.block_sparse_moe.experts.120.w2", "model.layers.36.block_sparse_moe.experts.121.w2", "model.layers.36.block_sparse_moe.experts.122.w2", "model.layers.36.block_sparse_moe.experts.123.w2", "model.layers.36.block_sparse_moe.experts.124.w2", "model.layers.36.block_sparse_moe.experts.125.w2", "model.layers.36.block_sparse_moe.experts.126.w2", "model.layers.36.block_sparse_moe.experts.127.w2", "model.layers.36.block_sparse_moe.experts.128.w2", "model.layers.36.block_sparse_moe.experts.129.w2", "model.layers.36.block_sparse_moe.experts.130.w2", "model.layers.36.block_sparse_moe.experts.131.w2", "model.layers.36.block_sparse_moe.experts.132.w2", "model.layers.36.block_sparse_moe.experts.133.w2", "model.layers.36.block_sparse_moe.experts.134.w2", "model.layers.36.block_sparse_moe.experts.135.w2", "model.layers.36.block_sparse_moe.experts.136.w2", "model.layers.36.block_sparse_moe.experts.137.w2", "model.layers.36.block_sparse_moe.experts.138.w2", "model.layers.36.block_sparse_moe.experts.139.w2", "model.layers.36.block_sparse_moe.experts.140.w2", "model.layers.36.block_sparse_moe.experts.141.w2", "model.layers.36.block_sparse_moe.experts.142.w2", "model.layers.36.block_sparse_moe.experts.143.w2", "model.layers.36.block_sparse_moe.experts.144.w2", "model.layers.36.block_sparse_moe.experts.145.w2", "model.layers.36.block_sparse_moe.experts.146.w2", "model.layers.36.block_sparse_moe.experts.147.w2", "model.layers.36.block_sparse_moe.experts.148.w2", "model.layers.36.block_sparse_moe.experts.149.w2", "model.layers.36.block_sparse_moe.experts.150.w2", "model.layers.36.block_sparse_moe.experts.151.w2", "model.layers.36.block_sparse_moe.experts.152.w2", "model.layers.36.block_sparse_moe.experts.153.w2", "model.layers.36.block_sparse_moe.experts.154.w2", "model.layers.36.block_sparse_moe.experts.155.w2", "model.layers.36.block_sparse_moe.experts.156.w2", "model.layers.36.block_sparse_moe.experts.157.w2", "model.layers.36.block_sparse_moe.experts.158.w2", "model.layers.36.block_sparse_moe.experts.159.w2", "model.layers.36.block_sparse_moe.experts.160.w2", "model.layers.36.block_sparse_moe.experts.161.w2", "model.layers.36.block_sparse_moe.experts.162.w2", "model.layers.36.block_sparse_moe.experts.163.w2", "model.layers.36.block_sparse_moe.experts.164.w2", "model.layers.36.block_sparse_moe.experts.165.w2", "model.layers.36.block_sparse_moe.experts.166.w2", "model.layers.36.block_sparse_moe.experts.167.w2", "model.layers.36.block_sparse_moe.experts.168.w2", "model.layers.36.block_sparse_moe.experts.169.w2", "model.layers.36.block_sparse_moe.experts.170.w2", "model.layers.36.block_sparse_moe.experts.171.w2", "model.layers.36.block_sparse_moe.experts.172.w2", "model.layers.36.block_sparse_moe.experts.173.w2", "model.layers.36.block_sparse_moe.experts.174.w2", "model.layers.36.block_sparse_moe.experts.175.w2", "model.layers.36.block_sparse_moe.experts.176.w2", "model.layers.36.block_sparse_moe.experts.177.w2", "model.layers.36.block_sparse_moe.experts.178.w2", "model.layers.36.block_sparse_moe.experts.179.w2", "model.layers.36.block_sparse_moe.experts.180.w2", "model.layers.36.block_sparse_moe.experts.181.w2", "model.layers.36.block_sparse_moe.experts.182.w2", "model.layers.36.block_sparse_moe.experts.183.w2", "model.layers.36.block_sparse_moe.experts.184.w2", "model.layers.36.block_sparse_moe.experts.185.w2", "model.layers.36.block_sparse_moe.experts.186.w2", "model.layers.36.block_sparse_moe.experts.187.w2", "model.layers.36.block_sparse_moe.experts.188.w2", "model.layers.36.block_sparse_moe.experts.189.w2", "model.layers.36.block_sparse_moe.experts.190.w2", "model.layers.36.block_sparse_moe.experts.191.w2", "model.layers.36.block_sparse_moe.experts.192.w2", "model.layers.36.block_sparse_moe.experts.193.w2", "model.layers.36.block_sparse_moe.experts.194.w2", "model.layers.36.block_sparse_moe.experts.195.w2", "model.layers.36.block_sparse_moe.experts.196.w2", "model.layers.36.block_sparse_moe.experts.197.w2", "model.layers.36.block_sparse_moe.experts.198.w2", "model.layers.36.block_sparse_moe.experts.199.w2", "model.layers.36.block_sparse_moe.experts.200.w2", "model.layers.36.block_sparse_moe.experts.201.w2", "model.layers.36.block_sparse_moe.experts.202.w2", "model.layers.36.block_sparse_moe.experts.203.w2", "model.layers.36.block_sparse_moe.experts.204.w2", "model.layers.36.block_sparse_moe.experts.205.w2", "model.layers.36.block_sparse_moe.experts.206.w2", "model.layers.36.block_sparse_moe.experts.207.w2", "model.layers.36.block_sparse_moe.experts.208.w2", "model.layers.36.block_sparse_moe.experts.209.w2", "model.layers.36.block_sparse_moe.experts.210.w2", "model.layers.36.block_sparse_moe.experts.211.w2", "model.layers.36.block_sparse_moe.experts.212.w2", "model.layers.36.block_sparse_moe.experts.213.w2", "model.layers.36.block_sparse_moe.experts.214.w2", "model.layers.36.block_sparse_moe.experts.215.w2", "model.layers.36.block_sparse_moe.experts.216.w2", "model.layers.36.block_sparse_moe.experts.217.w2", "model.layers.36.block_sparse_moe.experts.218.w2", "model.layers.36.block_sparse_moe.experts.219.w2", "model.layers.36.block_sparse_moe.experts.220.w2", "model.layers.36.block_sparse_moe.experts.221.w2", "model.layers.36.block_sparse_moe.experts.222.w2", "model.layers.36.block_sparse_moe.experts.223.w2", "model.layers.36.block_sparse_moe.experts.224.w2", "model.layers.36.block_sparse_moe.experts.225.w2", "model.layers.36.block_sparse_moe.experts.226.w2", "model.layers.36.block_sparse_moe.experts.227.w2", "model.layers.36.block_sparse_moe.experts.228.w2", "model.layers.36.block_sparse_moe.experts.229.w2", "model.layers.36.block_sparse_moe.experts.230.w2", "model.layers.36.block_sparse_moe.experts.231.w2", "model.layers.36.block_sparse_moe.experts.232.w2", "model.layers.36.block_sparse_moe.experts.233.w2", "model.layers.36.block_sparse_moe.experts.234.w2", "model.layers.36.block_sparse_moe.experts.235.w2", "model.layers.36.block_sparse_moe.experts.236.w2", "model.layers.36.block_sparse_moe.experts.237.w2", "model.layers.36.block_sparse_moe.experts.238.w2", "model.layers.36.block_sparse_moe.experts.239.w2", "model.layers.36.block_sparse_moe.experts.240.w2", "model.layers.36.block_sparse_moe.experts.241.w2", "model.layers.36.block_sparse_moe.experts.242.w2", "model.layers.36.block_sparse_moe.experts.243.w2", "model.layers.36.block_sparse_moe.experts.244.w2", "model.layers.36.block_sparse_moe.experts.245.w2", "model.layers.36.block_sparse_moe.experts.246.w2", "model.layers.36.block_sparse_moe.experts.247.w2", "model.layers.36.block_sparse_moe.experts.248.w2", "model.layers.36.block_sparse_moe.experts.249.w2", "model.layers.36.block_sparse_moe.experts.250.w2", "model.layers.36.block_sparse_moe.experts.251.w2", "model.layers.36.block_sparse_moe.experts.252.w2", "model.layers.36.block_sparse_moe.experts.253.w2", "model.layers.36.block_sparse_moe.experts.254.w2", "model.layers.36.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.000845168158412013, "dbits": 1207959552 } ] }, { "idx": 185, "layers": [ "model.layers.37.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0017913822084665298, "dbits": 18874368 } ] }, { "idx": 186, "layers": [ "model.layers.37.self_attn.k_proj", "model.layers.37.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0061232693493366575, "dbits": 6291456 } ] }, { "idx": 187, "layers": [ "model.layers.37.self_attn.o_proj" ], "candidates": [ { "dkld": -0.005984794348478317, "dbits": 18874368 } ] }, { "idx": 188, "layers": [ "model.layers.37.block_sparse_moe.experts.0.w1", "model.layers.37.block_sparse_moe.experts.1.w1", "model.layers.37.block_sparse_moe.experts.2.w1", "model.layers.37.block_sparse_moe.experts.3.w1", "model.layers.37.block_sparse_moe.experts.4.w1", "model.layers.37.block_sparse_moe.experts.5.w1", "model.layers.37.block_sparse_moe.experts.6.w1", "model.layers.37.block_sparse_moe.experts.7.w1", "model.layers.37.block_sparse_moe.experts.8.w1", "model.layers.37.block_sparse_moe.experts.9.w1", "model.layers.37.block_sparse_moe.experts.10.w1", "model.layers.37.block_sparse_moe.experts.11.w1", "model.layers.37.block_sparse_moe.experts.12.w1", "model.layers.37.block_sparse_moe.experts.13.w1", "model.layers.37.block_sparse_moe.experts.14.w1", "model.layers.37.block_sparse_moe.experts.15.w1", "model.layers.37.block_sparse_moe.experts.16.w1", "model.layers.37.block_sparse_moe.experts.17.w1", "model.layers.37.block_sparse_moe.experts.18.w1", "model.layers.37.block_sparse_moe.experts.19.w1", "model.layers.37.block_sparse_moe.experts.20.w1", "model.layers.37.block_sparse_moe.experts.21.w1", "model.layers.37.block_sparse_moe.experts.22.w1", "model.layers.37.block_sparse_moe.experts.23.w1", "model.layers.37.block_sparse_moe.experts.24.w1", "model.layers.37.block_sparse_moe.experts.25.w1", "model.layers.37.block_sparse_moe.experts.26.w1", "model.layers.37.block_sparse_moe.experts.27.w1", "model.layers.37.block_sparse_moe.experts.28.w1", "model.layers.37.block_sparse_moe.experts.29.w1", "model.layers.37.block_sparse_moe.experts.30.w1", "model.layers.37.block_sparse_moe.experts.31.w1", "model.layers.37.block_sparse_moe.experts.32.w1", "model.layers.37.block_sparse_moe.experts.33.w1", "model.layers.37.block_sparse_moe.experts.34.w1", "model.layers.37.block_sparse_moe.experts.35.w1", "model.layers.37.block_sparse_moe.experts.36.w1", "model.layers.37.block_sparse_moe.experts.37.w1", "model.layers.37.block_sparse_moe.experts.38.w1", "model.layers.37.block_sparse_moe.experts.39.w1", "model.layers.37.block_sparse_moe.experts.40.w1", "model.layers.37.block_sparse_moe.experts.41.w1", "model.layers.37.block_sparse_moe.experts.42.w1", "model.layers.37.block_sparse_moe.experts.43.w1", "model.layers.37.block_sparse_moe.experts.44.w1", "model.layers.37.block_sparse_moe.experts.45.w1", "model.layers.37.block_sparse_moe.experts.46.w1", "model.layers.37.block_sparse_moe.experts.47.w1", "model.layers.37.block_sparse_moe.experts.48.w1", "model.layers.37.block_sparse_moe.experts.49.w1", "model.layers.37.block_sparse_moe.experts.50.w1", "model.layers.37.block_sparse_moe.experts.51.w1", "model.layers.37.block_sparse_moe.experts.52.w1", "model.layers.37.block_sparse_moe.experts.53.w1", "model.layers.37.block_sparse_moe.experts.54.w1", "model.layers.37.block_sparse_moe.experts.55.w1", "model.layers.37.block_sparse_moe.experts.56.w1", "model.layers.37.block_sparse_moe.experts.57.w1", "model.layers.37.block_sparse_moe.experts.58.w1", "model.layers.37.block_sparse_moe.experts.59.w1", "model.layers.37.block_sparse_moe.experts.60.w1", "model.layers.37.block_sparse_moe.experts.61.w1", "model.layers.37.block_sparse_moe.experts.62.w1", "model.layers.37.block_sparse_moe.experts.63.w1", "model.layers.37.block_sparse_moe.experts.64.w1", "model.layers.37.block_sparse_moe.experts.65.w1", "model.layers.37.block_sparse_moe.experts.66.w1", "model.layers.37.block_sparse_moe.experts.67.w1", "model.layers.37.block_sparse_moe.experts.68.w1", "model.layers.37.block_sparse_moe.experts.69.w1", "model.layers.37.block_sparse_moe.experts.70.w1", "model.layers.37.block_sparse_moe.experts.71.w1", "model.layers.37.block_sparse_moe.experts.72.w1", "model.layers.37.block_sparse_moe.experts.73.w1", "model.layers.37.block_sparse_moe.experts.74.w1", "model.layers.37.block_sparse_moe.experts.75.w1", "model.layers.37.block_sparse_moe.experts.76.w1", "model.layers.37.block_sparse_moe.experts.77.w1", "model.layers.37.block_sparse_moe.experts.78.w1", "model.layers.37.block_sparse_moe.experts.79.w1", "model.layers.37.block_sparse_moe.experts.80.w1", "model.layers.37.block_sparse_moe.experts.81.w1", "model.layers.37.block_sparse_moe.experts.82.w1", "model.layers.37.block_sparse_moe.experts.83.w1", "model.layers.37.block_sparse_moe.experts.84.w1", "model.layers.37.block_sparse_moe.experts.85.w1", "model.layers.37.block_sparse_moe.experts.86.w1", "model.layers.37.block_sparse_moe.experts.87.w1", "model.layers.37.block_sparse_moe.experts.88.w1", "model.layers.37.block_sparse_moe.experts.89.w1", "model.layers.37.block_sparse_moe.experts.90.w1", "model.layers.37.block_sparse_moe.experts.91.w1", "model.layers.37.block_sparse_moe.experts.92.w1", "model.layers.37.block_sparse_moe.experts.93.w1", "model.layers.37.block_sparse_moe.experts.94.w1", "model.layers.37.block_sparse_moe.experts.95.w1", "model.layers.37.block_sparse_moe.experts.96.w1", "model.layers.37.block_sparse_moe.experts.97.w1", "model.layers.37.block_sparse_moe.experts.98.w1", "model.layers.37.block_sparse_moe.experts.99.w1", "model.layers.37.block_sparse_moe.experts.100.w1", "model.layers.37.block_sparse_moe.experts.101.w1", "model.layers.37.block_sparse_moe.experts.102.w1", "model.layers.37.block_sparse_moe.experts.103.w1", "model.layers.37.block_sparse_moe.experts.104.w1", "model.layers.37.block_sparse_moe.experts.105.w1", "model.layers.37.block_sparse_moe.experts.106.w1", "model.layers.37.block_sparse_moe.experts.107.w1", "model.layers.37.block_sparse_moe.experts.108.w1", "model.layers.37.block_sparse_moe.experts.109.w1", "model.layers.37.block_sparse_moe.experts.110.w1", "model.layers.37.block_sparse_moe.experts.111.w1", "model.layers.37.block_sparse_moe.experts.112.w1", "model.layers.37.block_sparse_moe.experts.113.w1", "model.layers.37.block_sparse_moe.experts.114.w1", "model.layers.37.block_sparse_moe.experts.115.w1", "model.layers.37.block_sparse_moe.experts.116.w1", "model.layers.37.block_sparse_moe.experts.117.w1", "model.layers.37.block_sparse_moe.experts.118.w1", "model.layers.37.block_sparse_moe.experts.119.w1", "model.layers.37.block_sparse_moe.experts.120.w1", "model.layers.37.block_sparse_moe.experts.121.w1", "model.layers.37.block_sparse_moe.experts.122.w1", "model.layers.37.block_sparse_moe.experts.123.w1", "model.layers.37.block_sparse_moe.experts.124.w1", "model.layers.37.block_sparse_moe.experts.125.w1", "model.layers.37.block_sparse_moe.experts.126.w1", "model.layers.37.block_sparse_moe.experts.127.w1", "model.layers.37.block_sparse_moe.experts.128.w1", "model.layers.37.block_sparse_moe.experts.129.w1", "model.layers.37.block_sparse_moe.experts.130.w1", "model.layers.37.block_sparse_moe.experts.131.w1", "model.layers.37.block_sparse_moe.experts.132.w1", "model.layers.37.block_sparse_moe.experts.133.w1", "model.layers.37.block_sparse_moe.experts.134.w1", "model.layers.37.block_sparse_moe.experts.135.w1", "model.layers.37.block_sparse_moe.experts.136.w1", "model.layers.37.block_sparse_moe.experts.137.w1", "model.layers.37.block_sparse_moe.experts.138.w1", "model.layers.37.block_sparse_moe.experts.139.w1", "model.layers.37.block_sparse_moe.experts.140.w1", "model.layers.37.block_sparse_moe.experts.141.w1", "model.layers.37.block_sparse_moe.experts.142.w1", "model.layers.37.block_sparse_moe.experts.143.w1", "model.layers.37.block_sparse_moe.experts.144.w1", "model.layers.37.block_sparse_moe.experts.145.w1", "model.layers.37.block_sparse_moe.experts.146.w1", "model.layers.37.block_sparse_moe.experts.147.w1", "model.layers.37.block_sparse_moe.experts.148.w1", "model.layers.37.block_sparse_moe.experts.149.w1", "model.layers.37.block_sparse_moe.experts.150.w1", "model.layers.37.block_sparse_moe.experts.151.w1", "model.layers.37.block_sparse_moe.experts.152.w1", "model.layers.37.block_sparse_moe.experts.153.w1", "model.layers.37.block_sparse_moe.experts.154.w1", "model.layers.37.block_sparse_moe.experts.155.w1", "model.layers.37.block_sparse_moe.experts.156.w1", "model.layers.37.block_sparse_moe.experts.157.w1", "model.layers.37.block_sparse_moe.experts.158.w1", "model.layers.37.block_sparse_moe.experts.159.w1", "model.layers.37.block_sparse_moe.experts.160.w1", "model.layers.37.block_sparse_moe.experts.161.w1", "model.layers.37.block_sparse_moe.experts.162.w1", "model.layers.37.block_sparse_moe.experts.163.w1", "model.layers.37.block_sparse_moe.experts.164.w1", "model.layers.37.block_sparse_moe.experts.165.w1", "model.layers.37.block_sparse_moe.experts.166.w1", "model.layers.37.block_sparse_moe.experts.167.w1", "model.layers.37.block_sparse_moe.experts.168.w1", "model.layers.37.block_sparse_moe.experts.169.w1", "model.layers.37.block_sparse_moe.experts.170.w1", "model.layers.37.block_sparse_moe.experts.171.w1", "model.layers.37.block_sparse_moe.experts.172.w1", "model.layers.37.block_sparse_moe.experts.173.w1", "model.layers.37.block_sparse_moe.experts.174.w1", "model.layers.37.block_sparse_moe.experts.175.w1", "model.layers.37.block_sparse_moe.experts.176.w1", "model.layers.37.block_sparse_moe.experts.177.w1", "model.layers.37.block_sparse_moe.experts.178.w1", "model.layers.37.block_sparse_moe.experts.179.w1", "model.layers.37.block_sparse_moe.experts.180.w1", "model.layers.37.block_sparse_moe.experts.181.w1", "model.layers.37.block_sparse_moe.experts.182.w1", "model.layers.37.block_sparse_moe.experts.183.w1", "model.layers.37.block_sparse_moe.experts.184.w1", "model.layers.37.block_sparse_moe.experts.185.w1", "model.layers.37.block_sparse_moe.experts.186.w1", "model.layers.37.block_sparse_moe.experts.187.w1", "model.layers.37.block_sparse_moe.experts.188.w1", "model.layers.37.block_sparse_moe.experts.189.w1", "model.layers.37.block_sparse_moe.experts.190.w1", "model.layers.37.block_sparse_moe.experts.191.w1", "model.layers.37.block_sparse_moe.experts.192.w1", "model.layers.37.block_sparse_moe.experts.193.w1", "model.layers.37.block_sparse_moe.experts.194.w1", "model.layers.37.block_sparse_moe.experts.195.w1", "model.layers.37.block_sparse_moe.experts.196.w1", "model.layers.37.block_sparse_moe.experts.197.w1", "model.layers.37.block_sparse_moe.experts.198.w1", "model.layers.37.block_sparse_moe.experts.199.w1", "model.layers.37.block_sparse_moe.experts.200.w1", "model.layers.37.block_sparse_moe.experts.201.w1", "model.layers.37.block_sparse_moe.experts.202.w1", "model.layers.37.block_sparse_moe.experts.203.w1", "model.layers.37.block_sparse_moe.experts.204.w1", "model.layers.37.block_sparse_moe.experts.205.w1", "model.layers.37.block_sparse_moe.experts.206.w1", "model.layers.37.block_sparse_moe.experts.207.w1", "model.layers.37.block_sparse_moe.experts.208.w1", "model.layers.37.block_sparse_moe.experts.209.w1", "model.layers.37.block_sparse_moe.experts.210.w1", "model.layers.37.block_sparse_moe.experts.211.w1", "model.layers.37.block_sparse_moe.experts.212.w1", "model.layers.37.block_sparse_moe.experts.213.w1", "model.layers.37.block_sparse_moe.experts.214.w1", "model.layers.37.block_sparse_moe.experts.215.w1", "model.layers.37.block_sparse_moe.experts.216.w1", "model.layers.37.block_sparse_moe.experts.217.w1", "model.layers.37.block_sparse_moe.experts.218.w1", "model.layers.37.block_sparse_moe.experts.219.w1", "model.layers.37.block_sparse_moe.experts.220.w1", "model.layers.37.block_sparse_moe.experts.221.w1", "model.layers.37.block_sparse_moe.experts.222.w1", "model.layers.37.block_sparse_moe.experts.223.w1", "model.layers.37.block_sparse_moe.experts.224.w1", "model.layers.37.block_sparse_moe.experts.225.w1", "model.layers.37.block_sparse_moe.experts.226.w1", "model.layers.37.block_sparse_moe.experts.227.w1", "model.layers.37.block_sparse_moe.experts.228.w1", "model.layers.37.block_sparse_moe.experts.229.w1", "model.layers.37.block_sparse_moe.experts.230.w1", "model.layers.37.block_sparse_moe.experts.231.w1", "model.layers.37.block_sparse_moe.experts.232.w1", "model.layers.37.block_sparse_moe.experts.233.w1", "model.layers.37.block_sparse_moe.experts.234.w1", "model.layers.37.block_sparse_moe.experts.235.w1", "model.layers.37.block_sparse_moe.experts.236.w1", "model.layers.37.block_sparse_moe.experts.237.w1", "model.layers.37.block_sparse_moe.experts.238.w1", "model.layers.37.block_sparse_moe.experts.239.w1", "model.layers.37.block_sparse_moe.experts.240.w1", "model.layers.37.block_sparse_moe.experts.241.w1", "model.layers.37.block_sparse_moe.experts.242.w1", "model.layers.37.block_sparse_moe.experts.243.w1", "model.layers.37.block_sparse_moe.experts.244.w1", "model.layers.37.block_sparse_moe.experts.245.w1", "model.layers.37.block_sparse_moe.experts.246.w1", "model.layers.37.block_sparse_moe.experts.247.w1", "model.layers.37.block_sparse_moe.experts.248.w1", "model.layers.37.block_sparse_moe.experts.249.w1", "model.layers.37.block_sparse_moe.experts.250.w1", "model.layers.37.block_sparse_moe.experts.251.w1", "model.layers.37.block_sparse_moe.experts.252.w1", "model.layers.37.block_sparse_moe.experts.253.w1", "model.layers.37.block_sparse_moe.experts.254.w1", "model.layers.37.block_sparse_moe.experts.255.w1", "model.layers.37.block_sparse_moe.experts.0.w3", "model.layers.37.block_sparse_moe.experts.1.w3", "model.layers.37.block_sparse_moe.experts.2.w3", "model.layers.37.block_sparse_moe.experts.3.w3", "model.layers.37.block_sparse_moe.experts.4.w3", "model.layers.37.block_sparse_moe.experts.5.w3", "model.layers.37.block_sparse_moe.experts.6.w3", "model.layers.37.block_sparse_moe.experts.7.w3", "model.layers.37.block_sparse_moe.experts.8.w3", "model.layers.37.block_sparse_moe.experts.9.w3", "model.layers.37.block_sparse_moe.experts.10.w3", "model.layers.37.block_sparse_moe.experts.11.w3", "model.layers.37.block_sparse_moe.experts.12.w3", "model.layers.37.block_sparse_moe.experts.13.w3", "model.layers.37.block_sparse_moe.experts.14.w3", "model.layers.37.block_sparse_moe.experts.15.w3", "model.layers.37.block_sparse_moe.experts.16.w3", "model.layers.37.block_sparse_moe.experts.17.w3", "model.layers.37.block_sparse_moe.experts.18.w3", "model.layers.37.block_sparse_moe.experts.19.w3", "model.layers.37.block_sparse_moe.experts.20.w3", "model.layers.37.block_sparse_moe.experts.21.w3", "model.layers.37.block_sparse_moe.experts.22.w3", "model.layers.37.block_sparse_moe.experts.23.w3", "model.layers.37.block_sparse_moe.experts.24.w3", "model.layers.37.block_sparse_moe.experts.25.w3", "model.layers.37.block_sparse_moe.experts.26.w3", "model.layers.37.block_sparse_moe.experts.27.w3", "model.layers.37.block_sparse_moe.experts.28.w3", "model.layers.37.block_sparse_moe.experts.29.w3", "model.layers.37.block_sparse_moe.experts.30.w3", "model.layers.37.block_sparse_moe.experts.31.w3", "model.layers.37.block_sparse_moe.experts.32.w3", "model.layers.37.block_sparse_moe.experts.33.w3", "model.layers.37.block_sparse_moe.experts.34.w3", "model.layers.37.block_sparse_moe.experts.35.w3", "model.layers.37.block_sparse_moe.experts.36.w3", "model.layers.37.block_sparse_moe.experts.37.w3", "model.layers.37.block_sparse_moe.experts.38.w3", "model.layers.37.block_sparse_moe.experts.39.w3", "model.layers.37.block_sparse_moe.experts.40.w3", "model.layers.37.block_sparse_moe.experts.41.w3", "model.layers.37.block_sparse_moe.experts.42.w3", "model.layers.37.block_sparse_moe.experts.43.w3", "model.layers.37.block_sparse_moe.experts.44.w3", "model.layers.37.block_sparse_moe.experts.45.w3", "model.layers.37.block_sparse_moe.experts.46.w3", "model.layers.37.block_sparse_moe.experts.47.w3", "model.layers.37.block_sparse_moe.experts.48.w3", "model.layers.37.block_sparse_moe.experts.49.w3", "model.layers.37.block_sparse_moe.experts.50.w3", "model.layers.37.block_sparse_moe.experts.51.w3", "model.layers.37.block_sparse_moe.experts.52.w3", "model.layers.37.block_sparse_moe.experts.53.w3", "model.layers.37.block_sparse_moe.experts.54.w3", "model.layers.37.block_sparse_moe.experts.55.w3", "model.layers.37.block_sparse_moe.experts.56.w3", "model.layers.37.block_sparse_moe.experts.57.w3", "model.layers.37.block_sparse_moe.experts.58.w3", "model.layers.37.block_sparse_moe.experts.59.w3", "model.layers.37.block_sparse_moe.experts.60.w3", "model.layers.37.block_sparse_moe.experts.61.w3", "model.layers.37.block_sparse_moe.experts.62.w3", "model.layers.37.block_sparse_moe.experts.63.w3", "model.layers.37.block_sparse_moe.experts.64.w3", "model.layers.37.block_sparse_moe.experts.65.w3", "model.layers.37.block_sparse_moe.experts.66.w3", "model.layers.37.block_sparse_moe.experts.67.w3", "model.layers.37.block_sparse_moe.experts.68.w3", "model.layers.37.block_sparse_moe.experts.69.w3", "model.layers.37.block_sparse_moe.experts.70.w3", "model.layers.37.block_sparse_moe.experts.71.w3", "model.layers.37.block_sparse_moe.experts.72.w3", "model.layers.37.block_sparse_moe.experts.73.w3", "model.layers.37.block_sparse_moe.experts.74.w3", "model.layers.37.block_sparse_moe.experts.75.w3", "model.layers.37.block_sparse_moe.experts.76.w3", "model.layers.37.block_sparse_moe.experts.77.w3", "model.layers.37.block_sparse_moe.experts.78.w3", "model.layers.37.block_sparse_moe.experts.79.w3", "model.layers.37.block_sparse_moe.experts.80.w3", "model.layers.37.block_sparse_moe.experts.81.w3", "model.layers.37.block_sparse_moe.experts.82.w3", "model.layers.37.block_sparse_moe.experts.83.w3", "model.layers.37.block_sparse_moe.experts.84.w3", "model.layers.37.block_sparse_moe.experts.85.w3", "model.layers.37.block_sparse_moe.experts.86.w3", "model.layers.37.block_sparse_moe.experts.87.w3", "model.layers.37.block_sparse_moe.experts.88.w3", "model.layers.37.block_sparse_moe.experts.89.w3", "model.layers.37.block_sparse_moe.experts.90.w3", "model.layers.37.block_sparse_moe.experts.91.w3", "model.layers.37.block_sparse_moe.experts.92.w3", "model.layers.37.block_sparse_moe.experts.93.w3", "model.layers.37.block_sparse_moe.experts.94.w3", "model.layers.37.block_sparse_moe.experts.95.w3", "model.layers.37.block_sparse_moe.experts.96.w3", "model.layers.37.block_sparse_moe.experts.97.w3", "model.layers.37.block_sparse_moe.experts.98.w3", "model.layers.37.block_sparse_moe.experts.99.w3", "model.layers.37.block_sparse_moe.experts.100.w3", "model.layers.37.block_sparse_moe.experts.101.w3", "model.layers.37.block_sparse_moe.experts.102.w3", "model.layers.37.block_sparse_moe.experts.103.w3", "model.layers.37.block_sparse_moe.experts.104.w3", "model.layers.37.block_sparse_moe.experts.105.w3", "model.layers.37.block_sparse_moe.experts.106.w3", "model.layers.37.block_sparse_moe.experts.107.w3", "model.layers.37.block_sparse_moe.experts.108.w3", "model.layers.37.block_sparse_moe.experts.109.w3", "model.layers.37.block_sparse_moe.experts.110.w3", "model.layers.37.block_sparse_moe.experts.111.w3", "model.layers.37.block_sparse_moe.experts.112.w3", "model.layers.37.block_sparse_moe.experts.113.w3", "model.layers.37.block_sparse_moe.experts.114.w3", "model.layers.37.block_sparse_moe.experts.115.w3", "model.layers.37.block_sparse_moe.experts.116.w3", "model.layers.37.block_sparse_moe.experts.117.w3", "model.layers.37.block_sparse_moe.experts.118.w3", "model.layers.37.block_sparse_moe.experts.119.w3", "model.layers.37.block_sparse_moe.experts.120.w3", "model.layers.37.block_sparse_moe.experts.121.w3", "model.layers.37.block_sparse_moe.experts.122.w3", "model.layers.37.block_sparse_moe.experts.123.w3", "model.layers.37.block_sparse_moe.experts.124.w3", "model.layers.37.block_sparse_moe.experts.125.w3", "model.layers.37.block_sparse_moe.experts.126.w3", "model.layers.37.block_sparse_moe.experts.127.w3", "model.layers.37.block_sparse_moe.experts.128.w3", "model.layers.37.block_sparse_moe.experts.129.w3", "model.layers.37.block_sparse_moe.experts.130.w3", "model.layers.37.block_sparse_moe.experts.131.w3", "model.layers.37.block_sparse_moe.experts.132.w3", "model.layers.37.block_sparse_moe.experts.133.w3", "model.layers.37.block_sparse_moe.experts.134.w3", "model.layers.37.block_sparse_moe.experts.135.w3", "model.layers.37.block_sparse_moe.experts.136.w3", "model.layers.37.block_sparse_moe.experts.137.w3", "model.layers.37.block_sparse_moe.experts.138.w3", "model.layers.37.block_sparse_moe.experts.139.w3", "model.layers.37.block_sparse_moe.experts.140.w3", "model.layers.37.block_sparse_moe.experts.141.w3", "model.layers.37.block_sparse_moe.experts.142.w3", "model.layers.37.block_sparse_moe.experts.143.w3", "model.layers.37.block_sparse_moe.experts.144.w3", "model.layers.37.block_sparse_moe.experts.145.w3", "model.layers.37.block_sparse_moe.experts.146.w3", "model.layers.37.block_sparse_moe.experts.147.w3", "model.layers.37.block_sparse_moe.experts.148.w3", "model.layers.37.block_sparse_moe.experts.149.w3", "model.layers.37.block_sparse_moe.experts.150.w3", "model.layers.37.block_sparse_moe.experts.151.w3", "model.layers.37.block_sparse_moe.experts.152.w3", "model.layers.37.block_sparse_moe.experts.153.w3", "model.layers.37.block_sparse_moe.experts.154.w3", "model.layers.37.block_sparse_moe.experts.155.w3", "model.layers.37.block_sparse_moe.experts.156.w3", "model.layers.37.block_sparse_moe.experts.157.w3", "model.layers.37.block_sparse_moe.experts.158.w3", "model.layers.37.block_sparse_moe.experts.159.w3", "model.layers.37.block_sparse_moe.experts.160.w3", "model.layers.37.block_sparse_moe.experts.161.w3", "model.layers.37.block_sparse_moe.experts.162.w3", "model.layers.37.block_sparse_moe.experts.163.w3", "model.layers.37.block_sparse_moe.experts.164.w3", "model.layers.37.block_sparse_moe.experts.165.w3", "model.layers.37.block_sparse_moe.experts.166.w3", "model.layers.37.block_sparse_moe.experts.167.w3", "model.layers.37.block_sparse_moe.experts.168.w3", "model.layers.37.block_sparse_moe.experts.169.w3", "model.layers.37.block_sparse_moe.experts.170.w3", "model.layers.37.block_sparse_moe.experts.171.w3", "model.layers.37.block_sparse_moe.experts.172.w3", "model.layers.37.block_sparse_moe.experts.173.w3", "model.layers.37.block_sparse_moe.experts.174.w3", "model.layers.37.block_sparse_moe.experts.175.w3", "model.layers.37.block_sparse_moe.experts.176.w3", "model.layers.37.block_sparse_moe.experts.177.w3", "model.layers.37.block_sparse_moe.experts.178.w3", "model.layers.37.block_sparse_moe.experts.179.w3", "model.layers.37.block_sparse_moe.experts.180.w3", "model.layers.37.block_sparse_moe.experts.181.w3", "model.layers.37.block_sparse_moe.experts.182.w3", "model.layers.37.block_sparse_moe.experts.183.w3", "model.layers.37.block_sparse_moe.experts.184.w3", "model.layers.37.block_sparse_moe.experts.185.w3", "model.layers.37.block_sparse_moe.experts.186.w3", "model.layers.37.block_sparse_moe.experts.187.w3", "model.layers.37.block_sparse_moe.experts.188.w3", "model.layers.37.block_sparse_moe.experts.189.w3", "model.layers.37.block_sparse_moe.experts.190.w3", "model.layers.37.block_sparse_moe.experts.191.w3", "model.layers.37.block_sparse_moe.experts.192.w3", "model.layers.37.block_sparse_moe.experts.193.w3", "model.layers.37.block_sparse_moe.experts.194.w3", "model.layers.37.block_sparse_moe.experts.195.w3", "model.layers.37.block_sparse_moe.experts.196.w3", "model.layers.37.block_sparse_moe.experts.197.w3", "model.layers.37.block_sparse_moe.experts.198.w3", "model.layers.37.block_sparse_moe.experts.199.w3", "model.layers.37.block_sparse_moe.experts.200.w3", "model.layers.37.block_sparse_moe.experts.201.w3", "model.layers.37.block_sparse_moe.experts.202.w3", "model.layers.37.block_sparse_moe.experts.203.w3", "model.layers.37.block_sparse_moe.experts.204.w3", "model.layers.37.block_sparse_moe.experts.205.w3", "model.layers.37.block_sparse_moe.experts.206.w3", "model.layers.37.block_sparse_moe.experts.207.w3", "model.layers.37.block_sparse_moe.experts.208.w3", "model.layers.37.block_sparse_moe.experts.209.w3", "model.layers.37.block_sparse_moe.experts.210.w3", "model.layers.37.block_sparse_moe.experts.211.w3", "model.layers.37.block_sparse_moe.experts.212.w3", "model.layers.37.block_sparse_moe.experts.213.w3", "model.layers.37.block_sparse_moe.experts.214.w3", "model.layers.37.block_sparse_moe.experts.215.w3", "model.layers.37.block_sparse_moe.experts.216.w3", "model.layers.37.block_sparse_moe.experts.217.w3", "model.layers.37.block_sparse_moe.experts.218.w3", "model.layers.37.block_sparse_moe.experts.219.w3", "model.layers.37.block_sparse_moe.experts.220.w3", "model.layers.37.block_sparse_moe.experts.221.w3", "model.layers.37.block_sparse_moe.experts.222.w3", "model.layers.37.block_sparse_moe.experts.223.w3", "model.layers.37.block_sparse_moe.experts.224.w3", "model.layers.37.block_sparse_moe.experts.225.w3", "model.layers.37.block_sparse_moe.experts.226.w3", "model.layers.37.block_sparse_moe.experts.227.w3", "model.layers.37.block_sparse_moe.experts.228.w3", "model.layers.37.block_sparse_moe.experts.229.w3", "model.layers.37.block_sparse_moe.experts.230.w3", "model.layers.37.block_sparse_moe.experts.231.w3", "model.layers.37.block_sparse_moe.experts.232.w3", "model.layers.37.block_sparse_moe.experts.233.w3", "model.layers.37.block_sparse_moe.experts.234.w3", "model.layers.37.block_sparse_moe.experts.235.w3", "model.layers.37.block_sparse_moe.experts.236.w3", "model.layers.37.block_sparse_moe.experts.237.w3", "model.layers.37.block_sparse_moe.experts.238.w3", "model.layers.37.block_sparse_moe.experts.239.w3", "model.layers.37.block_sparse_moe.experts.240.w3", "model.layers.37.block_sparse_moe.experts.241.w3", "model.layers.37.block_sparse_moe.experts.242.w3", "model.layers.37.block_sparse_moe.experts.243.w3", "model.layers.37.block_sparse_moe.experts.244.w3", "model.layers.37.block_sparse_moe.experts.245.w3", "model.layers.37.block_sparse_moe.experts.246.w3", "model.layers.37.block_sparse_moe.experts.247.w3", "model.layers.37.block_sparse_moe.experts.248.w3", "model.layers.37.block_sparse_moe.experts.249.w3", "model.layers.37.block_sparse_moe.experts.250.w3", "model.layers.37.block_sparse_moe.experts.251.w3", "model.layers.37.block_sparse_moe.experts.252.w3", "model.layers.37.block_sparse_moe.experts.253.w3", "model.layers.37.block_sparse_moe.experts.254.w3", "model.layers.37.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.0015804577618837024, "dbits": 2415919104 } ] }, { "idx": 189, "layers": [ "model.layers.37.block_sparse_moe.experts.0.w2", "model.layers.37.block_sparse_moe.experts.1.w2", "model.layers.37.block_sparse_moe.experts.2.w2", "model.layers.37.block_sparse_moe.experts.3.w2", "model.layers.37.block_sparse_moe.experts.4.w2", "model.layers.37.block_sparse_moe.experts.5.w2", "model.layers.37.block_sparse_moe.experts.6.w2", "model.layers.37.block_sparse_moe.experts.7.w2", "model.layers.37.block_sparse_moe.experts.8.w2", "model.layers.37.block_sparse_moe.experts.9.w2", "model.layers.37.block_sparse_moe.experts.10.w2", "model.layers.37.block_sparse_moe.experts.11.w2", "model.layers.37.block_sparse_moe.experts.12.w2", "model.layers.37.block_sparse_moe.experts.13.w2", "model.layers.37.block_sparse_moe.experts.14.w2", "model.layers.37.block_sparse_moe.experts.15.w2", "model.layers.37.block_sparse_moe.experts.16.w2", "model.layers.37.block_sparse_moe.experts.17.w2", "model.layers.37.block_sparse_moe.experts.18.w2", "model.layers.37.block_sparse_moe.experts.19.w2", "model.layers.37.block_sparse_moe.experts.20.w2", "model.layers.37.block_sparse_moe.experts.21.w2", "model.layers.37.block_sparse_moe.experts.22.w2", "model.layers.37.block_sparse_moe.experts.23.w2", "model.layers.37.block_sparse_moe.experts.24.w2", "model.layers.37.block_sparse_moe.experts.25.w2", "model.layers.37.block_sparse_moe.experts.26.w2", "model.layers.37.block_sparse_moe.experts.27.w2", "model.layers.37.block_sparse_moe.experts.28.w2", "model.layers.37.block_sparse_moe.experts.29.w2", "model.layers.37.block_sparse_moe.experts.30.w2", "model.layers.37.block_sparse_moe.experts.31.w2", "model.layers.37.block_sparse_moe.experts.32.w2", "model.layers.37.block_sparse_moe.experts.33.w2", "model.layers.37.block_sparse_moe.experts.34.w2", "model.layers.37.block_sparse_moe.experts.35.w2", "model.layers.37.block_sparse_moe.experts.36.w2", "model.layers.37.block_sparse_moe.experts.37.w2", "model.layers.37.block_sparse_moe.experts.38.w2", "model.layers.37.block_sparse_moe.experts.39.w2", "model.layers.37.block_sparse_moe.experts.40.w2", "model.layers.37.block_sparse_moe.experts.41.w2", "model.layers.37.block_sparse_moe.experts.42.w2", "model.layers.37.block_sparse_moe.experts.43.w2", "model.layers.37.block_sparse_moe.experts.44.w2", "model.layers.37.block_sparse_moe.experts.45.w2", "model.layers.37.block_sparse_moe.experts.46.w2", "model.layers.37.block_sparse_moe.experts.47.w2", "model.layers.37.block_sparse_moe.experts.48.w2", "model.layers.37.block_sparse_moe.experts.49.w2", "model.layers.37.block_sparse_moe.experts.50.w2", "model.layers.37.block_sparse_moe.experts.51.w2", "model.layers.37.block_sparse_moe.experts.52.w2", "model.layers.37.block_sparse_moe.experts.53.w2", "model.layers.37.block_sparse_moe.experts.54.w2", "model.layers.37.block_sparse_moe.experts.55.w2", "model.layers.37.block_sparse_moe.experts.56.w2", "model.layers.37.block_sparse_moe.experts.57.w2", "model.layers.37.block_sparse_moe.experts.58.w2", "model.layers.37.block_sparse_moe.experts.59.w2", "model.layers.37.block_sparse_moe.experts.60.w2", "model.layers.37.block_sparse_moe.experts.61.w2", "model.layers.37.block_sparse_moe.experts.62.w2", "model.layers.37.block_sparse_moe.experts.63.w2", "model.layers.37.block_sparse_moe.experts.64.w2", "model.layers.37.block_sparse_moe.experts.65.w2", "model.layers.37.block_sparse_moe.experts.66.w2", "model.layers.37.block_sparse_moe.experts.67.w2", "model.layers.37.block_sparse_moe.experts.68.w2", "model.layers.37.block_sparse_moe.experts.69.w2", "model.layers.37.block_sparse_moe.experts.70.w2", "model.layers.37.block_sparse_moe.experts.71.w2", "model.layers.37.block_sparse_moe.experts.72.w2", "model.layers.37.block_sparse_moe.experts.73.w2", "model.layers.37.block_sparse_moe.experts.74.w2", "model.layers.37.block_sparse_moe.experts.75.w2", "model.layers.37.block_sparse_moe.experts.76.w2", "model.layers.37.block_sparse_moe.experts.77.w2", "model.layers.37.block_sparse_moe.experts.78.w2", "model.layers.37.block_sparse_moe.experts.79.w2", "model.layers.37.block_sparse_moe.experts.80.w2", "model.layers.37.block_sparse_moe.experts.81.w2", "model.layers.37.block_sparse_moe.experts.82.w2", "model.layers.37.block_sparse_moe.experts.83.w2", "model.layers.37.block_sparse_moe.experts.84.w2", "model.layers.37.block_sparse_moe.experts.85.w2", "model.layers.37.block_sparse_moe.experts.86.w2", "model.layers.37.block_sparse_moe.experts.87.w2", "model.layers.37.block_sparse_moe.experts.88.w2", "model.layers.37.block_sparse_moe.experts.89.w2", "model.layers.37.block_sparse_moe.experts.90.w2", "model.layers.37.block_sparse_moe.experts.91.w2", "model.layers.37.block_sparse_moe.experts.92.w2", "model.layers.37.block_sparse_moe.experts.93.w2", "model.layers.37.block_sparse_moe.experts.94.w2", "model.layers.37.block_sparse_moe.experts.95.w2", "model.layers.37.block_sparse_moe.experts.96.w2", "model.layers.37.block_sparse_moe.experts.97.w2", "model.layers.37.block_sparse_moe.experts.98.w2", "model.layers.37.block_sparse_moe.experts.99.w2", "model.layers.37.block_sparse_moe.experts.100.w2", "model.layers.37.block_sparse_moe.experts.101.w2", "model.layers.37.block_sparse_moe.experts.102.w2", "model.layers.37.block_sparse_moe.experts.103.w2", "model.layers.37.block_sparse_moe.experts.104.w2", "model.layers.37.block_sparse_moe.experts.105.w2", "model.layers.37.block_sparse_moe.experts.106.w2", "model.layers.37.block_sparse_moe.experts.107.w2", "model.layers.37.block_sparse_moe.experts.108.w2", "model.layers.37.block_sparse_moe.experts.109.w2", "model.layers.37.block_sparse_moe.experts.110.w2", "model.layers.37.block_sparse_moe.experts.111.w2", "model.layers.37.block_sparse_moe.experts.112.w2", "model.layers.37.block_sparse_moe.experts.113.w2", "model.layers.37.block_sparse_moe.experts.114.w2", "model.layers.37.block_sparse_moe.experts.115.w2", "model.layers.37.block_sparse_moe.experts.116.w2", "model.layers.37.block_sparse_moe.experts.117.w2", "model.layers.37.block_sparse_moe.experts.118.w2", "model.layers.37.block_sparse_moe.experts.119.w2", "model.layers.37.block_sparse_moe.experts.120.w2", "model.layers.37.block_sparse_moe.experts.121.w2", "model.layers.37.block_sparse_moe.experts.122.w2", "model.layers.37.block_sparse_moe.experts.123.w2", "model.layers.37.block_sparse_moe.experts.124.w2", "model.layers.37.block_sparse_moe.experts.125.w2", "model.layers.37.block_sparse_moe.experts.126.w2", "model.layers.37.block_sparse_moe.experts.127.w2", "model.layers.37.block_sparse_moe.experts.128.w2", "model.layers.37.block_sparse_moe.experts.129.w2", "model.layers.37.block_sparse_moe.experts.130.w2", "model.layers.37.block_sparse_moe.experts.131.w2", "model.layers.37.block_sparse_moe.experts.132.w2", "model.layers.37.block_sparse_moe.experts.133.w2", "model.layers.37.block_sparse_moe.experts.134.w2", "model.layers.37.block_sparse_moe.experts.135.w2", "model.layers.37.block_sparse_moe.experts.136.w2", "model.layers.37.block_sparse_moe.experts.137.w2", "model.layers.37.block_sparse_moe.experts.138.w2", "model.layers.37.block_sparse_moe.experts.139.w2", "model.layers.37.block_sparse_moe.experts.140.w2", "model.layers.37.block_sparse_moe.experts.141.w2", "model.layers.37.block_sparse_moe.experts.142.w2", "model.layers.37.block_sparse_moe.experts.143.w2", "model.layers.37.block_sparse_moe.experts.144.w2", "model.layers.37.block_sparse_moe.experts.145.w2", "model.layers.37.block_sparse_moe.experts.146.w2", "model.layers.37.block_sparse_moe.experts.147.w2", "model.layers.37.block_sparse_moe.experts.148.w2", "model.layers.37.block_sparse_moe.experts.149.w2", "model.layers.37.block_sparse_moe.experts.150.w2", "model.layers.37.block_sparse_moe.experts.151.w2", "model.layers.37.block_sparse_moe.experts.152.w2", "model.layers.37.block_sparse_moe.experts.153.w2", "model.layers.37.block_sparse_moe.experts.154.w2", "model.layers.37.block_sparse_moe.experts.155.w2", "model.layers.37.block_sparse_moe.experts.156.w2", "model.layers.37.block_sparse_moe.experts.157.w2", "model.layers.37.block_sparse_moe.experts.158.w2", "model.layers.37.block_sparse_moe.experts.159.w2", "model.layers.37.block_sparse_moe.experts.160.w2", "model.layers.37.block_sparse_moe.experts.161.w2", "model.layers.37.block_sparse_moe.experts.162.w2", "model.layers.37.block_sparse_moe.experts.163.w2", "model.layers.37.block_sparse_moe.experts.164.w2", "model.layers.37.block_sparse_moe.experts.165.w2", "model.layers.37.block_sparse_moe.experts.166.w2", "model.layers.37.block_sparse_moe.experts.167.w2", "model.layers.37.block_sparse_moe.experts.168.w2", "model.layers.37.block_sparse_moe.experts.169.w2", "model.layers.37.block_sparse_moe.experts.170.w2", "model.layers.37.block_sparse_moe.experts.171.w2", "model.layers.37.block_sparse_moe.experts.172.w2", "model.layers.37.block_sparse_moe.experts.173.w2", "model.layers.37.block_sparse_moe.experts.174.w2", "model.layers.37.block_sparse_moe.experts.175.w2", "model.layers.37.block_sparse_moe.experts.176.w2", "model.layers.37.block_sparse_moe.experts.177.w2", "model.layers.37.block_sparse_moe.experts.178.w2", "model.layers.37.block_sparse_moe.experts.179.w2", "model.layers.37.block_sparse_moe.experts.180.w2", "model.layers.37.block_sparse_moe.experts.181.w2", "model.layers.37.block_sparse_moe.experts.182.w2", "model.layers.37.block_sparse_moe.experts.183.w2", "model.layers.37.block_sparse_moe.experts.184.w2", "model.layers.37.block_sparse_moe.experts.185.w2", "model.layers.37.block_sparse_moe.experts.186.w2", "model.layers.37.block_sparse_moe.experts.187.w2", "model.layers.37.block_sparse_moe.experts.188.w2", "model.layers.37.block_sparse_moe.experts.189.w2", "model.layers.37.block_sparse_moe.experts.190.w2", "model.layers.37.block_sparse_moe.experts.191.w2", "model.layers.37.block_sparse_moe.experts.192.w2", "model.layers.37.block_sparse_moe.experts.193.w2", "model.layers.37.block_sparse_moe.experts.194.w2", "model.layers.37.block_sparse_moe.experts.195.w2", "model.layers.37.block_sparse_moe.experts.196.w2", "model.layers.37.block_sparse_moe.experts.197.w2", "model.layers.37.block_sparse_moe.experts.198.w2", "model.layers.37.block_sparse_moe.experts.199.w2", "model.layers.37.block_sparse_moe.experts.200.w2", "model.layers.37.block_sparse_moe.experts.201.w2", "model.layers.37.block_sparse_moe.experts.202.w2", "model.layers.37.block_sparse_moe.experts.203.w2", "model.layers.37.block_sparse_moe.experts.204.w2", "model.layers.37.block_sparse_moe.experts.205.w2", "model.layers.37.block_sparse_moe.experts.206.w2", "model.layers.37.block_sparse_moe.experts.207.w2", "model.layers.37.block_sparse_moe.experts.208.w2", "model.layers.37.block_sparse_moe.experts.209.w2", "model.layers.37.block_sparse_moe.experts.210.w2", "model.layers.37.block_sparse_moe.experts.211.w2", "model.layers.37.block_sparse_moe.experts.212.w2", "model.layers.37.block_sparse_moe.experts.213.w2", "model.layers.37.block_sparse_moe.experts.214.w2", "model.layers.37.block_sparse_moe.experts.215.w2", "model.layers.37.block_sparse_moe.experts.216.w2", "model.layers.37.block_sparse_moe.experts.217.w2", "model.layers.37.block_sparse_moe.experts.218.w2", "model.layers.37.block_sparse_moe.experts.219.w2", "model.layers.37.block_sparse_moe.experts.220.w2", "model.layers.37.block_sparse_moe.experts.221.w2", "model.layers.37.block_sparse_moe.experts.222.w2", "model.layers.37.block_sparse_moe.experts.223.w2", "model.layers.37.block_sparse_moe.experts.224.w2", "model.layers.37.block_sparse_moe.experts.225.w2", "model.layers.37.block_sparse_moe.experts.226.w2", "model.layers.37.block_sparse_moe.experts.227.w2", "model.layers.37.block_sparse_moe.experts.228.w2", "model.layers.37.block_sparse_moe.experts.229.w2", "model.layers.37.block_sparse_moe.experts.230.w2", "model.layers.37.block_sparse_moe.experts.231.w2", "model.layers.37.block_sparse_moe.experts.232.w2", "model.layers.37.block_sparse_moe.experts.233.w2", "model.layers.37.block_sparse_moe.experts.234.w2", "model.layers.37.block_sparse_moe.experts.235.w2", "model.layers.37.block_sparse_moe.experts.236.w2", "model.layers.37.block_sparse_moe.experts.237.w2", "model.layers.37.block_sparse_moe.experts.238.w2", "model.layers.37.block_sparse_moe.experts.239.w2", "model.layers.37.block_sparse_moe.experts.240.w2", "model.layers.37.block_sparse_moe.experts.241.w2", "model.layers.37.block_sparse_moe.experts.242.w2", "model.layers.37.block_sparse_moe.experts.243.w2", "model.layers.37.block_sparse_moe.experts.244.w2", "model.layers.37.block_sparse_moe.experts.245.w2", "model.layers.37.block_sparse_moe.experts.246.w2", "model.layers.37.block_sparse_moe.experts.247.w2", "model.layers.37.block_sparse_moe.experts.248.w2", "model.layers.37.block_sparse_moe.experts.249.w2", "model.layers.37.block_sparse_moe.experts.250.w2", "model.layers.37.block_sparse_moe.experts.251.w2", "model.layers.37.block_sparse_moe.experts.252.w2", "model.layers.37.block_sparse_moe.experts.253.w2", "model.layers.37.block_sparse_moe.experts.254.w2", "model.layers.37.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.00013528242707250282, "dbits": 1207959552 } ] }, { "idx": 190, "layers": [ "model.layers.38.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0006262604147195816, "dbits": 18874368 } ] }, { "idx": 191, "layers": [ "model.layers.38.self_attn.k_proj", "model.layers.38.self_attn.v_proj" ], "candidates": [ { "dkld": 0.012862316519021977, "dbits": 6291456 } ] }, { "idx": 192, "layers": [ "model.layers.38.self_attn.o_proj" ], "candidates": [ { "dkld": -0.00379837825894358, "dbits": 18874368 } ] }, { "idx": 193, "layers": [ "model.layers.38.block_sparse_moe.experts.0.w1", "model.layers.38.block_sparse_moe.experts.1.w1", "model.layers.38.block_sparse_moe.experts.2.w1", "model.layers.38.block_sparse_moe.experts.3.w1", "model.layers.38.block_sparse_moe.experts.4.w1", "model.layers.38.block_sparse_moe.experts.5.w1", "model.layers.38.block_sparse_moe.experts.6.w1", "model.layers.38.block_sparse_moe.experts.7.w1", "model.layers.38.block_sparse_moe.experts.8.w1", "model.layers.38.block_sparse_moe.experts.9.w1", "model.layers.38.block_sparse_moe.experts.10.w1", "model.layers.38.block_sparse_moe.experts.11.w1", "model.layers.38.block_sparse_moe.experts.12.w1", "model.layers.38.block_sparse_moe.experts.13.w1", "model.layers.38.block_sparse_moe.experts.14.w1", "model.layers.38.block_sparse_moe.experts.15.w1", "model.layers.38.block_sparse_moe.experts.16.w1", "model.layers.38.block_sparse_moe.experts.17.w1", "model.layers.38.block_sparse_moe.experts.18.w1", "model.layers.38.block_sparse_moe.experts.19.w1", "model.layers.38.block_sparse_moe.experts.20.w1", "model.layers.38.block_sparse_moe.experts.21.w1", "model.layers.38.block_sparse_moe.experts.22.w1", "model.layers.38.block_sparse_moe.experts.23.w1", "model.layers.38.block_sparse_moe.experts.24.w1", "model.layers.38.block_sparse_moe.experts.25.w1", "model.layers.38.block_sparse_moe.experts.26.w1", "model.layers.38.block_sparse_moe.experts.27.w1", "model.layers.38.block_sparse_moe.experts.28.w1", "model.layers.38.block_sparse_moe.experts.29.w1", "model.layers.38.block_sparse_moe.experts.30.w1", "model.layers.38.block_sparse_moe.experts.31.w1", "model.layers.38.block_sparse_moe.experts.32.w1", "model.layers.38.block_sparse_moe.experts.33.w1", "model.layers.38.block_sparse_moe.experts.34.w1", "model.layers.38.block_sparse_moe.experts.35.w1", "model.layers.38.block_sparse_moe.experts.36.w1", "model.layers.38.block_sparse_moe.experts.37.w1", "model.layers.38.block_sparse_moe.experts.38.w1", "model.layers.38.block_sparse_moe.experts.39.w1", "model.layers.38.block_sparse_moe.experts.40.w1", "model.layers.38.block_sparse_moe.experts.41.w1", "model.layers.38.block_sparse_moe.experts.42.w1", "model.layers.38.block_sparse_moe.experts.43.w1", "model.layers.38.block_sparse_moe.experts.44.w1", "model.layers.38.block_sparse_moe.experts.45.w1", "model.layers.38.block_sparse_moe.experts.46.w1", "model.layers.38.block_sparse_moe.experts.47.w1", "model.layers.38.block_sparse_moe.experts.48.w1", "model.layers.38.block_sparse_moe.experts.49.w1", "model.layers.38.block_sparse_moe.experts.50.w1", "model.layers.38.block_sparse_moe.experts.51.w1", "model.layers.38.block_sparse_moe.experts.52.w1", "model.layers.38.block_sparse_moe.experts.53.w1", "model.layers.38.block_sparse_moe.experts.54.w1", "model.layers.38.block_sparse_moe.experts.55.w1", "model.layers.38.block_sparse_moe.experts.56.w1", "model.layers.38.block_sparse_moe.experts.57.w1", "model.layers.38.block_sparse_moe.experts.58.w1", "model.layers.38.block_sparse_moe.experts.59.w1", "model.layers.38.block_sparse_moe.experts.60.w1", "model.layers.38.block_sparse_moe.experts.61.w1", "model.layers.38.block_sparse_moe.experts.62.w1", "model.layers.38.block_sparse_moe.experts.63.w1", "model.layers.38.block_sparse_moe.experts.64.w1", "model.layers.38.block_sparse_moe.experts.65.w1", "model.layers.38.block_sparse_moe.experts.66.w1", "model.layers.38.block_sparse_moe.experts.67.w1", "model.layers.38.block_sparse_moe.experts.68.w1", "model.layers.38.block_sparse_moe.experts.69.w1", "model.layers.38.block_sparse_moe.experts.70.w1", "model.layers.38.block_sparse_moe.experts.71.w1", "model.layers.38.block_sparse_moe.experts.72.w1", "model.layers.38.block_sparse_moe.experts.73.w1", "model.layers.38.block_sparse_moe.experts.74.w1", "model.layers.38.block_sparse_moe.experts.75.w1", "model.layers.38.block_sparse_moe.experts.76.w1", "model.layers.38.block_sparse_moe.experts.77.w1", "model.layers.38.block_sparse_moe.experts.78.w1", "model.layers.38.block_sparse_moe.experts.79.w1", "model.layers.38.block_sparse_moe.experts.80.w1", "model.layers.38.block_sparse_moe.experts.81.w1", "model.layers.38.block_sparse_moe.experts.82.w1", "model.layers.38.block_sparse_moe.experts.83.w1", "model.layers.38.block_sparse_moe.experts.84.w1", "model.layers.38.block_sparse_moe.experts.85.w1", "model.layers.38.block_sparse_moe.experts.86.w1", "model.layers.38.block_sparse_moe.experts.87.w1", "model.layers.38.block_sparse_moe.experts.88.w1", "model.layers.38.block_sparse_moe.experts.89.w1", "model.layers.38.block_sparse_moe.experts.90.w1", "model.layers.38.block_sparse_moe.experts.91.w1", "model.layers.38.block_sparse_moe.experts.92.w1", "model.layers.38.block_sparse_moe.experts.93.w1", "model.layers.38.block_sparse_moe.experts.94.w1", "model.layers.38.block_sparse_moe.experts.95.w1", "model.layers.38.block_sparse_moe.experts.96.w1", "model.layers.38.block_sparse_moe.experts.97.w1", "model.layers.38.block_sparse_moe.experts.98.w1", "model.layers.38.block_sparse_moe.experts.99.w1", "model.layers.38.block_sparse_moe.experts.100.w1", "model.layers.38.block_sparse_moe.experts.101.w1", "model.layers.38.block_sparse_moe.experts.102.w1", "model.layers.38.block_sparse_moe.experts.103.w1", "model.layers.38.block_sparse_moe.experts.104.w1", "model.layers.38.block_sparse_moe.experts.105.w1", "model.layers.38.block_sparse_moe.experts.106.w1", "model.layers.38.block_sparse_moe.experts.107.w1", "model.layers.38.block_sparse_moe.experts.108.w1", "model.layers.38.block_sparse_moe.experts.109.w1", "model.layers.38.block_sparse_moe.experts.110.w1", "model.layers.38.block_sparse_moe.experts.111.w1", "model.layers.38.block_sparse_moe.experts.112.w1", "model.layers.38.block_sparse_moe.experts.113.w1", "model.layers.38.block_sparse_moe.experts.114.w1", "model.layers.38.block_sparse_moe.experts.115.w1", "model.layers.38.block_sparse_moe.experts.116.w1", "model.layers.38.block_sparse_moe.experts.117.w1", "model.layers.38.block_sparse_moe.experts.118.w1", "model.layers.38.block_sparse_moe.experts.119.w1", "model.layers.38.block_sparse_moe.experts.120.w1", "model.layers.38.block_sparse_moe.experts.121.w1", "model.layers.38.block_sparse_moe.experts.122.w1", "model.layers.38.block_sparse_moe.experts.123.w1", "model.layers.38.block_sparse_moe.experts.124.w1", "model.layers.38.block_sparse_moe.experts.125.w1", "model.layers.38.block_sparse_moe.experts.126.w1", "model.layers.38.block_sparse_moe.experts.127.w1", "model.layers.38.block_sparse_moe.experts.128.w1", "model.layers.38.block_sparse_moe.experts.129.w1", "model.layers.38.block_sparse_moe.experts.130.w1", "model.layers.38.block_sparse_moe.experts.131.w1", "model.layers.38.block_sparse_moe.experts.132.w1", "model.layers.38.block_sparse_moe.experts.133.w1", "model.layers.38.block_sparse_moe.experts.134.w1", "model.layers.38.block_sparse_moe.experts.135.w1", "model.layers.38.block_sparse_moe.experts.136.w1", "model.layers.38.block_sparse_moe.experts.137.w1", "model.layers.38.block_sparse_moe.experts.138.w1", "model.layers.38.block_sparse_moe.experts.139.w1", "model.layers.38.block_sparse_moe.experts.140.w1", "model.layers.38.block_sparse_moe.experts.141.w1", "model.layers.38.block_sparse_moe.experts.142.w1", "model.layers.38.block_sparse_moe.experts.143.w1", "model.layers.38.block_sparse_moe.experts.144.w1", "model.layers.38.block_sparse_moe.experts.145.w1", "model.layers.38.block_sparse_moe.experts.146.w1", "model.layers.38.block_sparse_moe.experts.147.w1", "model.layers.38.block_sparse_moe.experts.148.w1", "model.layers.38.block_sparse_moe.experts.149.w1", "model.layers.38.block_sparse_moe.experts.150.w1", "model.layers.38.block_sparse_moe.experts.151.w1", "model.layers.38.block_sparse_moe.experts.152.w1", "model.layers.38.block_sparse_moe.experts.153.w1", "model.layers.38.block_sparse_moe.experts.154.w1", "model.layers.38.block_sparse_moe.experts.155.w1", "model.layers.38.block_sparse_moe.experts.156.w1", "model.layers.38.block_sparse_moe.experts.157.w1", "model.layers.38.block_sparse_moe.experts.158.w1", "model.layers.38.block_sparse_moe.experts.159.w1", "model.layers.38.block_sparse_moe.experts.160.w1", "model.layers.38.block_sparse_moe.experts.161.w1", "model.layers.38.block_sparse_moe.experts.162.w1", "model.layers.38.block_sparse_moe.experts.163.w1", "model.layers.38.block_sparse_moe.experts.164.w1", "model.layers.38.block_sparse_moe.experts.165.w1", "model.layers.38.block_sparse_moe.experts.166.w1", "model.layers.38.block_sparse_moe.experts.167.w1", "model.layers.38.block_sparse_moe.experts.168.w1", "model.layers.38.block_sparse_moe.experts.169.w1", "model.layers.38.block_sparse_moe.experts.170.w1", "model.layers.38.block_sparse_moe.experts.171.w1", "model.layers.38.block_sparse_moe.experts.172.w1", "model.layers.38.block_sparse_moe.experts.173.w1", "model.layers.38.block_sparse_moe.experts.174.w1", "model.layers.38.block_sparse_moe.experts.175.w1", "model.layers.38.block_sparse_moe.experts.176.w1", "model.layers.38.block_sparse_moe.experts.177.w1", "model.layers.38.block_sparse_moe.experts.178.w1", "model.layers.38.block_sparse_moe.experts.179.w1", "model.layers.38.block_sparse_moe.experts.180.w1", "model.layers.38.block_sparse_moe.experts.181.w1", "model.layers.38.block_sparse_moe.experts.182.w1", "model.layers.38.block_sparse_moe.experts.183.w1", "model.layers.38.block_sparse_moe.experts.184.w1", "model.layers.38.block_sparse_moe.experts.185.w1", "model.layers.38.block_sparse_moe.experts.186.w1", "model.layers.38.block_sparse_moe.experts.187.w1", "model.layers.38.block_sparse_moe.experts.188.w1", "model.layers.38.block_sparse_moe.experts.189.w1", "model.layers.38.block_sparse_moe.experts.190.w1", "model.layers.38.block_sparse_moe.experts.191.w1", "model.layers.38.block_sparse_moe.experts.192.w1", "model.layers.38.block_sparse_moe.experts.193.w1", "model.layers.38.block_sparse_moe.experts.194.w1", "model.layers.38.block_sparse_moe.experts.195.w1", "model.layers.38.block_sparse_moe.experts.196.w1", "model.layers.38.block_sparse_moe.experts.197.w1", "model.layers.38.block_sparse_moe.experts.198.w1", "model.layers.38.block_sparse_moe.experts.199.w1", "model.layers.38.block_sparse_moe.experts.200.w1", "model.layers.38.block_sparse_moe.experts.201.w1", "model.layers.38.block_sparse_moe.experts.202.w1", "model.layers.38.block_sparse_moe.experts.203.w1", "model.layers.38.block_sparse_moe.experts.204.w1", "model.layers.38.block_sparse_moe.experts.205.w1", "model.layers.38.block_sparse_moe.experts.206.w1", "model.layers.38.block_sparse_moe.experts.207.w1", "model.layers.38.block_sparse_moe.experts.208.w1", "model.layers.38.block_sparse_moe.experts.209.w1", "model.layers.38.block_sparse_moe.experts.210.w1", "model.layers.38.block_sparse_moe.experts.211.w1", "model.layers.38.block_sparse_moe.experts.212.w1", "model.layers.38.block_sparse_moe.experts.213.w1", "model.layers.38.block_sparse_moe.experts.214.w1", "model.layers.38.block_sparse_moe.experts.215.w1", "model.layers.38.block_sparse_moe.experts.216.w1", "model.layers.38.block_sparse_moe.experts.217.w1", "model.layers.38.block_sparse_moe.experts.218.w1", "model.layers.38.block_sparse_moe.experts.219.w1", "model.layers.38.block_sparse_moe.experts.220.w1", "model.layers.38.block_sparse_moe.experts.221.w1", "model.layers.38.block_sparse_moe.experts.222.w1", "model.layers.38.block_sparse_moe.experts.223.w1", "model.layers.38.block_sparse_moe.experts.224.w1", "model.layers.38.block_sparse_moe.experts.225.w1", "model.layers.38.block_sparse_moe.experts.226.w1", "model.layers.38.block_sparse_moe.experts.227.w1", "model.layers.38.block_sparse_moe.experts.228.w1", "model.layers.38.block_sparse_moe.experts.229.w1", "model.layers.38.block_sparse_moe.experts.230.w1", "model.layers.38.block_sparse_moe.experts.231.w1", "model.layers.38.block_sparse_moe.experts.232.w1", "model.layers.38.block_sparse_moe.experts.233.w1", "model.layers.38.block_sparse_moe.experts.234.w1", "model.layers.38.block_sparse_moe.experts.235.w1", "model.layers.38.block_sparse_moe.experts.236.w1", "model.layers.38.block_sparse_moe.experts.237.w1", "model.layers.38.block_sparse_moe.experts.238.w1", "model.layers.38.block_sparse_moe.experts.239.w1", "model.layers.38.block_sparse_moe.experts.240.w1", "model.layers.38.block_sparse_moe.experts.241.w1", "model.layers.38.block_sparse_moe.experts.242.w1", "model.layers.38.block_sparse_moe.experts.243.w1", "model.layers.38.block_sparse_moe.experts.244.w1", "model.layers.38.block_sparse_moe.experts.245.w1", "model.layers.38.block_sparse_moe.experts.246.w1", "model.layers.38.block_sparse_moe.experts.247.w1", "model.layers.38.block_sparse_moe.experts.248.w1", "model.layers.38.block_sparse_moe.experts.249.w1", "model.layers.38.block_sparse_moe.experts.250.w1", "model.layers.38.block_sparse_moe.experts.251.w1", "model.layers.38.block_sparse_moe.experts.252.w1", "model.layers.38.block_sparse_moe.experts.253.w1", "model.layers.38.block_sparse_moe.experts.254.w1", "model.layers.38.block_sparse_moe.experts.255.w1", "model.layers.38.block_sparse_moe.experts.0.w3", "model.layers.38.block_sparse_moe.experts.1.w3", "model.layers.38.block_sparse_moe.experts.2.w3", "model.layers.38.block_sparse_moe.experts.3.w3", "model.layers.38.block_sparse_moe.experts.4.w3", "model.layers.38.block_sparse_moe.experts.5.w3", "model.layers.38.block_sparse_moe.experts.6.w3", "model.layers.38.block_sparse_moe.experts.7.w3", "model.layers.38.block_sparse_moe.experts.8.w3", "model.layers.38.block_sparse_moe.experts.9.w3", "model.layers.38.block_sparse_moe.experts.10.w3", "model.layers.38.block_sparse_moe.experts.11.w3", "model.layers.38.block_sparse_moe.experts.12.w3", "model.layers.38.block_sparse_moe.experts.13.w3", "model.layers.38.block_sparse_moe.experts.14.w3", "model.layers.38.block_sparse_moe.experts.15.w3", "model.layers.38.block_sparse_moe.experts.16.w3", "model.layers.38.block_sparse_moe.experts.17.w3", "model.layers.38.block_sparse_moe.experts.18.w3", "model.layers.38.block_sparse_moe.experts.19.w3", "model.layers.38.block_sparse_moe.experts.20.w3", "model.layers.38.block_sparse_moe.experts.21.w3", "model.layers.38.block_sparse_moe.experts.22.w3", "model.layers.38.block_sparse_moe.experts.23.w3", "model.layers.38.block_sparse_moe.experts.24.w3", "model.layers.38.block_sparse_moe.experts.25.w3", "model.layers.38.block_sparse_moe.experts.26.w3", "model.layers.38.block_sparse_moe.experts.27.w3", "model.layers.38.block_sparse_moe.experts.28.w3", "model.layers.38.block_sparse_moe.experts.29.w3", "model.layers.38.block_sparse_moe.experts.30.w3", "model.layers.38.block_sparse_moe.experts.31.w3", "model.layers.38.block_sparse_moe.experts.32.w3", "model.layers.38.block_sparse_moe.experts.33.w3", "model.layers.38.block_sparse_moe.experts.34.w3", "model.layers.38.block_sparse_moe.experts.35.w3", "model.layers.38.block_sparse_moe.experts.36.w3", "model.layers.38.block_sparse_moe.experts.37.w3", "model.layers.38.block_sparse_moe.experts.38.w3", "model.layers.38.block_sparse_moe.experts.39.w3", "model.layers.38.block_sparse_moe.experts.40.w3", "model.layers.38.block_sparse_moe.experts.41.w3", "model.layers.38.block_sparse_moe.experts.42.w3", "model.layers.38.block_sparse_moe.experts.43.w3", "model.layers.38.block_sparse_moe.experts.44.w3", "model.layers.38.block_sparse_moe.experts.45.w3", "model.layers.38.block_sparse_moe.experts.46.w3", "model.layers.38.block_sparse_moe.experts.47.w3", "model.layers.38.block_sparse_moe.experts.48.w3", "model.layers.38.block_sparse_moe.experts.49.w3", "model.layers.38.block_sparse_moe.experts.50.w3", "model.layers.38.block_sparse_moe.experts.51.w3", "model.layers.38.block_sparse_moe.experts.52.w3", "model.layers.38.block_sparse_moe.experts.53.w3", "model.layers.38.block_sparse_moe.experts.54.w3", "model.layers.38.block_sparse_moe.experts.55.w3", "model.layers.38.block_sparse_moe.experts.56.w3", "model.layers.38.block_sparse_moe.experts.57.w3", "model.layers.38.block_sparse_moe.experts.58.w3", "model.layers.38.block_sparse_moe.experts.59.w3", "model.layers.38.block_sparse_moe.experts.60.w3", "model.layers.38.block_sparse_moe.experts.61.w3", "model.layers.38.block_sparse_moe.experts.62.w3", "model.layers.38.block_sparse_moe.experts.63.w3", "model.layers.38.block_sparse_moe.experts.64.w3", "model.layers.38.block_sparse_moe.experts.65.w3", "model.layers.38.block_sparse_moe.experts.66.w3", "model.layers.38.block_sparse_moe.experts.67.w3", "model.layers.38.block_sparse_moe.experts.68.w3", "model.layers.38.block_sparse_moe.experts.69.w3", "model.layers.38.block_sparse_moe.experts.70.w3", "model.layers.38.block_sparse_moe.experts.71.w3", "model.layers.38.block_sparse_moe.experts.72.w3", "model.layers.38.block_sparse_moe.experts.73.w3", "model.layers.38.block_sparse_moe.experts.74.w3", "model.layers.38.block_sparse_moe.experts.75.w3", "model.layers.38.block_sparse_moe.experts.76.w3", "model.layers.38.block_sparse_moe.experts.77.w3", "model.layers.38.block_sparse_moe.experts.78.w3", "model.layers.38.block_sparse_moe.experts.79.w3", "model.layers.38.block_sparse_moe.experts.80.w3", "model.layers.38.block_sparse_moe.experts.81.w3", "model.layers.38.block_sparse_moe.experts.82.w3", "model.layers.38.block_sparse_moe.experts.83.w3", "model.layers.38.block_sparse_moe.experts.84.w3", "model.layers.38.block_sparse_moe.experts.85.w3", "model.layers.38.block_sparse_moe.experts.86.w3", "model.layers.38.block_sparse_moe.experts.87.w3", "model.layers.38.block_sparse_moe.experts.88.w3", "model.layers.38.block_sparse_moe.experts.89.w3", "model.layers.38.block_sparse_moe.experts.90.w3", "model.layers.38.block_sparse_moe.experts.91.w3", "model.layers.38.block_sparse_moe.experts.92.w3", "model.layers.38.block_sparse_moe.experts.93.w3", "model.layers.38.block_sparse_moe.experts.94.w3", "model.layers.38.block_sparse_moe.experts.95.w3", "model.layers.38.block_sparse_moe.experts.96.w3", "model.layers.38.block_sparse_moe.experts.97.w3", "model.layers.38.block_sparse_moe.experts.98.w3", "model.layers.38.block_sparse_moe.experts.99.w3", "model.layers.38.block_sparse_moe.experts.100.w3", "model.layers.38.block_sparse_moe.experts.101.w3", "model.layers.38.block_sparse_moe.experts.102.w3", "model.layers.38.block_sparse_moe.experts.103.w3", "model.layers.38.block_sparse_moe.experts.104.w3", "model.layers.38.block_sparse_moe.experts.105.w3", "model.layers.38.block_sparse_moe.experts.106.w3", "model.layers.38.block_sparse_moe.experts.107.w3", "model.layers.38.block_sparse_moe.experts.108.w3", "model.layers.38.block_sparse_moe.experts.109.w3", "model.layers.38.block_sparse_moe.experts.110.w3", "model.layers.38.block_sparse_moe.experts.111.w3", "model.layers.38.block_sparse_moe.experts.112.w3", "model.layers.38.block_sparse_moe.experts.113.w3", "model.layers.38.block_sparse_moe.experts.114.w3", "model.layers.38.block_sparse_moe.experts.115.w3", "model.layers.38.block_sparse_moe.experts.116.w3", "model.layers.38.block_sparse_moe.experts.117.w3", "model.layers.38.block_sparse_moe.experts.118.w3", "model.layers.38.block_sparse_moe.experts.119.w3", "model.layers.38.block_sparse_moe.experts.120.w3", "model.layers.38.block_sparse_moe.experts.121.w3", "model.layers.38.block_sparse_moe.experts.122.w3", "model.layers.38.block_sparse_moe.experts.123.w3", "model.layers.38.block_sparse_moe.experts.124.w3", "model.layers.38.block_sparse_moe.experts.125.w3", "model.layers.38.block_sparse_moe.experts.126.w3", "model.layers.38.block_sparse_moe.experts.127.w3", "model.layers.38.block_sparse_moe.experts.128.w3", "model.layers.38.block_sparse_moe.experts.129.w3", "model.layers.38.block_sparse_moe.experts.130.w3", "model.layers.38.block_sparse_moe.experts.131.w3", "model.layers.38.block_sparse_moe.experts.132.w3", "model.layers.38.block_sparse_moe.experts.133.w3", "model.layers.38.block_sparse_moe.experts.134.w3", "model.layers.38.block_sparse_moe.experts.135.w3", "model.layers.38.block_sparse_moe.experts.136.w3", "model.layers.38.block_sparse_moe.experts.137.w3", "model.layers.38.block_sparse_moe.experts.138.w3", "model.layers.38.block_sparse_moe.experts.139.w3", "model.layers.38.block_sparse_moe.experts.140.w3", "model.layers.38.block_sparse_moe.experts.141.w3", "model.layers.38.block_sparse_moe.experts.142.w3", "model.layers.38.block_sparse_moe.experts.143.w3", "model.layers.38.block_sparse_moe.experts.144.w3", "model.layers.38.block_sparse_moe.experts.145.w3", "model.layers.38.block_sparse_moe.experts.146.w3", "model.layers.38.block_sparse_moe.experts.147.w3", "model.layers.38.block_sparse_moe.experts.148.w3", "model.layers.38.block_sparse_moe.experts.149.w3", "model.layers.38.block_sparse_moe.experts.150.w3", "model.layers.38.block_sparse_moe.experts.151.w3", "model.layers.38.block_sparse_moe.experts.152.w3", "model.layers.38.block_sparse_moe.experts.153.w3", "model.layers.38.block_sparse_moe.experts.154.w3", "model.layers.38.block_sparse_moe.experts.155.w3", "model.layers.38.block_sparse_moe.experts.156.w3", "model.layers.38.block_sparse_moe.experts.157.w3", "model.layers.38.block_sparse_moe.experts.158.w3", "model.layers.38.block_sparse_moe.experts.159.w3", "model.layers.38.block_sparse_moe.experts.160.w3", "model.layers.38.block_sparse_moe.experts.161.w3", "model.layers.38.block_sparse_moe.experts.162.w3", "model.layers.38.block_sparse_moe.experts.163.w3", "model.layers.38.block_sparse_moe.experts.164.w3", "model.layers.38.block_sparse_moe.experts.165.w3", "model.layers.38.block_sparse_moe.experts.166.w3", "model.layers.38.block_sparse_moe.experts.167.w3", "model.layers.38.block_sparse_moe.experts.168.w3", "model.layers.38.block_sparse_moe.experts.169.w3", "model.layers.38.block_sparse_moe.experts.170.w3", "model.layers.38.block_sparse_moe.experts.171.w3", "model.layers.38.block_sparse_moe.experts.172.w3", "model.layers.38.block_sparse_moe.experts.173.w3", "model.layers.38.block_sparse_moe.experts.174.w3", "model.layers.38.block_sparse_moe.experts.175.w3", "model.layers.38.block_sparse_moe.experts.176.w3", "model.layers.38.block_sparse_moe.experts.177.w3", "model.layers.38.block_sparse_moe.experts.178.w3", "model.layers.38.block_sparse_moe.experts.179.w3", "model.layers.38.block_sparse_moe.experts.180.w3", "model.layers.38.block_sparse_moe.experts.181.w3", "model.layers.38.block_sparse_moe.experts.182.w3", "model.layers.38.block_sparse_moe.experts.183.w3", "model.layers.38.block_sparse_moe.experts.184.w3", "model.layers.38.block_sparse_moe.experts.185.w3", "model.layers.38.block_sparse_moe.experts.186.w3", "model.layers.38.block_sparse_moe.experts.187.w3", "model.layers.38.block_sparse_moe.experts.188.w3", "model.layers.38.block_sparse_moe.experts.189.w3", "model.layers.38.block_sparse_moe.experts.190.w3", "model.layers.38.block_sparse_moe.experts.191.w3", "model.layers.38.block_sparse_moe.experts.192.w3", "model.layers.38.block_sparse_moe.experts.193.w3", "model.layers.38.block_sparse_moe.experts.194.w3", "model.layers.38.block_sparse_moe.experts.195.w3", "model.layers.38.block_sparse_moe.experts.196.w3", "model.layers.38.block_sparse_moe.experts.197.w3", "model.layers.38.block_sparse_moe.experts.198.w3", "model.layers.38.block_sparse_moe.experts.199.w3", "model.layers.38.block_sparse_moe.experts.200.w3", "model.layers.38.block_sparse_moe.experts.201.w3", "model.layers.38.block_sparse_moe.experts.202.w3", "model.layers.38.block_sparse_moe.experts.203.w3", "model.layers.38.block_sparse_moe.experts.204.w3", "model.layers.38.block_sparse_moe.experts.205.w3", "model.layers.38.block_sparse_moe.experts.206.w3", "model.layers.38.block_sparse_moe.experts.207.w3", "model.layers.38.block_sparse_moe.experts.208.w3", "model.layers.38.block_sparse_moe.experts.209.w3", "model.layers.38.block_sparse_moe.experts.210.w3", "model.layers.38.block_sparse_moe.experts.211.w3", "model.layers.38.block_sparse_moe.experts.212.w3", "model.layers.38.block_sparse_moe.experts.213.w3", "model.layers.38.block_sparse_moe.experts.214.w3", "model.layers.38.block_sparse_moe.experts.215.w3", "model.layers.38.block_sparse_moe.experts.216.w3", "model.layers.38.block_sparse_moe.experts.217.w3", "model.layers.38.block_sparse_moe.experts.218.w3", "model.layers.38.block_sparse_moe.experts.219.w3", "model.layers.38.block_sparse_moe.experts.220.w3", "model.layers.38.block_sparse_moe.experts.221.w3", "model.layers.38.block_sparse_moe.experts.222.w3", "model.layers.38.block_sparse_moe.experts.223.w3", "model.layers.38.block_sparse_moe.experts.224.w3", "model.layers.38.block_sparse_moe.experts.225.w3", "model.layers.38.block_sparse_moe.experts.226.w3", "model.layers.38.block_sparse_moe.experts.227.w3", "model.layers.38.block_sparse_moe.experts.228.w3", "model.layers.38.block_sparse_moe.experts.229.w3", "model.layers.38.block_sparse_moe.experts.230.w3", "model.layers.38.block_sparse_moe.experts.231.w3", "model.layers.38.block_sparse_moe.experts.232.w3", "model.layers.38.block_sparse_moe.experts.233.w3", "model.layers.38.block_sparse_moe.experts.234.w3", "model.layers.38.block_sparse_moe.experts.235.w3", "model.layers.38.block_sparse_moe.experts.236.w3", "model.layers.38.block_sparse_moe.experts.237.w3", "model.layers.38.block_sparse_moe.experts.238.w3", "model.layers.38.block_sparse_moe.experts.239.w3", "model.layers.38.block_sparse_moe.experts.240.w3", "model.layers.38.block_sparse_moe.experts.241.w3", "model.layers.38.block_sparse_moe.experts.242.w3", "model.layers.38.block_sparse_moe.experts.243.w3", "model.layers.38.block_sparse_moe.experts.244.w3", "model.layers.38.block_sparse_moe.experts.245.w3", "model.layers.38.block_sparse_moe.experts.246.w3", "model.layers.38.block_sparse_moe.experts.247.w3", "model.layers.38.block_sparse_moe.experts.248.w3", "model.layers.38.block_sparse_moe.experts.249.w3", "model.layers.38.block_sparse_moe.experts.250.w3", "model.layers.38.block_sparse_moe.experts.251.w3", "model.layers.38.block_sparse_moe.experts.252.w3", "model.layers.38.block_sparse_moe.experts.253.w3", "model.layers.38.block_sparse_moe.experts.254.w3", "model.layers.38.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.00016011372208596386, "dbits": 2415919104 } ] }, { "idx": 194, "layers": [ "model.layers.38.block_sparse_moe.experts.0.w2", "model.layers.38.block_sparse_moe.experts.1.w2", "model.layers.38.block_sparse_moe.experts.2.w2", "model.layers.38.block_sparse_moe.experts.3.w2", "model.layers.38.block_sparse_moe.experts.4.w2", "model.layers.38.block_sparse_moe.experts.5.w2", "model.layers.38.block_sparse_moe.experts.6.w2", "model.layers.38.block_sparse_moe.experts.7.w2", "model.layers.38.block_sparse_moe.experts.8.w2", "model.layers.38.block_sparse_moe.experts.9.w2", "model.layers.38.block_sparse_moe.experts.10.w2", "model.layers.38.block_sparse_moe.experts.11.w2", "model.layers.38.block_sparse_moe.experts.12.w2", "model.layers.38.block_sparse_moe.experts.13.w2", "model.layers.38.block_sparse_moe.experts.14.w2", "model.layers.38.block_sparse_moe.experts.15.w2", "model.layers.38.block_sparse_moe.experts.16.w2", "model.layers.38.block_sparse_moe.experts.17.w2", "model.layers.38.block_sparse_moe.experts.18.w2", "model.layers.38.block_sparse_moe.experts.19.w2", "model.layers.38.block_sparse_moe.experts.20.w2", "model.layers.38.block_sparse_moe.experts.21.w2", "model.layers.38.block_sparse_moe.experts.22.w2", "model.layers.38.block_sparse_moe.experts.23.w2", "model.layers.38.block_sparse_moe.experts.24.w2", "model.layers.38.block_sparse_moe.experts.25.w2", "model.layers.38.block_sparse_moe.experts.26.w2", "model.layers.38.block_sparse_moe.experts.27.w2", "model.layers.38.block_sparse_moe.experts.28.w2", "model.layers.38.block_sparse_moe.experts.29.w2", "model.layers.38.block_sparse_moe.experts.30.w2", "model.layers.38.block_sparse_moe.experts.31.w2", "model.layers.38.block_sparse_moe.experts.32.w2", "model.layers.38.block_sparse_moe.experts.33.w2", "model.layers.38.block_sparse_moe.experts.34.w2", "model.layers.38.block_sparse_moe.experts.35.w2", "model.layers.38.block_sparse_moe.experts.36.w2", "model.layers.38.block_sparse_moe.experts.37.w2", "model.layers.38.block_sparse_moe.experts.38.w2", "model.layers.38.block_sparse_moe.experts.39.w2", "model.layers.38.block_sparse_moe.experts.40.w2", "model.layers.38.block_sparse_moe.experts.41.w2", "model.layers.38.block_sparse_moe.experts.42.w2", "model.layers.38.block_sparse_moe.experts.43.w2", "model.layers.38.block_sparse_moe.experts.44.w2", "model.layers.38.block_sparse_moe.experts.45.w2", "model.layers.38.block_sparse_moe.experts.46.w2", "model.layers.38.block_sparse_moe.experts.47.w2", "model.layers.38.block_sparse_moe.experts.48.w2", "model.layers.38.block_sparse_moe.experts.49.w2", "model.layers.38.block_sparse_moe.experts.50.w2", "model.layers.38.block_sparse_moe.experts.51.w2", "model.layers.38.block_sparse_moe.experts.52.w2", "model.layers.38.block_sparse_moe.experts.53.w2", "model.layers.38.block_sparse_moe.experts.54.w2", "model.layers.38.block_sparse_moe.experts.55.w2", "model.layers.38.block_sparse_moe.experts.56.w2", "model.layers.38.block_sparse_moe.experts.57.w2", "model.layers.38.block_sparse_moe.experts.58.w2", "model.layers.38.block_sparse_moe.experts.59.w2", "model.layers.38.block_sparse_moe.experts.60.w2", "model.layers.38.block_sparse_moe.experts.61.w2", "model.layers.38.block_sparse_moe.experts.62.w2", "model.layers.38.block_sparse_moe.experts.63.w2", "model.layers.38.block_sparse_moe.experts.64.w2", "model.layers.38.block_sparse_moe.experts.65.w2", "model.layers.38.block_sparse_moe.experts.66.w2", "model.layers.38.block_sparse_moe.experts.67.w2", "model.layers.38.block_sparse_moe.experts.68.w2", "model.layers.38.block_sparse_moe.experts.69.w2", "model.layers.38.block_sparse_moe.experts.70.w2", "model.layers.38.block_sparse_moe.experts.71.w2", "model.layers.38.block_sparse_moe.experts.72.w2", "model.layers.38.block_sparse_moe.experts.73.w2", "model.layers.38.block_sparse_moe.experts.74.w2", "model.layers.38.block_sparse_moe.experts.75.w2", "model.layers.38.block_sparse_moe.experts.76.w2", "model.layers.38.block_sparse_moe.experts.77.w2", "model.layers.38.block_sparse_moe.experts.78.w2", "model.layers.38.block_sparse_moe.experts.79.w2", "model.layers.38.block_sparse_moe.experts.80.w2", "model.layers.38.block_sparse_moe.experts.81.w2", "model.layers.38.block_sparse_moe.experts.82.w2", "model.layers.38.block_sparse_moe.experts.83.w2", "model.layers.38.block_sparse_moe.experts.84.w2", "model.layers.38.block_sparse_moe.experts.85.w2", "model.layers.38.block_sparse_moe.experts.86.w2", "model.layers.38.block_sparse_moe.experts.87.w2", "model.layers.38.block_sparse_moe.experts.88.w2", "model.layers.38.block_sparse_moe.experts.89.w2", "model.layers.38.block_sparse_moe.experts.90.w2", "model.layers.38.block_sparse_moe.experts.91.w2", "model.layers.38.block_sparse_moe.experts.92.w2", "model.layers.38.block_sparse_moe.experts.93.w2", "model.layers.38.block_sparse_moe.experts.94.w2", "model.layers.38.block_sparse_moe.experts.95.w2", "model.layers.38.block_sparse_moe.experts.96.w2", "model.layers.38.block_sparse_moe.experts.97.w2", "model.layers.38.block_sparse_moe.experts.98.w2", "model.layers.38.block_sparse_moe.experts.99.w2", "model.layers.38.block_sparse_moe.experts.100.w2", "model.layers.38.block_sparse_moe.experts.101.w2", "model.layers.38.block_sparse_moe.experts.102.w2", "model.layers.38.block_sparse_moe.experts.103.w2", "model.layers.38.block_sparse_moe.experts.104.w2", "model.layers.38.block_sparse_moe.experts.105.w2", "model.layers.38.block_sparse_moe.experts.106.w2", "model.layers.38.block_sparse_moe.experts.107.w2", "model.layers.38.block_sparse_moe.experts.108.w2", "model.layers.38.block_sparse_moe.experts.109.w2", "model.layers.38.block_sparse_moe.experts.110.w2", "model.layers.38.block_sparse_moe.experts.111.w2", "model.layers.38.block_sparse_moe.experts.112.w2", "model.layers.38.block_sparse_moe.experts.113.w2", "model.layers.38.block_sparse_moe.experts.114.w2", "model.layers.38.block_sparse_moe.experts.115.w2", "model.layers.38.block_sparse_moe.experts.116.w2", "model.layers.38.block_sparse_moe.experts.117.w2", "model.layers.38.block_sparse_moe.experts.118.w2", "model.layers.38.block_sparse_moe.experts.119.w2", "model.layers.38.block_sparse_moe.experts.120.w2", "model.layers.38.block_sparse_moe.experts.121.w2", "model.layers.38.block_sparse_moe.experts.122.w2", "model.layers.38.block_sparse_moe.experts.123.w2", "model.layers.38.block_sparse_moe.experts.124.w2", "model.layers.38.block_sparse_moe.experts.125.w2", "model.layers.38.block_sparse_moe.experts.126.w2", "model.layers.38.block_sparse_moe.experts.127.w2", "model.layers.38.block_sparse_moe.experts.128.w2", "model.layers.38.block_sparse_moe.experts.129.w2", "model.layers.38.block_sparse_moe.experts.130.w2", "model.layers.38.block_sparse_moe.experts.131.w2", "model.layers.38.block_sparse_moe.experts.132.w2", "model.layers.38.block_sparse_moe.experts.133.w2", "model.layers.38.block_sparse_moe.experts.134.w2", "model.layers.38.block_sparse_moe.experts.135.w2", "model.layers.38.block_sparse_moe.experts.136.w2", "model.layers.38.block_sparse_moe.experts.137.w2", "model.layers.38.block_sparse_moe.experts.138.w2", "model.layers.38.block_sparse_moe.experts.139.w2", "model.layers.38.block_sparse_moe.experts.140.w2", "model.layers.38.block_sparse_moe.experts.141.w2", "model.layers.38.block_sparse_moe.experts.142.w2", "model.layers.38.block_sparse_moe.experts.143.w2", "model.layers.38.block_sparse_moe.experts.144.w2", "model.layers.38.block_sparse_moe.experts.145.w2", "model.layers.38.block_sparse_moe.experts.146.w2", "model.layers.38.block_sparse_moe.experts.147.w2", "model.layers.38.block_sparse_moe.experts.148.w2", "model.layers.38.block_sparse_moe.experts.149.w2", "model.layers.38.block_sparse_moe.experts.150.w2", "model.layers.38.block_sparse_moe.experts.151.w2", "model.layers.38.block_sparse_moe.experts.152.w2", "model.layers.38.block_sparse_moe.experts.153.w2", "model.layers.38.block_sparse_moe.experts.154.w2", "model.layers.38.block_sparse_moe.experts.155.w2", "model.layers.38.block_sparse_moe.experts.156.w2", "model.layers.38.block_sparse_moe.experts.157.w2", "model.layers.38.block_sparse_moe.experts.158.w2", "model.layers.38.block_sparse_moe.experts.159.w2", "model.layers.38.block_sparse_moe.experts.160.w2", "model.layers.38.block_sparse_moe.experts.161.w2", "model.layers.38.block_sparse_moe.experts.162.w2", "model.layers.38.block_sparse_moe.experts.163.w2", "model.layers.38.block_sparse_moe.experts.164.w2", "model.layers.38.block_sparse_moe.experts.165.w2", "model.layers.38.block_sparse_moe.experts.166.w2", "model.layers.38.block_sparse_moe.experts.167.w2", "model.layers.38.block_sparse_moe.experts.168.w2", "model.layers.38.block_sparse_moe.experts.169.w2", "model.layers.38.block_sparse_moe.experts.170.w2", "model.layers.38.block_sparse_moe.experts.171.w2", "model.layers.38.block_sparse_moe.experts.172.w2", "model.layers.38.block_sparse_moe.experts.173.w2", "model.layers.38.block_sparse_moe.experts.174.w2", "model.layers.38.block_sparse_moe.experts.175.w2", "model.layers.38.block_sparse_moe.experts.176.w2", "model.layers.38.block_sparse_moe.experts.177.w2", "model.layers.38.block_sparse_moe.experts.178.w2", "model.layers.38.block_sparse_moe.experts.179.w2", "model.layers.38.block_sparse_moe.experts.180.w2", "model.layers.38.block_sparse_moe.experts.181.w2", "model.layers.38.block_sparse_moe.experts.182.w2", "model.layers.38.block_sparse_moe.experts.183.w2", "model.layers.38.block_sparse_moe.experts.184.w2", "model.layers.38.block_sparse_moe.experts.185.w2", "model.layers.38.block_sparse_moe.experts.186.w2", "model.layers.38.block_sparse_moe.experts.187.w2", "model.layers.38.block_sparse_moe.experts.188.w2", "model.layers.38.block_sparse_moe.experts.189.w2", "model.layers.38.block_sparse_moe.experts.190.w2", "model.layers.38.block_sparse_moe.experts.191.w2", "model.layers.38.block_sparse_moe.experts.192.w2", "model.layers.38.block_sparse_moe.experts.193.w2", "model.layers.38.block_sparse_moe.experts.194.w2", "model.layers.38.block_sparse_moe.experts.195.w2", "model.layers.38.block_sparse_moe.experts.196.w2", "model.layers.38.block_sparse_moe.experts.197.w2", "model.layers.38.block_sparse_moe.experts.198.w2", "model.layers.38.block_sparse_moe.experts.199.w2", "model.layers.38.block_sparse_moe.experts.200.w2", "model.layers.38.block_sparse_moe.experts.201.w2", "model.layers.38.block_sparse_moe.experts.202.w2", "model.layers.38.block_sparse_moe.experts.203.w2", "model.layers.38.block_sparse_moe.experts.204.w2", "model.layers.38.block_sparse_moe.experts.205.w2", "model.layers.38.block_sparse_moe.experts.206.w2", "model.layers.38.block_sparse_moe.experts.207.w2", "model.layers.38.block_sparse_moe.experts.208.w2", "model.layers.38.block_sparse_moe.experts.209.w2", "model.layers.38.block_sparse_moe.experts.210.w2", "model.layers.38.block_sparse_moe.experts.211.w2", "model.layers.38.block_sparse_moe.experts.212.w2", "model.layers.38.block_sparse_moe.experts.213.w2", "model.layers.38.block_sparse_moe.experts.214.w2", "model.layers.38.block_sparse_moe.experts.215.w2", "model.layers.38.block_sparse_moe.experts.216.w2", "model.layers.38.block_sparse_moe.experts.217.w2", "model.layers.38.block_sparse_moe.experts.218.w2", "model.layers.38.block_sparse_moe.experts.219.w2", "model.layers.38.block_sparse_moe.experts.220.w2", "model.layers.38.block_sparse_moe.experts.221.w2", "model.layers.38.block_sparse_moe.experts.222.w2", "model.layers.38.block_sparse_moe.experts.223.w2", "model.layers.38.block_sparse_moe.experts.224.w2", "model.layers.38.block_sparse_moe.experts.225.w2", "model.layers.38.block_sparse_moe.experts.226.w2", "model.layers.38.block_sparse_moe.experts.227.w2", "model.layers.38.block_sparse_moe.experts.228.w2", "model.layers.38.block_sparse_moe.experts.229.w2", "model.layers.38.block_sparse_moe.experts.230.w2", "model.layers.38.block_sparse_moe.experts.231.w2", "model.layers.38.block_sparse_moe.experts.232.w2", "model.layers.38.block_sparse_moe.experts.233.w2", "model.layers.38.block_sparse_moe.experts.234.w2", "model.layers.38.block_sparse_moe.experts.235.w2", "model.layers.38.block_sparse_moe.experts.236.w2", "model.layers.38.block_sparse_moe.experts.237.w2", "model.layers.38.block_sparse_moe.experts.238.w2", "model.layers.38.block_sparse_moe.experts.239.w2", "model.layers.38.block_sparse_moe.experts.240.w2", "model.layers.38.block_sparse_moe.experts.241.w2", "model.layers.38.block_sparse_moe.experts.242.w2", "model.layers.38.block_sparse_moe.experts.243.w2", "model.layers.38.block_sparse_moe.experts.244.w2", "model.layers.38.block_sparse_moe.experts.245.w2", "model.layers.38.block_sparse_moe.experts.246.w2", "model.layers.38.block_sparse_moe.experts.247.w2", "model.layers.38.block_sparse_moe.experts.248.w2", "model.layers.38.block_sparse_moe.experts.249.w2", "model.layers.38.block_sparse_moe.experts.250.w2", "model.layers.38.block_sparse_moe.experts.251.w2", "model.layers.38.block_sparse_moe.experts.252.w2", "model.layers.38.block_sparse_moe.experts.253.w2", "model.layers.38.block_sparse_moe.experts.254.w2", "model.layers.38.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0014692045748234128, "dbits": 1207959552 } ] }, { "idx": 195, "layers": [ "model.layers.39.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0011206626892089622, "dbits": 18874368 } ] }, { "idx": 196, "layers": [ "model.layers.39.self_attn.k_proj", "model.layers.39.self_attn.v_proj" ], "candidates": [ { "dkld": -0.006711576133966446, "dbits": 6291456 } ] }, { "idx": 197, "layers": [ "model.layers.39.self_attn.o_proj" ], "candidates": [ { "dkld": -0.014546770974993711, "dbits": 18874368 } ] }, { "idx": 198, "layers": [ "model.layers.39.block_sparse_moe.experts.0.w1", "model.layers.39.block_sparse_moe.experts.1.w1", "model.layers.39.block_sparse_moe.experts.2.w1", "model.layers.39.block_sparse_moe.experts.3.w1", "model.layers.39.block_sparse_moe.experts.4.w1", "model.layers.39.block_sparse_moe.experts.5.w1", "model.layers.39.block_sparse_moe.experts.6.w1", "model.layers.39.block_sparse_moe.experts.7.w1", "model.layers.39.block_sparse_moe.experts.8.w1", "model.layers.39.block_sparse_moe.experts.9.w1", "model.layers.39.block_sparse_moe.experts.10.w1", "model.layers.39.block_sparse_moe.experts.11.w1", "model.layers.39.block_sparse_moe.experts.12.w1", "model.layers.39.block_sparse_moe.experts.13.w1", "model.layers.39.block_sparse_moe.experts.14.w1", "model.layers.39.block_sparse_moe.experts.15.w1", "model.layers.39.block_sparse_moe.experts.16.w1", "model.layers.39.block_sparse_moe.experts.17.w1", "model.layers.39.block_sparse_moe.experts.18.w1", "model.layers.39.block_sparse_moe.experts.19.w1", "model.layers.39.block_sparse_moe.experts.20.w1", "model.layers.39.block_sparse_moe.experts.21.w1", "model.layers.39.block_sparse_moe.experts.22.w1", "model.layers.39.block_sparse_moe.experts.23.w1", "model.layers.39.block_sparse_moe.experts.24.w1", "model.layers.39.block_sparse_moe.experts.25.w1", "model.layers.39.block_sparse_moe.experts.26.w1", "model.layers.39.block_sparse_moe.experts.27.w1", "model.layers.39.block_sparse_moe.experts.28.w1", "model.layers.39.block_sparse_moe.experts.29.w1", "model.layers.39.block_sparse_moe.experts.30.w1", "model.layers.39.block_sparse_moe.experts.31.w1", "model.layers.39.block_sparse_moe.experts.32.w1", "model.layers.39.block_sparse_moe.experts.33.w1", "model.layers.39.block_sparse_moe.experts.34.w1", "model.layers.39.block_sparse_moe.experts.35.w1", "model.layers.39.block_sparse_moe.experts.36.w1", "model.layers.39.block_sparse_moe.experts.37.w1", "model.layers.39.block_sparse_moe.experts.38.w1", "model.layers.39.block_sparse_moe.experts.39.w1", "model.layers.39.block_sparse_moe.experts.40.w1", "model.layers.39.block_sparse_moe.experts.41.w1", "model.layers.39.block_sparse_moe.experts.42.w1", "model.layers.39.block_sparse_moe.experts.43.w1", "model.layers.39.block_sparse_moe.experts.44.w1", "model.layers.39.block_sparse_moe.experts.45.w1", "model.layers.39.block_sparse_moe.experts.46.w1", "model.layers.39.block_sparse_moe.experts.47.w1", "model.layers.39.block_sparse_moe.experts.48.w1", "model.layers.39.block_sparse_moe.experts.49.w1", "model.layers.39.block_sparse_moe.experts.50.w1", "model.layers.39.block_sparse_moe.experts.51.w1", "model.layers.39.block_sparse_moe.experts.52.w1", "model.layers.39.block_sparse_moe.experts.53.w1", "model.layers.39.block_sparse_moe.experts.54.w1", "model.layers.39.block_sparse_moe.experts.55.w1", "model.layers.39.block_sparse_moe.experts.56.w1", "model.layers.39.block_sparse_moe.experts.57.w1", "model.layers.39.block_sparse_moe.experts.58.w1", "model.layers.39.block_sparse_moe.experts.59.w1", "model.layers.39.block_sparse_moe.experts.60.w1", "model.layers.39.block_sparse_moe.experts.61.w1", "model.layers.39.block_sparse_moe.experts.62.w1", "model.layers.39.block_sparse_moe.experts.63.w1", "model.layers.39.block_sparse_moe.experts.64.w1", "model.layers.39.block_sparse_moe.experts.65.w1", "model.layers.39.block_sparse_moe.experts.66.w1", "model.layers.39.block_sparse_moe.experts.67.w1", "model.layers.39.block_sparse_moe.experts.68.w1", "model.layers.39.block_sparse_moe.experts.69.w1", "model.layers.39.block_sparse_moe.experts.70.w1", "model.layers.39.block_sparse_moe.experts.71.w1", "model.layers.39.block_sparse_moe.experts.72.w1", "model.layers.39.block_sparse_moe.experts.73.w1", "model.layers.39.block_sparse_moe.experts.74.w1", "model.layers.39.block_sparse_moe.experts.75.w1", "model.layers.39.block_sparse_moe.experts.76.w1", "model.layers.39.block_sparse_moe.experts.77.w1", "model.layers.39.block_sparse_moe.experts.78.w1", "model.layers.39.block_sparse_moe.experts.79.w1", "model.layers.39.block_sparse_moe.experts.80.w1", "model.layers.39.block_sparse_moe.experts.81.w1", "model.layers.39.block_sparse_moe.experts.82.w1", "model.layers.39.block_sparse_moe.experts.83.w1", "model.layers.39.block_sparse_moe.experts.84.w1", "model.layers.39.block_sparse_moe.experts.85.w1", "model.layers.39.block_sparse_moe.experts.86.w1", "model.layers.39.block_sparse_moe.experts.87.w1", "model.layers.39.block_sparse_moe.experts.88.w1", "model.layers.39.block_sparse_moe.experts.89.w1", "model.layers.39.block_sparse_moe.experts.90.w1", "model.layers.39.block_sparse_moe.experts.91.w1", "model.layers.39.block_sparse_moe.experts.92.w1", "model.layers.39.block_sparse_moe.experts.93.w1", "model.layers.39.block_sparse_moe.experts.94.w1", "model.layers.39.block_sparse_moe.experts.95.w1", "model.layers.39.block_sparse_moe.experts.96.w1", "model.layers.39.block_sparse_moe.experts.97.w1", "model.layers.39.block_sparse_moe.experts.98.w1", "model.layers.39.block_sparse_moe.experts.99.w1", "model.layers.39.block_sparse_moe.experts.100.w1", "model.layers.39.block_sparse_moe.experts.101.w1", "model.layers.39.block_sparse_moe.experts.102.w1", "model.layers.39.block_sparse_moe.experts.103.w1", "model.layers.39.block_sparse_moe.experts.104.w1", "model.layers.39.block_sparse_moe.experts.105.w1", "model.layers.39.block_sparse_moe.experts.106.w1", "model.layers.39.block_sparse_moe.experts.107.w1", "model.layers.39.block_sparse_moe.experts.108.w1", "model.layers.39.block_sparse_moe.experts.109.w1", "model.layers.39.block_sparse_moe.experts.110.w1", "model.layers.39.block_sparse_moe.experts.111.w1", "model.layers.39.block_sparse_moe.experts.112.w1", "model.layers.39.block_sparse_moe.experts.113.w1", "model.layers.39.block_sparse_moe.experts.114.w1", "model.layers.39.block_sparse_moe.experts.115.w1", "model.layers.39.block_sparse_moe.experts.116.w1", "model.layers.39.block_sparse_moe.experts.117.w1", "model.layers.39.block_sparse_moe.experts.118.w1", "model.layers.39.block_sparse_moe.experts.119.w1", "model.layers.39.block_sparse_moe.experts.120.w1", "model.layers.39.block_sparse_moe.experts.121.w1", "model.layers.39.block_sparse_moe.experts.122.w1", "model.layers.39.block_sparse_moe.experts.123.w1", "model.layers.39.block_sparse_moe.experts.124.w1", "model.layers.39.block_sparse_moe.experts.125.w1", "model.layers.39.block_sparse_moe.experts.126.w1", "model.layers.39.block_sparse_moe.experts.127.w1", "model.layers.39.block_sparse_moe.experts.128.w1", "model.layers.39.block_sparse_moe.experts.129.w1", "model.layers.39.block_sparse_moe.experts.130.w1", "model.layers.39.block_sparse_moe.experts.131.w1", "model.layers.39.block_sparse_moe.experts.132.w1", "model.layers.39.block_sparse_moe.experts.133.w1", "model.layers.39.block_sparse_moe.experts.134.w1", "model.layers.39.block_sparse_moe.experts.135.w1", "model.layers.39.block_sparse_moe.experts.136.w1", "model.layers.39.block_sparse_moe.experts.137.w1", "model.layers.39.block_sparse_moe.experts.138.w1", "model.layers.39.block_sparse_moe.experts.139.w1", "model.layers.39.block_sparse_moe.experts.140.w1", "model.layers.39.block_sparse_moe.experts.141.w1", "model.layers.39.block_sparse_moe.experts.142.w1", "model.layers.39.block_sparse_moe.experts.143.w1", "model.layers.39.block_sparse_moe.experts.144.w1", "model.layers.39.block_sparse_moe.experts.145.w1", "model.layers.39.block_sparse_moe.experts.146.w1", "model.layers.39.block_sparse_moe.experts.147.w1", "model.layers.39.block_sparse_moe.experts.148.w1", "model.layers.39.block_sparse_moe.experts.149.w1", "model.layers.39.block_sparse_moe.experts.150.w1", "model.layers.39.block_sparse_moe.experts.151.w1", "model.layers.39.block_sparse_moe.experts.152.w1", "model.layers.39.block_sparse_moe.experts.153.w1", "model.layers.39.block_sparse_moe.experts.154.w1", "model.layers.39.block_sparse_moe.experts.155.w1", "model.layers.39.block_sparse_moe.experts.156.w1", "model.layers.39.block_sparse_moe.experts.157.w1", "model.layers.39.block_sparse_moe.experts.158.w1", "model.layers.39.block_sparse_moe.experts.159.w1", "model.layers.39.block_sparse_moe.experts.160.w1", "model.layers.39.block_sparse_moe.experts.161.w1", "model.layers.39.block_sparse_moe.experts.162.w1", "model.layers.39.block_sparse_moe.experts.163.w1", "model.layers.39.block_sparse_moe.experts.164.w1", "model.layers.39.block_sparse_moe.experts.165.w1", "model.layers.39.block_sparse_moe.experts.166.w1", "model.layers.39.block_sparse_moe.experts.167.w1", "model.layers.39.block_sparse_moe.experts.168.w1", "model.layers.39.block_sparse_moe.experts.169.w1", "model.layers.39.block_sparse_moe.experts.170.w1", "model.layers.39.block_sparse_moe.experts.171.w1", "model.layers.39.block_sparse_moe.experts.172.w1", "model.layers.39.block_sparse_moe.experts.173.w1", "model.layers.39.block_sparse_moe.experts.174.w1", "model.layers.39.block_sparse_moe.experts.175.w1", "model.layers.39.block_sparse_moe.experts.176.w1", "model.layers.39.block_sparse_moe.experts.177.w1", "model.layers.39.block_sparse_moe.experts.178.w1", "model.layers.39.block_sparse_moe.experts.179.w1", "model.layers.39.block_sparse_moe.experts.180.w1", "model.layers.39.block_sparse_moe.experts.181.w1", "model.layers.39.block_sparse_moe.experts.182.w1", "model.layers.39.block_sparse_moe.experts.183.w1", "model.layers.39.block_sparse_moe.experts.184.w1", "model.layers.39.block_sparse_moe.experts.185.w1", "model.layers.39.block_sparse_moe.experts.186.w1", "model.layers.39.block_sparse_moe.experts.187.w1", "model.layers.39.block_sparse_moe.experts.188.w1", "model.layers.39.block_sparse_moe.experts.189.w1", "model.layers.39.block_sparse_moe.experts.190.w1", "model.layers.39.block_sparse_moe.experts.191.w1", "model.layers.39.block_sparse_moe.experts.192.w1", "model.layers.39.block_sparse_moe.experts.193.w1", "model.layers.39.block_sparse_moe.experts.194.w1", "model.layers.39.block_sparse_moe.experts.195.w1", "model.layers.39.block_sparse_moe.experts.196.w1", "model.layers.39.block_sparse_moe.experts.197.w1", "model.layers.39.block_sparse_moe.experts.198.w1", "model.layers.39.block_sparse_moe.experts.199.w1", "model.layers.39.block_sparse_moe.experts.200.w1", "model.layers.39.block_sparse_moe.experts.201.w1", "model.layers.39.block_sparse_moe.experts.202.w1", "model.layers.39.block_sparse_moe.experts.203.w1", "model.layers.39.block_sparse_moe.experts.204.w1", "model.layers.39.block_sparse_moe.experts.205.w1", "model.layers.39.block_sparse_moe.experts.206.w1", "model.layers.39.block_sparse_moe.experts.207.w1", "model.layers.39.block_sparse_moe.experts.208.w1", "model.layers.39.block_sparse_moe.experts.209.w1", "model.layers.39.block_sparse_moe.experts.210.w1", "model.layers.39.block_sparse_moe.experts.211.w1", "model.layers.39.block_sparse_moe.experts.212.w1", "model.layers.39.block_sparse_moe.experts.213.w1", "model.layers.39.block_sparse_moe.experts.214.w1", "model.layers.39.block_sparse_moe.experts.215.w1", "model.layers.39.block_sparse_moe.experts.216.w1", "model.layers.39.block_sparse_moe.experts.217.w1", "model.layers.39.block_sparse_moe.experts.218.w1", "model.layers.39.block_sparse_moe.experts.219.w1", "model.layers.39.block_sparse_moe.experts.220.w1", "model.layers.39.block_sparse_moe.experts.221.w1", "model.layers.39.block_sparse_moe.experts.222.w1", "model.layers.39.block_sparse_moe.experts.223.w1", "model.layers.39.block_sparse_moe.experts.224.w1", "model.layers.39.block_sparse_moe.experts.225.w1", "model.layers.39.block_sparse_moe.experts.226.w1", "model.layers.39.block_sparse_moe.experts.227.w1", "model.layers.39.block_sparse_moe.experts.228.w1", "model.layers.39.block_sparse_moe.experts.229.w1", "model.layers.39.block_sparse_moe.experts.230.w1", "model.layers.39.block_sparse_moe.experts.231.w1", "model.layers.39.block_sparse_moe.experts.232.w1", "model.layers.39.block_sparse_moe.experts.233.w1", "model.layers.39.block_sparse_moe.experts.234.w1", "model.layers.39.block_sparse_moe.experts.235.w1", "model.layers.39.block_sparse_moe.experts.236.w1", "model.layers.39.block_sparse_moe.experts.237.w1", "model.layers.39.block_sparse_moe.experts.238.w1", "model.layers.39.block_sparse_moe.experts.239.w1", "model.layers.39.block_sparse_moe.experts.240.w1", "model.layers.39.block_sparse_moe.experts.241.w1", "model.layers.39.block_sparse_moe.experts.242.w1", "model.layers.39.block_sparse_moe.experts.243.w1", "model.layers.39.block_sparse_moe.experts.244.w1", "model.layers.39.block_sparse_moe.experts.245.w1", "model.layers.39.block_sparse_moe.experts.246.w1", "model.layers.39.block_sparse_moe.experts.247.w1", "model.layers.39.block_sparse_moe.experts.248.w1", "model.layers.39.block_sparse_moe.experts.249.w1", "model.layers.39.block_sparse_moe.experts.250.w1", "model.layers.39.block_sparse_moe.experts.251.w1", "model.layers.39.block_sparse_moe.experts.252.w1", "model.layers.39.block_sparse_moe.experts.253.w1", "model.layers.39.block_sparse_moe.experts.254.w1", "model.layers.39.block_sparse_moe.experts.255.w1", "model.layers.39.block_sparse_moe.experts.0.w3", "model.layers.39.block_sparse_moe.experts.1.w3", "model.layers.39.block_sparse_moe.experts.2.w3", "model.layers.39.block_sparse_moe.experts.3.w3", "model.layers.39.block_sparse_moe.experts.4.w3", "model.layers.39.block_sparse_moe.experts.5.w3", "model.layers.39.block_sparse_moe.experts.6.w3", "model.layers.39.block_sparse_moe.experts.7.w3", "model.layers.39.block_sparse_moe.experts.8.w3", "model.layers.39.block_sparse_moe.experts.9.w3", "model.layers.39.block_sparse_moe.experts.10.w3", "model.layers.39.block_sparse_moe.experts.11.w3", "model.layers.39.block_sparse_moe.experts.12.w3", "model.layers.39.block_sparse_moe.experts.13.w3", "model.layers.39.block_sparse_moe.experts.14.w3", "model.layers.39.block_sparse_moe.experts.15.w3", "model.layers.39.block_sparse_moe.experts.16.w3", "model.layers.39.block_sparse_moe.experts.17.w3", "model.layers.39.block_sparse_moe.experts.18.w3", "model.layers.39.block_sparse_moe.experts.19.w3", "model.layers.39.block_sparse_moe.experts.20.w3", "model.layers.39.block_sparse_moe.experts.21.w3", "model.layers.39.block_sparse_moe.experts.22.w3", "model.layers.39.block_sparse_moe.experts.23.w3", "model.layers.39.block_sparse_moe.experts.24.w3", "model.layers.39.block_sparse_moe.experts.25.w3", "model.layers.39.block_sparse_moe.experts.26.w3", "model.layers.39.block_sparse_moe.experts.27.w3", "model.layers.39.block_sparse_moe.experts.28.w3", "model.layers.39.block_sparse_moe.experts.29.w3", "model.layers.39.block_sparse_moe.experts.30.w3", "model.layers.39.block_sparse_moe.experts.31.w3", "model.layers.39.block_sparse_moe.experts.32.w3", "model.layers.39.block_sparse_moe.experts.33.w3", "model.layers.39.block_sparse_moe.experts.34.w3", "model.layers.39.block_sparse_moe.experts.35.w3", "model.layers.39.block_sparse_moe.experts.36.w3", "model.layers.39.block_sparse_moe.experts.37.w3", "model.layers.39.block_sparse_moe.experts.38.w3", "model.layers.39.block_sparse_moe.experts.39.w3", "model.layers.39.block_sparse_moe.experts.40.w3", "model.layers.39.block_sparse_moe.experts.41.w3", "model.layers.39.block_sparse_moe.experts.42.w3", "model.layers.39.block_sparse_moe.experts.43.w3", "model.layers.39.block_sparse_moe.experts.44.w3", "model.layers.39.block_sparse_moe.experts.45.w3", "model.layers.39.block_sparse_moe.experts.46.w3", "model.layers.39.block_sparse_moe.experts.47.w3", "model.layers.39.block_sparse_moe.experts.48.w3", "model.layers.39.block_sparse_moe.experts.49.w3", "model.layers.39.block_sparse_moe.experts.50.w3", "model.layers.39.block_sparse_moe.experts.51.w3", "model.layers.39.block_sparse_moe.experts.52.w3", "model.layers.39.block_sparse_moe.experts.53.w3", "model.layers.39.block_sparse_moe.experts.54.w3", "model.layers.39.block_sparse_moe.experts.55.w3", "model.layers.39.block_sparse_moe.experts.56.w3", "model.layers.39.block_sparse_moe.experts.57.w3", "model.layers.39.block_sparse_moe.experts.58.w3", "model.layers.39.block_sparse_moe.experts.59.w3", "model.layers.39.block_sparse_moe.experts.60.w3", "model.layers.39.block_sparse_moe.experts.61.w3", "model.layers.39.block_sparse_moe.experts.62.w3", "model.layers.39.block_sparse_moe.experts.63.w3", "model.layers.39.block_sparse_moe.experts.64.w3", "model.layers.39.block_sparse_moe.experts.65.w3", "model.layers.39.block_sparse_moe.experts.66.w3", "model.layers.39.block_sparse_moe.experts.67.w3", "model.layers.39.block_sparse_moe.experts.68.w3", "model.layers.39.block_sparse_moe.experts.69.w3", "model.layers.39.block_sparse_moe.experts.70.w3", "model.layers.39.block_sparse_moe.experts.71.w3", "model.layers.39.block_sparse_moe.experts.72.w3", "model.layers.39.block_sparse_moe.experts.73.w3", "model.layers.39.block_sparse_moe.experts.74.w3", "model.layers.39.block_sparse_moe.experts.75.w3", "model.layers.39.block_sparse_moe.experts.76.w3", "model.layers.39.block_sparse_moe.experts.77.w3", "model.layers.39.block_sparse_moe.experts.78.w3", "model.layers.39.block_sparse_moe.experts.79.w3", "model.layers.39.block_sparse_moe.experts.80.w3", "model.layers.39.block_sparse_moe.experts.81.w3", "model.layers.39.block_sparse_moe.experts.82.w3", "model.layers.39.block_sparse_moe.experts.83.w3", "model.layers.39.block_sparse_moe.experts.84.w3", "model.layers.39.block_sparse_moe.experts.85.w3", "model.layers.39.block_sparse_moe.experts.86.w3", "model.layers.39.block_sparse_moe.experts.87.w3", "model.layers.39.block_sparse_moe.experts.88.w3", "model.layers.39.block_sparse_moe.experts.89.w3", "model.layers.39.block_sparse_moe.experts.90.w3", "model.layers.39.block_sparse_moe.experts.91.w3", "model.layers.39.block_sparse_moe.experts.92.w3", "model.layers.39.block_sparse_moe.experts.93.w3", "model.layers.39.block_sparse_moe.experts.94.w3", "model.layers.39.block_sparse_moe.experts.95.w3", "model.layers.39.block_sparse_moe.experts.96.w3", "model.layers.39.block_sparse_moe.experts.97.w3", "model.layers.39.block_sparse_moe.experts.98.w3", "model.layers.39.block_sparse_moe.experts.99.w3", "model.layers.39.block_sparse_moe.experts.100.w3", "model.layers.39.block_sparse_moe.experts.101.w3", "model.layers.39.block_sparse_moe.experts.102.w3", "model.layers.39.block_sparse_moe.experts.103.w3", "model.layers.39.block_sparse_moe.experts.104.w3", "model.layers.39.block_sparse_moe.experts.105.w3", "model.layers.39.block_sparse_moe.experts.106.w3", "model.layers.39.block_sparse_moe.experts.107.w3", "model.layers.39.block_sparse_moe.experts.108.w3", "model.layers.39.block_sparse_moe.experts.109.w3", "model.layers.39.block_sparse_moe.experts.110.w3", "model.layers.39.block_sparse_moe.experts.111.w3", "model.layers.39.block_sparse_moe.experts.112.w3", "model.layers.39.block_sparse_moe.experts.113.w3", "model.layers.39.block_sparse_moe.experts.114.w3", "model.layers.39.block_sparse_moe.experts.115.w3", "model.layers.39.block_sparse_moe.experts.116.w3", "model.layers.39.block_sparse_moe.experts.117.w3", "model.layers.39.block_sparse_moe.experts.118.w3", "model.layers.39.block_sparse_moe.experts.119.w3", "model.layers.39.block_sparse_moe.experts.120.w3", "model.layers.39.block_sparse_moe.experts.121.w3", "model.layers.39.block_sparse_moe.experts.122.w3", "model.layers.39.block_sparse_moe.experts.123.w3", "model.layers.39.block_sparse_moe.experts.124.w3", "model.layers.39.block_sparse_moe.experts.125.w3", "model.layers.39.block_sparse_moe.experts.126.w3", "model.layers.39.block_sparse_moe.experts.127.w3", "model.layers.39.block_sparse_moe.experts.128.w3", "model.layers.39.block_sparse_moe.experts.129.w3", "model.layers.39.block_sparse_moe.experts.130.w3", "model.layers.39.block_sparse_moe.experts.131.w3", "model.layers.39.block_sparse_moe.experts.132.w3", "model.layers.39.block_sparse_moe.experts.133.w3", "model.layers.39.block_sparse_moe.experts.134.w3", "model.layers.39.block_sparse_moe.experts.135.w3", "model.layers.39.block_sparse_moe.experts.136.w3", "model.layers.39.block_sparse_moe.experts.137.w3", "model.layers.39.block_sparse_moe.experts.138.w3", "model.layers.39.block_sparse_moe.experts.139.w3", "model.layers.39.block_sparse_moe.experts.140.w3", "model.layers.39.block_sparse_moe.experts.141.w3", "model.layers.39.block_sparse_moe.experts.142.w3", "model.layers.39.block_sparse_moe.experts.143.w3", "model.layers.39.block_sparse_moe.experts.144.w3", "model.layers.39.block_sparse_moe.experts.145.w3", "model.layers.39.block_sparse_moe.experts.146.w3", "model.layers.39.block_sparse_moe.experts.147.w3", "model.layers.39.block_sparse_moe.experts.148.w3", "model.layers.39.block_sparse_moe.experts.149.w3", "model.layers.39.block_sparse_moe.experts.150.w3", "model.layers.39.block_sparse_moe.experts.151.w3", "model.layers.39.block_sparse_moe.experts.152.w3", "model.layers.39.block_sparse_moe.experts.153.w3", "model.layers.39.block_sparse_moe.experts.154.w3", "model.layers.39.block_sparse_moe.experts.155.w3", "model.layers.39.block_sparse_moe.experts.156.w3", "model.layers.39.block_sparse_moe.experts.157.w3", "model.layers.39.block_sparse_moe.experts.158.w3", "model.layers.39.block_sparse_moe.experts.159.w3", "model.layers.39.block_sparse_moe.experts.160.w3", "model.layers.39.block_sparse_moe.experts.161.w3", "model.layers.39.block_sparse_moe.experts.162.w3", "model.layers.39.block_sparse_moe.experts.163.w3", "model.layers.39.block_sparse_moe.experts.164.w3", "model.layers.39.block_sparse_moe.experts.165.w3", "model.layers.39.block_sparse_moe.experts.166.w3", "model.layers.39.block_sparse_moe.experts.167.w3", "model.layers.39.block_sparse_moe.experts.168.w3", "model.layers.39.block_sparse_moe.experts.169.w3", "model.layers.39.block_sparse_moe.experts.170.w3", "model.layers.39.block_sparse_moe.experts.171.w3", "model.layers.39.block_sparse_moe.experts.172.w3", "model.layers.39.block_sparse_moe.experts.173.w3", "model.layers.39.block_sparse_moe.experts.174.w3", "model.layers.39.block_sparse_moe.experts.175.w3", "model.layers.39.block_sparse_moe.experts.176.w3", "model.layers.39.block_sparse_moe.experts.177.w3", "model.layers.39.block_sparse_moe.experts.178.w3", "model.layers.39.block_sparse_moe.experts.179.w3", "model.layers.39.block_sparse_moe.experts.180.w3", "model.layers.39.block_sparse_moe.experts.181.w3", "model.layers.39.block_sparse_moe.experts.182.w3", "model.layers.39.block_sparse_moe.experts.183.w3", "model.layers.39.block_sparse_moe.experts.184.w3", "model.layers.39.block_sparse_moe.experts.185.w3", "model.layers.39.block_sparse_moe.experts.186.w3", "model.layers.39.block_sparse_moe.experts.187.w3", "model.layers.39.block_sparse_moe.experts.188.w3", "model.layers.39.block_sparse_moe.experts.189.w3", "model.layers.39.block_sparse_moe.experts.190.w3", "model.layers.39.block_sparse_moe.experts.191.w3", "model.layers.39.block_sparse_moe.experts.192.w3", "model.layers.39.block_sparse_moe.experts.193.w3", "model.layers.39.block_sparse_moe.experts.194.w3", "model.layers.39.block_sparse_moe.experts.195.w3", "model.layers.39.block_sparse_moe.experts.196.w3", "model.layers.39.block_sparse_moe.experts.197.w3", "model.layers.39.block_sparse_moe.experts.198.w3", "model.layers.39.block_sparse_moe.experts.199.w3", "model.layers.39.block_sparse_moe.experts.200.w3", "model.layers.39.block_sparse_moe.experts.201.w3", "model.layers.39.block_sparse_moe.experts.202.w3", "model.layers.39.block_sparse_moe.experts.203.w3", "model.layers.39.block_sparse_moe.experts.204.w3", "model.layers.39.block_sparse_moe.experts.205.w3", "model.layers.39.block_sparse_moe.experts.206.w3", "model.layers.39.block_sparse_moe.experts.207.w3", "model.layers.39.block_sparse_moe.experts.208.w3", "model.layers.39.block_sparse_moe.experts.209.w3", "model.layers.39.block_sparse_moe.experts.210.w3", "model.layers.39.block_sparse_moe.experts.211.w3", "model.layers.39.block_sparse_moe.experts.212.w3", "model.layers.39.block_sparse_moe.experts.213.w3", "model.layers.39.block_sparse_moe.experts.214.w3", "model.layers.39.block_sparse_moe.experts.215.w3", "model.layers.39.block_sparse_moe.experts.216.w3", "model.layers.39.block_sparse_moe.experts.217.w3", "model.layers.39.block_sparse_moe.experts.218.w3", "model.layers.39.block_sparse_moe.experts.219.w3", "model.layers.39.block_sparse_moe.experts.220.w3", "model.layers.39.block_sparse_moe.experts.221.w3", "model.layers.39.block_sparse_moe.experts.222.w3", "model.layers.39.block_sparse_moe.experts.223.w3", "model.layers.39.block_sparse_moe.experts.224.w3", "model.layers.39.block_sparse_moe.experts.225.w3", "model.layers.39.block_sparse_moe.experts.226.w3", "model.layers.39.block_sparse_moe.experts.227.w3", "model.layers.39.block_sparse_moe.experts.228.w3", "model.layers.39.block_sparse_moe.experts.229.w3", "model.layers.39.block_sparse_moe.experts.230.w3", "model.layers.39.block_sparse_moe.experts.231.w3", "model.layers.39.block_sparse_moe.experts.232.w3", "model.layers.39.block_sparse_moe.experts.233.w3", "model.layers.39.block_sparse_moe.experts.234.w3", "model.layers.39.block_sparse_moe.experts.235.w3", "model.layers.39.block_sparse_moe.experts.236.w3", "model.layers.39.block_sparse_moe.experts.237.w3", "model.layers.39.block_sparse_moe.experts.238.w3", "model.layers.39.block_sparse_moe.experts.239.w3", "model.layers.39.block_sparse_moe.experts.240.w3", "model.layers.39.block_sparse_moe.experts.241.w3", "model.layers.39.block_sparse_moe.experts.242.w3", "model.layers.39.block_sparse_moe.experts.243.w3", "model.layers.39.block_sparse_moe.experts.244.w3", "model.layers.39.block_sparse_moe.experts.245.w3", "model.layers.39.block_sparse_moe.experts.246.w3", "model.layers.39.block_sparse_moe.experts.247.w3", "model.layers.39.block_sparse_moe.experts.248.w3", "model.layers.39.block_sparse_moe.experts.249.w3", "model.layers.39.block_sparse_moe.experts.250.w3", "model.layers.39.block_sparse_moe.experts.251.w3", "model.layers.39.block_sparse_moe.experts.252.w3", "model.layers.39.block_sparse_moe.experts.253.w3", "model.layers.39.block_sparse_moe.experts.254.w3", "model.layers.39.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -2.0060688257217407e-06, "dbits": 2415919104 } ] }, { "idx": 199, "layers": [ "model.layers.39.block_sparse_moe.experts.0.w2", "model.layers.39.block_sparse_moe.experts.1.w2", "model.layers.39.block_sparse_moe.experts.2.w2", "model.layers.39.block_sparse_moe.experts.3.w2", "model.layers.39.block_sparse_moe.experts.4.w2", "model.layers.39.block_sparse_moe.experts.5.w2", "model.layers.39.block_sparse_moe.experts.6.w2", "model.layers.39.block_sparse_moe.experts.7.w2", "model.layers.39.block_sparse_moe.experts.8.w2", "model.layers.39.block_sparse_moe.experts.9.w2", "model.layers.39.block_sparse_moe.experts.10.w2", "model.layers.39.block_sparse_moe.experts.11.w2", "model.layers.39.block_sparse_moe.experts.12.w2", "model.layers.39.block_sparse_moe.experts.13.w2", "model.layers.39.block_sparse_moe.experts.14.w2", "model.layers.39.block_sparse_moe.experts.15.w2", "model.layers.39.block_sparse_moe.experts.16.w2", "model.layers.39.block_sparse_moe.experts.17.w2", "model.layers.39.block_sparse_moe.experts.18.w2", "model.layers.39.block_sparse_moe.experts.19.w2", "model.layers.39.block_sparse_moe.experts.20.w2", "model.layers.39.block_sparse_moe.experts.21.w2", "model.layers.39.block_sparse_moe.experts.22.w2", "model.layers.39.block_sparse_moe.experts.23.w2", "model.layers.39.block_sparse_moe.experts.24.w2", "model.layers.39.block_sparse_moe.experts.25.w2", "model.layers.39.block_sparse_moe.experts.26.w2", "model.layers.39.block_sparse_moe.experts.27.w2", "model.layers.39.block_sparse_moe.experts.28.w2", "model.layers.39.block_sparse_moe.experts.29.w2", "model.layers.39.block_sparse_moe.experts.30.w2", "model.layers.39.block_sparse_moe.experts.31.w2", "model.layers.39.block_sparse_moe.experts.32.w2", "model.layers.39.block_sparse_moe.experts.33.w2", "model.layers.39.block_sparse_moe.experts.34.w2", "model.layers.39.block_sparse_moe.experts.35.w2", "model.layers.39.block_sparse_moe.experts.36.w2", "model.layers.39.block_sparse_moe.experts.37.w2", "model.layers.39.block_sparse_moe.experts.38.w2", "model.layers.39.block_sparse_moe.experts.39.w2", "model.layers.39.block_sparse_moe.experts.40.w2", "model.layers.39.block_sparse_moe.experts.41.w2", "model.layers.39.block_sparse_moe.experts.42.w2", "model.layers.39.block_sparse_moe.experts.43.w2", "model.layers.39.block_sparse_moe.experts.44.w2", "model.layers.39.block_sparse_moe.experts.45.w2", "model.layers.39.block_sparse_moe.experts.46.w2", "model.layers.39.block_sparse_moe.experts.47.w2", "model.layers.39.block_sparse_moe.experts.48.w2", "model.layers.39.block_sparse_moe.experts.49.w2", "model.layers.39.block_sparse_moe.experts.50.w2", "model.layers.39.block_sparse_moe.experts.51.w2", "model.layers.39.block_sparse_moe.experts.52.w2", "model.layers.39.block_sparse_moe.experts.53.w2", "model.layers.39.block_sparse_moe.experts.54.w2", "model.layers.39.block_sparse_moe.experts.55.w2", "model.layers.39.block_sparse_moe.experts.56.w2", "model.layers.39.block_sparse_moe.experts.57.w2", "model.layers.39.block_sparse_moe.experts.58.w2", "model.layers.39.block_sparse_moe.experts.59.w2", "model.layers.39.block_sparse_moe.experts.60.w2", "model.layers.39.block_sparse_moe.experts.61.w2", "model.layers.39.block_sparse_moe.experts.62.w2", "model.layers.39.block_sparse_moe.experts.63.w2", "model.layers.39.block_sparse_moe.experts.64.w2", "model.layers.39.block_sparse_moe.experts.65.w2", "model.layers.39.block_sparse_moe.experts.66.w2", "model.layers.39.block_sparse_moe.experts.67.w2", "model.layers.39.block_sparse_moe.experts.68.w2", "model.layers.39.block_sparse_moe.experts.69.w2", "model.layers.39.block_sparse_moe.experts.70.w2", "model.layers.39.block_sparse_moe.experts.71.w2", "model.layers.39.block_sparse_moe.experts.72.w2", "model.layers.39.block_sparse_moe.experts.73.w2", "model.layers.39.block_sparse_moe.experts.74.w2", "model.layers.39.block_sparse_moe.experts.75.w2", "model.layers.39.block_sparse_moe.experts.76.w2", "model.layers.39.block_sparse_moe.experts.77.w2", "model.layers.39.block_sparse_moe.experts.78.w2", "model.layers.39.block_sparse_moe.experts.79.w2", "model.layers.39.block_sparse_moe.experts.80.w2", "model.layers.39.block_sparse_moe.experts.81.w2", "model.layers.39.block_sparse_moe.experts.82.w2", "model.layers.39.block_sparse_moe.experts.83.w2", "model.layers.39.block_sparse_moe.experts.84.w2", "model.layers.39.block_sparse_moe.experts.85.w2", "model.layers.39.block_sparse_moe.experts.86.w2", "model.layers.39.block_sparse_moe.experts.87.w2", "model.layers.39.block_sparse_moe.experts.88.w2", "model.layers.39.block_sparse_moe.experts.89.w2", "model.layers.39.block_sparse_moe.experts.90.w2", "model.layers.39.block_sparse_moe.experts.91.w2", "model.layers.39.block_sparse_moe.experts.92.w2", "model.layers.39.block_sparse_moe.experts.93.w2", "model.layers.39.block_sparse_moe.experts.94.w2", "model.layers.39.block_sparse_moe.experts.95.w2", "model.layers.39.block_sparse_moe.experts.96.w2", "model.layers.39.block_sparse_moe.experts.97.w2", "model.layers.39.block_sparse_moe.experts.98.w2", "model.layers.39.block_sparse_moe.experts.99.w2", "model.layers.39.block_sparse_moe.experts.100.w2", "model.layers.39.block_sparse_moe.experts.101.w2", "model.layers.39.block_sparse_moe.experts.102.w2", "model.layers.39.block_sparse_moe.experts.103.w2", "model.layers.39.block_sparse_moe.experts.104.w2", "model.layers.39.block_sparse_moe.experts.105.w2", "model.layers.39.block_sparse_moe.experts.106.w2", "model.layers.39.block_sparse_moe.experts.107.w2", "model.layers.39.block_sparse_moe.experts.108.w2", "model.layers.39.block_sparse_moe.experts.109.w2", "model.layers.39.block_sparse_moe.experts.110.w2", "model.layers.39.block_sparse_moe.experts.111.w2", "model.layers.39.block_sparse_moe.experts.112.w2", "model.layers.39.block_sparse_moe.experts.113.w2", "model.layers.39.block_sparse_moe.experts.114.w2", "model.layers.39.block_sparse_moe.experts.115.w2", "model.layers.39.block_sparse_moe.experts.116.w2", "model.layers.39.block_sparse_moe.experts.117.w2", "model.layers.39.block_sparse_moe.experts.118.w2", "model.layers.39.block_sparse_moe.experts.119.w2", "model.layers.39.block_sparse_moe.experts.120.w2", "model.layers.39.block_sparse_moe.experts.121.w2", "model.layers.39.block_sparse_moe.experts.122.w2", "model.layers.39.block_sparse_moe.experts.123.w2", "model.layers.39.block_sparse_moe.experts.124.w2", "model.layers.39.block_sparse_moe.experts.125.w2", "model.layers.39.block_sparse_moe.experts.126.w2", "model.layers.39.block_sparse_moe.experts.127.w2", "model.layers.39.block_sparse_moe.experts.128.w2", "model.layers.39.block_sparse_moe.experts.129.w2", "model.layers.39.block_sparse_moe.experts.130.w2", "model.layers.39.block_sparse_moe.experts.131.w2", "model.layers.39.block_sparse_moe.experts.132.w2", "model.layers.39.block_sparse_moe.experts.133.w2", "model.layers.39.block_sparse_moe.experts.134.w2", "model.layers.39.block_sparse_moe.experts.135.w2", "model.layers.39.block_sparse_moe.experts.136.w2", "model.layers.39.block_sparse_moe.experts.137.w2", "model.layers.39.block_sparse_moe.experts.138.w2", "model.layers.39.block_sparse_moe.experts.139.w2", "model.layers.39.block_sparse_moe.experts.140.w2", "model.layers.39.block_sparse_moe.experts.141.w2", "model.layers.39.block_sparse_moe.experts.142.w2", "model.layers.39.block_sparse_moe.experts.143.w2", "model.layers.39.block_sparse_moe.experts.144.w2", "model.layers.39.block_sparse_moe.experts.145.w2", "model.layers.39.block_sparse_moe.experts.146.w2", "model.layers.39.block_sparse_moe.experts.147.w2", "model.layers.39.block_sparse_moe.experts.148.w2", "model.layers.39.block_sparse_moe.experts.149.w2", "model.layers.39.block_sparse_moe.experts.150.w2", "model.layers.39.block_sparse_moe.experts.151.w2", "model.layers.39.block_sparse_moe.experts.152.w2", "model.layers.39.block_sparse_moe.experts.153.w2", "model.layers.39.block_sparse_moe.experts.154.w2", "model.layers.39.block_sparse_moe.experts.155.w2", "model.layers.39.block_sparse_moe.experts.156.w2", "model.layers.39.block_sparse_moe.experts.157.w2", "model.layers.39.block_sparse_moe.experts.158.w2", "model.layers.39.block_sparse_moe.experts.159.w2", "model.layers.39.block_sparse_moe.experts.160.w2", "model.layers.39.block_sparse_moe.experts.161.w2", "model.layers.39.block_sparse_moe.experts.162.w2", "model.layers.39.block_sparse_moe.experts.163.w2", "model.layers.39.block_sparse_moe.experts.164.w2", "model.layers.39.block_sparse_moe.experts.165.w2", "model.layers.39.block_sparse_moe.experts.166.w2", "model.layers.39.block_sparse_moe.experts.167.w2", "model.layers.39.block_sparse_moe.experts.168.w2", "model.layers.39.block_sparse_moe.experts.169.w2", "model.layers.39.block_sparse_moe.experts.170.w2", "model.layers.39.block_sparse_moe.experts.171.w2", "model.layers.39.block_sparse_moe.experts.172.w2", "model.layers.39.block_sparse_moe.experts.173.w2", "model.layers.39.block_sparse_moe.experts.174.w2", "model.layers.39.block_sparse_moe.experts.175.w2", "model.layers.39.block_sparse_moe.experts.176.w2", "model.layers.39.block_sparse_moe.experts.177.w2", "model.layers.39.block_sparse_moe.experts.178.w2", "model.layers.39.block_sparse_moe.experts.179.w2", "model.layers.39.block_sparse_moe.experts.180.w2", "model.layers.39.block_sparse_moe.experts.181.w2", "model.layers.39.block_sparse_moe.experts.182.w2", "model.layers.39.block_sparse_moe.experts.183.w2", "model.layers.39.block_sparse_moe.experts.184.w2", "model.layers.39.block_sparse_moe.experts.185.w2", "model.layers.39.block_sparse_moe.experts.186.w2", "model.layers.39.block_sparse_moe.experts.187.w2", "model.layers.39.block_sparse_moe.experts.188.w2", "model.layers.39.block_sparse_moe.experts.189.w2", "model.layers.39.block_sparse_moe.experts.190.w2", "model.layers.39.block_sparse_moe.experts.191.w2", "model.layers.39.block_sparse_moe.experts.192.w2", "model.layers.39.block_sparse_moe.experts.193.w2", "model.layers.39.block_sparse_moe.experts.194.w2", "model.layers.39.block_sparse_moe.experts.195.w2", "model.layers.39.block_sparse_moe.experts.196.w2", "model.layers.39.block_sparse_moe.experts.197.w2", "model.layers.39.block_sparse_moe.experts.198.w2", "model.layers.39.block_sparse_moe.experts.199.w2", "model.layers.39.block_sparse_moe.experts.200.w2", "model.layers.39.block_sparse_moe.experts.201.w2", "model.layers.39.block_sparse_moe.experts.202.w2", "model.layers.39.block_sparse_moe.experts.203.w2", "model.layers.39.block_sparse_moe.experts.204.w2", "model.layers.39.block_sparse_moe.experts.205.w2", "model.layers.39.block_sparse_moe.experts.206.w2", "model.layers.39.block_sparse_moe.experts.207.w2", "model.layers.39.block_sparse_moe.experts.208.w2", "model.layers.39.block_sparse_moe.experts.209.w2", "model.layers.39.block_sparse_moe.experts.210.w2", "model.layers.39.block_sparse_moe.experts.211.w2", "model.layers.39.block_sparse_moe.experts.212.w2", "model.layers.39.block_sparse_moe.experts.213.w2", "model.layers.39.block_sparse_moe.experts.214.w2", "model.layers.39.block_sparse_moe.experts.215.w2", "model.layers.39.block_sparse_moe.experts.216.w2", "model.layers.39.block_sparse_moe.experts.217.w2", "model.layers.39.block_sparse_moe.experts.218.w2", "model.layers.39.block_sparse_moe.experts.219.w2", "model.layers.39.block_sparse_moe.experts.220.w2", "model.layers.39.block_sparse_moe.experts.221.w2", "model.layers.39.block_sparse_moe.experts.222.w2", "model.layers.39.block_sparse_moe.experts.223.w2", "model.layers.39.block_sparse_moe.experts.224.w2", "model.layers.39.block_sparse_moe.experts.225.w2", "model.layers.39.block_sparse_moe.experts.226.w2", "model.layers.39.block_sparse_moe.experts.227.w2", "model.layers.39.block_sparse_moe.experts.228.w2", "model.layers.39.block_sparse_moe.experts.229.w2", "model.layers.39.block_sparse_moe.experts.230.w2", "model.layers.39.block_sparse_moe.experts.231.w2", "model.layers.39.block_sparse_moe.experts.232.w2", "model.layers.39.block_sparse_moe.experts.233.w2", "model.layers.39.block_sparse_moe.experts.234.w2", "model.layers.39.block_sparse_moe.experts.235.w2", "model.layers.39.block_sparse_moe.experts.236.w2", "model.layers.39.block_sparse_moe.experts.237.w2", "model.layers.39.block_sparse_moe.experts.238.w2", "model.layers.39.block_sparse_moe.experts.239.w2", "model.layers.39.block_sparse_moe.experts.240.w2", "model.layers.39.block_sparse_moe.experts.241.w2", "model.layers.39.block_sparse_moe.experts.242.w2", "model.layers.39.block_sparse_moe.experts.243.w2", "model.layers.39.block_sparse_moe.experts.244.w2", "model.layers.39.block_sparse_moe.experts.245.w2", "model.layers.39.block_sparse_moe.experts.246.w2", "model.layers.39.block_sparse_moe.experts.247.w2", "model.layers.39.block_sparse_moe.experts.248.w2", "model.layers.39.block_sparse_moe.experts.249.w2", "model.layers.39.block_sparse_moe.experts.250.w2", "model.layers.39.block_sparse_moe.experts.251.w2", "model.layers.39.block_sparse_moe.experts.252.w2", "model.layers.39.block_sparse_moe.experts.253.w2", "model.layers.39.block_sparse_moe.experts.254.w2", "model.layers.39.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -9.717270731929295e-05, "dbits": 1207959552 } ] }, { "idx": 200, "layers": [ "model.layers.40.self_attn.q_proj" ], "candidates": [ { "dkld": -0.003822748363018047, "dbits": 18874368 } ] }, { "idx": 201, "layers": [ "model.layers.40.self_attn.k_proj", "model.layers.40.self_attn.v_proj" ], "candidates": [ { "dkld": -0.004944877699017525, "dbits": 6291456 } ] }, { "idx": 202, "layers": [ "model.layers.40.self_attn.o_proj" ], "candidates": [ { "dkld": -0.007293665409088157, "dbits": 18874368 } ] }, { "idx": 203, "layers": [ "model.layers.40.block_sparse_moe.experts.0.w1", "model.layers.40.block_sparse_moe.experts.1.w1", "model.layers.40.block_sparse_moe.experts.2.w1", "model.layers.40.block_sparse_moe.experts.3.w1", "model.layers.40.block_sparse_moe.experts.4.w1", "model.layers.40.block_sparse_moe.experts.5.w1", "model.layers.40.block_sparse_moe.experts.6.w1", "model.layers.40.block_sparse_moe.experts.7.w1", "model.layers.40.block_sparse_moe.experts.8.w1", "model.layers.40.block_sparse_moe.experts.9.w1", "model.layers.40.block_sparse_moe.experts.10.w1", "model.layers.40.block_sparse_moe.experts.11.w1", "model.layers.40.block_sparse_moe.experts.12.w1", "model.layers.40.block_sparse_moe.experts.13.w1", "model.layers.40.block_sparse_moe.experts.14.w1", "model.layers.40.block_sparse_moe.experts.15.w1", "model.layers.40.block_sparse_moe.experts.16.w1", "model.layers.40.block_sparse_moe.experts.17.w1", "model.layers.40.block_sparse_moe.experts.18.w1", "model.layers.40.block_sparse_moe.experts.19.w1", "model.layers.40.block_sparse_moe.experts.20.w1", "model.layers.40.block_sparse_moe.experts.21.w1", "model.layers.40.block_sparse_moe.experts.22.w1", "model.layers.40.block_sparse_moe.experts.23.w1", "model.layers.40.block_sparse_moe.experts.24.w1", "model.layers.40.block_sparse_moe.experts.25.w1", "model.layers.40.block_sparse_moe.experts.26.w1", "model.layers.40.block_sparse_moe.experts.27.w1", "model.layers.40.block_sparse_moe.experts.28.w1", "model.layers.40.block_sparse_moe.experts.29.w1", "model.layers.40.block_sparse_moe.experts.30.w1", "model.layers.40.block_sparse_moe.experts.31.w1", "model.layers.40.block_sparse_moe.experts.32.w1", "model.layers.40.block_sparse_moe.experts.33.w1", "model.layers.40.block_sparse_moe.experts.34.w1", "model.layers.40.block_sparse_moe.experts.35.w1", "model.layers.40.block_sparse_moe.experts.36.w1", "model.layers.40.block_sparse_moe.experts.37.w1", "model.layers.40.block_sparse_moe.experts.38.w1", "model.layers.40.block_sparse_moe.experts.39.w1", "model.layers.40.block_sparse_moe.experts.40.w1", "model.layers.40.block_sparse_moe.experts.41.w1", "model.layers.40.block_sparse_moe.experts.42.w1", "model.layers.40.block_sparse_moe.experts.43.w1", "model.layers.40.block_sparse_moe.experts.44.w1", "model.layers.40.block_sparse_moe.experts.45.w1", "model.layers.40.block_sparse_moe.experts.46.w1", "model.layers.40.block_sparse_moe.experts.47.w1", "model.layers.40.block_sparse_moe.experts.48.w1", "model.layers.40.block_sparse_moe.experts.49.w1", "model.layers.40.block_sparse_moe.experts.50.w1", "model.layers.40.block_sparse_moe.experts.51.w1", "model.layers.40.block_sparse_moe.experts.52.w1", "model.layers.40.block_sparse_moe.experts.53.w1", "model.layers.40.block_sparse_moe.experts.54.w1", "model.layers.40.block_sparse_moe.experts.55.w1", "model.layers.40.block_sparse_moe.experts.56.w1", "model.layers.40.block_sparse_moe.experts.57.w1", "model.layers.40.block_sparse_moe.experts.58.w1", "model.layers.40.block_sparse_moe.experts.59.w1", "model.layers.40.block_sparse_moe.experts.60.w1", "model.layers.40.block_sparse_moe.experts.61.w1", "model.layers.40.block_sparse_moe.experts.62.w1", "model.layers.40.block_sparse_moe.experts.63.w1", "model.layers.40.block_sparse_moe.experts.64.w1", "model.layers.40.block_sparse_moe.experts.65.w1", "model.layers.40.block_sparse_moe.experts.66.w1", "model.layers.40.block_sparse_moe.experts.67.w1", "model.layers.40.block_sparse_moe.experts.68.w1", "model.layers.40.block_sparse_moe.experts.69.w1", "model.layers.40.block_sparse_moe.experts.70.w1", "model.layers.40.block_sparse_moe.experts.71.w1", "model.layers.40.block_sparse_moe.experts.72.w1", "model.layers.40.block_sparse_moe.experts.73.w1", "model.layers.40.block_sparse_moe.experts.74.w1", "model.layers.40.block_sparse_moe.experts.75.w1", "model.layers.40.block_sparse_moe.experts.76.w1", "model.layers.40.block_sparse_moe.experts.77.w1", "model.layers.40.block_sparse_moe.experts.78.w1", "model.layers.40.block_sparse_moe.experts.79.w1", "model.layers.40.block_sparse_moe.experts.80.w1", "model.layers.40.block_sparse_moe.experts.81.w1", "model.layers.40.block_sparse_moe.experts.82.w1", "model.layers.40.block_sparse_moe.experts.83.w1", "model.layers.40.block_sparse_moe.experts.84.w1", "model.layers.40.block_sparse_moe.experts.85.w1", "model.layers.40.block_sparse_moe.experts.86.w1", "model.layers.40.block_sparse_moe.experts.87.w1", "model.layers.40.block_sparse_moe.experts.88.w1", "model.layers.40.block_sparse_moe.experts.89.w1", "model.layers.40.block_sparse_moe.experts.90.w1", "model.layers.40.block_sparse_moe.experts.91.w1", "model.layers.40.block_sparse_moe.experts.92.w1", "model.layers.40.block_sparse_moe.experts.93.w1", "model.layers.40.block_sparse_moe.experts.94.w1", "model.layers.40.block_sparse_moe.experts.95.w1", "model.layers.40.block_sparse_moe.experts.96.w1", "model.layers.40.block_sparse_moe.experts.97.w1", "model.layers.40.block_sparse_moe.experts.98.w1", "model.layers.40.block_sparse_moe.experts.99.w1", "model.layers.40.block_sparse_moe.experts.100.w1", "model.layers.40.block_sparse_moe.experts.101.w1", "model.layers.40.block_sparse_moe.experts.102.w1", "model.layers.40.block_sparse_moe.experts.103.w1", "model.layers.40.block_sparse_moe.experts.104.w1", "model.layers.40.block_sparse_moe.experts.105.w1", "model.layers.40.block_sparse_moe.experts.106.w1", "model.layers.40.block_sparse_moe.experts.107.w1", "model.layers.40.block_sparse_moe.experts.108.w1", "model.layers.40.block_sparse_moe.experts.109.w1", "model.layers.40.block_sparse_moe.experts.110.w1", "model.layers.40.block_sparse_moe.experts.111.w1", "model.layers.40.block_sparse_moe.experts.112.w1", "model.layers.40.block_sparse_moe.experts.113.w1", "model.layers.40.block_sparse_moe.experts.114.w1", "model.layers.40.block_sparse_moe.experts.115.w1", "model.layers.40.block_sparse_moe.experts.116.w1", "model.layers.40.block_sparse_moe.experts.117.w1", "model.layers.40.block_sparse_moe.experts.118.w1", "model.layers.40.block_sparse_moe.experts.119.w1", "model.layers.40.block_sparse_moe.experts.120.w1", "model.layers.40.block_sparse_moe.experts.121.w1", "model.layers.40.block_sparse_moe.experts.122.w1", "model.layers.40.block_sparse_moe.experts.123.w1", "model.layers.40.block_sparse_moe.experts.124.w1", "model.layers.40.block_sparse_moe.experts.125.w1", "model.layers.40.block_sparse_moe.experts.126.w1", "model.layers.40.block_sparse_moe.experts.127.w1", "model.layers.40.block_sparse_moe.experts.128.w1", "model.layers.40.block_sparse_moe.experts.129.w1", "model.layers.40.block_sparse_moe.experts.130.w1", "model.layers.40.block_sparse_moe.experts.131.w1", "model.layers.40.block_sparse_moe.experts.132.w1", "model.layers.40.block_sparse_moe.experts.133.w1", "model.layers.40.block_sparse_moe.experts.134.w1", "model.layers.40.block_sparse_moe.experts.135.w1", "model.layers.40.block_sparse_moe.experts.136.w1", "model.layers.40.block_sparse_moe.experts.137.w1", "model.layers.40.block_sparse_moe.experts.138.w1", "model.layers.40.block_sparse_moe.experts.139.w1", "model.layers.40.block_sparse_moe.experts.140.w1", "model.layers.40.block_sparse_moe.experts.141.w1", "model.layers.40.block_sparse_moe.experts.142.w1", "model.layers.40.block_sparse_moe.experts.143.w1", "model.layers.40.block_sparse_moe.experts.144.w1", "model.layers.40.block_sparse_moe.experts.145.w1", "model.layers.40.block_sparse_moe.experts.146.w1", "model.layers.40.block_sparse_moe.experts.147.w1", "model.layers.40.block_sparse_moe.experts.148.w1", "model.layers.40.block_sparse_moe.experts.149.w1", "model.layers.40.block_sparse_moe.experts.150.w1", "model.layers.40.block_sparse_moe.experts.151.w1", "model.layers.40.block_sparse_moe.experts.152.w1", "model.layers.40.block_sparse_moe.experts.153.w1", "model.layers.40.block_sparse_moe.experts.154.w1", "model.layers.40.block_sparse_moe.experts.155.w1", "model.layers.40.block_sparse_moe.experts.156.w1", "model.layers.40.block_sparse_moe.experts.157.w1", "model.layers.40.block_sparse_moe.experts.158.w1", "model.layers.40.block_sparse_moe.experts.159.w1", "model.layers.40.block_sparse_moe.experts.160.w1", "model.layers.40.block_sparse_moe.experts.161.w1", "model.layers.40.block_sparse_moe.experts.162.w1", "model.layers.40.block_sparse_moe.experts.163.w1", "model.layers.40.block_sparse_moe.experts.164.w1", "model.layers.40.block_sparse_moe.experts.165.w1", "model.layers.40.block_sparse_moe.experts.166.w1", "model.layers.40.block_sparse_moe.experts.167.w1", "model.layers.40.block_sparse_moe.experts.168.w1", "model.layers.40.block_sparse_moe.experts.169.w1", "model.layers.40.block_sparse_moe.experts.170.w1", "model.layers.40.block_sparse_moe.experts.171.w1", "model.layers.40.block_sparse_moe.experts.172.w1", "model.layers.40.block_sparse_moe.experts.173.w1", "model.layers.40.block_sparse_moe.experts.174.w1", "model.layers.40.block_sparse_moe.experts.175.w1", "model.layers.40.block_sparse_moe.experts.176.w1", "model.layers.40.block_sparse_moe.experts.177.w1", "model.layers.40.block_sparse_moe.experts.178.w1", "model.layers.40.block_sparse_moe.experts.179.w1", "model.layers.40.block_sparse_moe.experts.180.w1", "model.layers.40.block_sparse_moe.experts.181.w1", "model.layers.40.block_sparse_moe.experts.182.w1", "model.layers.40.block_sparse_moe.experts.183.w1", "model.layers.40.block_sparse_moe.experts.184.w1", "model.layers.40.block_sparse_moe.experts.185.w1", "model.layers.40.block_sparse_moe.experts.186.w1", "model.layers.40.block_sparse_moe.experts.187.w1", "model.layers.40.block_sparse_moe.experts.188.w1", "model.layers.40.block_sparse_moe.experts.189.w1", "model.layers.40.block_sparse_moe.experts.190.w1", "model.layers.40.block_sparse_moe.experts.191.w1", "model.layers.40.block_sparse_moe.experts.192.w1", "model.layers.40.block_sparse_moe.experts.193.w1", "model.layers.40.block_sparse_moe.experts.194.w1", "model.layers.40.block_sparse_moe.experts.195.w1", "model.layers.40.block_sparse_moe.experts.196.w1", "model.layers.40.block_sparse_moe.experts.197.w1", "model.layers.40.block_sparse_moe.experts.198.w1", "model.layers.40.block_sparse_moe.experts.199.w1", "model.layers.40.block_sparse_moe.experts.200.w1", "model.layers.40.block_sparse_moe.experts.201.w1", "model.layers.40.block_sparse_moe.experts.202.w1", "model.layers.40.block_sparse_moe.experts.203.w1", "model.layers.40.block_sparse_moe.experts.204.w1", "model.layers.40.block_sparse_moe.experts.205.w1", "model.layers.40.block_sparse_moe.experts.206.w1", "model.layers.40.block_sparse_moe.experts.207.w1", "model.layers.40.block_sparse_moe.experts.208.w1", "model.layers.40.block_sparse_moe.experts.209.w1", "model.layers.40.block_sparse_moe.experts.210.w1", "model.layers.40.block_sparse_moe.experts.211.w1", "model.layers.40.block_sparse_moe.experts.212.w1", "model.layers.40.block_sparse_moe.experts.213.w1", "model.layers.40.block_sparse_moe.experts.214.w1", "model.layers.40.block_sparse_moe.experts.215.w1", "model.layers.40.block_sparse_moe.experts.216.w1", "model.layers.40.block_sparse_moe.experts.217.w1", "model.layers.40.block_sparse_moe.experts.218.w1", "model.layers.40.block_sparse_moe.experts.219.w1", "model.layers.40.block_sparse_moe.experts.220.w1", "model.layers.40.block_sparse_moe.experts.221.w1", "model.layers.40.block_sparse_moe.experts.222.w1", "model.layers.40.block_sparse_moe.experts.223.w1", "model.layers.40.block_sparse_moe.experts.224.w1", "model.layers.40.block_sparse_moe.experts.225.w1", "model.layers.40.block_sparse_moe.experts.226.w1", "model.layers.40.block_sparse_moe.experts.227.w1", "model.layers.40.block_sparse_moe.experts.228.w1", "model.layers.40.block_sparse_moe.experts.229.w1", "model.layers.40.block_sparse_moe.experts.230.w1", "model.layers.40.block_sparse_moe.experts.231.w1", "model.layers.40.block_sparse_moe.experts.232.w1", "model.layers.40.block_sparse_moe.experts.233.w1", "model.layers.40.block_sparse_moe.experts.234.w1", "model.layers.40.block_sparse_moe.experts.235.w1", "model.layers.40.block_sparse_moe.experts.236.w1", "model.layers.40.block_sparse_moe.experts.237.w1", "model.layers.40.block_sparse_moe.experts.238.w1", "model.layers.40.block_sparse_moe.experts.239.w1", "model.layers.40.block_sparse_moe.experts.240.w1", "model.layers.40.block_sparse_moe.experts.241.w1", "model.layers.40.block_sparse_moe.experts.242.w1", "model.layers.40.block_sparse_moe.experts.243.w1", "model.layers.40.block_sparse_moe.experts.244.w1", "model.layers.40.block_sparse_moe.experts.245.w1", "model.layers.40.block_sparse_moe.experts.246.w1", "model.layers.40.block_sparse_moe.experts.247.w1", "model.layers.40.block_sparse_moe.experts.248.w1", "model.layers.40.block_sparse_moe.experts.249.w1", "model.layers.40.block_sparse_moe.experts.250.w1", "model.layers.40.block_sparse_moe.experts.251.w1", "model.layers.40.block_sparse_moe.experts.252.w1", "model.layers.40.block_sparse_moe.experts.253.w1", "model.layers.40.block_sparse_moe.experts.254.w1", "model.layers.40.block_sparse_moe.experts.255.w1", "model.layers.40.block_sparse_moe.experts.0.w3", "model.layers.40.block_sparse_moe.experts.1.w3", "model.layers.40.block_sparse_moe.experts.2.w3", "model.layers.40.block_sparse_moe.experts.3.w3", "model.layers.40.block_sparse_moe.experts.4.w3", "model.layers.40.block_sparse_moe.experts.5.w3", "model.layers.40.block_sparse_moe.experts.6.w3", "model.layers.40.block_sparse_moe.experts.7.w3", "model.layers.40.block_sparse_moe.experts.8.w3", "model.layers.40.block_sparse_moe.experts.9.w3", "model.layers.40.block_sparse_moe.experts.10.w3", "model.layers.40.block_sparse_moe.experts.11.w3", "model.layers.40.block_sparse_moe.experts.12.w3", "model.layers.40.block_sparse_moe.experts.13.w3", "model.layers.40.block_sparse_moe.experts.14.w3", "model.layers.40.block_sparse_moe.experts.15.w3", "model.layers.40.block_sparse_moe.experts.16.w3", "model.layers.40.block_sparse_moe.experts.17.w3", "model.layers.40.block_sparse_moe.experts.18.w3", "model.layers.40.block_sparse_moe.experts.19.w3", "model.layers.40.block_sparse_moe.experts.20.w3", "model.layers.40.block_sparse_moe.experts.21.w3", "model.layers.40.block_sparse_moe.experts.22.w3", "model.layers.40.block_sparse_moe.experts.23.w3", "model.layers.40.block_sparse_moe.experts.24.w3", "model.layers.40.block_sparse_moe.experts.25.w3", "model.layers.40.block_sparse_moe.experts.26.w3", "model.layers.40.block_sparse_moe.experts.27.w3", "model.layers.40.block_sparse_moe.experts.28.w3", "model.layers.40.block_sparse_moe.experts.29.w3", "model.layers.40.block_sparse_moe.experts.30.w3", "model.layers.40.block_sparse_moe.experts.31.w3", "model.layers.40.block_sparse_moe.experts.32.w3", "model.layers.40.block_sparse_moe.experts.33.w3", "model.layers.40.block_sparse_moe.experts.34.w3", "model.layers.40.block_sparse_moe.experts.35.w3", "model.layers.40.block_sparse_moe.experts.36.w3", "model.layers.40.block_sparse_moe.experts.37.w3", "model.layers.40.block_sparse_moe.experts.38.w3", "model.layers.40.block_sparse_moe.experts.39.w3", "model.layers.40.block_sparse_moe.experts.40.w3", "model.layers.40.block_sparse_moe.experts.41.w3", "model.layers.40.block_sparse_moe.experts.42.w3", "model.layers.40.block_sparse_moe.experts.43.w3", "model.layers.40.block_sparse_moe.experts.44.w3", "model.layers.40.block_sparse_moe.experts.45.w3", "model.layers.40.block_sparse_moe.experts.46.w3", "model.layers.40.block_sparse_moe.experts.47.w3", "model.layers.40.block_sparse_moe.experts.48.w3", "model.layers.40.block_sparse_moe.experts.49.w3", "model.layers.40.block_sparse_moe.experts.50.w3", "model.layers.40.block_sparse_moe.experts.51.w3", "model.layers.40.block_sparse_moe.experts.52.w3", "model.layers.40.block_sparse_moe.experts.53.w3", "model.layers.40.block_sparse_moe.experts.54.w3", "model.layers.40.block_sparse_moe.experts.55.w3", "model.layers.40.block_sparse_moe.experts.56.w3", "model.layers.40.block_sparse_moe.experts.57.w3", "model.layers.40.block_sparse_moe.experts.58.w3", "model.layers.40.block_sparse_moe.experts.59.w3", "model.layers.40.block_sparse_moe.experts.60.w3", "model.layers.40.block_sparse_moe.experts.61.w3", "model.layers.40.block_sparse_moe.experts.62.w3", "model.layers.40.block_sparse_moe.experts.63.w3", "model.layers.40.block_sparse_moe.experts.64.w3", "model.layers.40.block_sparse_moe.experts.65.w3", "model.layers.40.block_sparse_moe.experts.66.w3", "model.layers.40.block_sparse_moe.experts.67.w3", "model.layers.40.block_sparse_moe.experts.68.w3", "model.layers.40.block_sparse_moe.experts.69.w3", "model.layers.40.block_sparse_moe.experts.70.w3", "model.layers.40.block_sparse_moe.experts.71.w3", "model.layers.40.block_sparse_moe.experts.72.w3", "model.layers.40.block_sparse_moe.experts.73.w3", "model.layers.40.block_sparse_moe.experts.74.w3", "model.layers.40.block_sparse_moe.experts.75.w3", "model.layers.40.block_sparse_moe.experts.76.w3", "model.layers.40.block_sparse_moe.experts.77.w3", "model.layers.40.block_sparse_moe.experts.78.w3", "model.layers.40.block_sparse_moe.experts.79.w3", "model.layers.40.block_sparse_moe.experts.80.w3", "model.layers.40.block_sparse_moe.experts.81.w3", "model.layers.40.block_sparse_moe.experts.82.w3", "model.layers.40.block_sparse_moe.experts.83.w3", "model.layers.40.block_sparse_moe.experts.84.w3", "model.layers.40.block_sparse_moe.experts.85.w3", "model.layers.40.block_sparse_moe.experts.86.w3", "model.layers.40.block_sparse_moe.experts.87.w3", "model.layers.40.block_sparse_moe.experts.88.w3", "model.layers.40.block_sparse_moe.experts.89.w3", "model.layers.40.block_sparse_moe.experts.90.w3", "model.layers.40.block_sparse_moe.experts.91.w3", "model.layers.40.block_sparse_moe.experts.92.w3", "model.layers.40.block_sparse_moe.experts.93.w3", "model.layers.40.block_sparse_moe.experts.94.w3", "model.layers.40.block_sparse_moe.experts.95.w3", "model.layers.40.block_sparse_moe.experts.96.w3", "model.layers.40.block_sparse_moe.experts.97.w3", "model.layers.40.block_sparse_moe.experts.98.w3", "model.layers.40.block_sparse_moe.experts.99.w3", "model.layers.40.block_sparse_moe.experts.100.w3", "model.layers.40.block_sparse_moe.experts.101.w3", "model.layers.40.block_sparse_moe.experts.102.w3", "model.layers.40.block_sparse_moe.experts.103.w3", "model.layers.40.block_sparse_moe.experts.104.w3", "model.layers.40.block_sparse_moe.experts.105.w3", "model.layers.40.block_sparse_moe.experts.106.w3", "model.layers.40.block_sparse_moe.experts.107.w3", "model.layers.40.block_sparse_moe.experts.108.w3", "model.layers.40.block_sparse_moe.experts.109.w3", "model.layers.40.block_sparse_moe.experts.110.w3", "model.layers.40.block_sparse_moe.experts.111.w3", "model.layers.40.block_sparse_moe.experts.112.w3", "model.layers.40.block_sparse_moe.experts.113.w3", "model.layers.40.block_sparse_moe.experts.114.w3", "model.layers.40.block_sparse_moe.experts.115.w3", "model.layers.40.block_sparse_moe.experts.116.w3", "model.layers.40.block_sparse_moe.experts.117.w3", "model.layers.40.block_sparse_moe.experts.118.w3", "model.layers.40.block_sparse_moe.experts.119.w3", "model.layers.40.block_sparse_moe.experts.120.w3", "model.layers.40.block_sparse_moe.experts.121.w3", "model.layers.40.block_sparse_moe.experts.122.w3", "model.layers.40.block_sparse_moe.experts.123.w3", "model.layers.40.block_sparse_moe.experts.124.w3", "model.layers.40.block_sparse_moe.experts.125.w3", "model.layers.40.block_sparse_moe.experts.126.w3", "model.layers.40.block_sparse_moe.experts.127.w3", "model.layers.40.block_sparse_moe.experts.128.w3", "model.layers.40.block_sparse_moe.experts.129.w3", "model.layers.40.block_sparse_moe.experts.130.w3", "model.layers.40.block_sparse_moe.experts.131.w3", "model.layers.40.block_sparse_moe.experts.132.w3", "model.layers.40.block_sparse_moe.experts.133.w3", "model.layers.40.block_sparse_moe.experts.134.w3", "model.layers.40.block_sparse_moe.experts.135.w3", "model.layers.40.block_sparse_moe.experts.136.w3", "model.layers.40.block_sparse_moe.experts.137.w3", "model.layers.40.block_sparse_moe.experts.138.w3", "model.layers.40.block_sparse_moe.experts.139.w3", "model.layers.40.block_sparse_moe.experts.140.w3", "model.layers.40.block_sparse_moe.experts.141.w3", "model.layers.40.block_sparse_moe.experts.142.w3", "model.layers.40.block_sparse_moe.experts.143.w3", "model.layers.40.block_sparse_moe.experts.144.w3", "model.layers.40.block_sparse_moe.experts.145.w3", "model.layers.40.block_sparse_moe.experts.146.w3", "model.layers.40.block_sparse_moe.experts.147.w3", "model.layers.40.block_sparse_moe.experts.148.w3", "model.layers.40.block_sparse_moe.experts.149.w3", "model.layers.40.block_sparse_moe.experts.150.w3", "model.layers.40.block_sparse_moe.experts.151.w3", "model.layers.40.block_sparse_moe.experts.152.w3", "model.layers.40.block_sparse_moe.experts.153.w3", "model.layers.40.block_sparse_moe.experts.154.w3", "model.layers.40.block_sparse_moe.experts.155.w3", "model.layers.40.block_sparse_moe.experts.156.w3", "model.layers.40.block_sparse_moe.experts.157.w3", "model.layers.40.block_sparse_moe.experts.158.w3", "model.layers.40.block_sparse_moe.experts.159.w3", "model.layers.40.block_sparse_moe.experts.160.w3", "model.layers.40.block_sparse_moe.experts.161.w3", "model.layers.40.block_sparse_moe.experts.162.w3", "model.layers.40.block_sparse_moe.experts.163.w3", "model.layers.40.block_sparse_moe.experts.164.w3", "model.layers.40.block_sparse_moe.experts.165.w3", "model.layers.40.block_sparse_moe.experts.166.w3", "model.layers.40.block_sparse_moe.experts.167.w3", "model.layers.40.block_sparse_moe.experts.168.w3", "model.layers.40.block_sparse_moe.experts.169.w3", "model.layers.40.block_sparse_moe.experts.170.w3", "model.layers.40.block_sparse_moe.experts.171.w3", "model.layers.40.block_sparse_moe.experts.172.w3", "model.layers.40.block_sparse_moe.experts.173.w3", "model.layers.40.block_sparse_moe.experts.174.w3", "model.layers.40.block_sparse_moe.experts.175.w3", "model.layers.40.block_sparse_moe.experts.176.w3", "model.layers.40.block_sparse_moe.experts.177.w3", "model.layers.40.block_sparse_moe.experts.178.w3", "model.layers.40.block_sparse_moe.experts.179.w3", "model.layers.40.block_sparse_moe.experts.180.w3", "model.layers.40.block_sparse_moe.experts.181.w3", "model.layers.40.block_sparse_moe.experts.182.w3", "model.layers.40.block_sparse_moe.experts.183.w3", "model.layers.40.block_sparse_moe.experts.184.w3", "model.layers.40.block_sparse_moe.experts.185.w3", "model.layers.40.block_sparse_moe.experts.186.w3", "model.layers.40.block_sparse_moe.experts.187.w3", "model.layers.40.block_sparse_moe.experts.188.w3", "model.layers.40.block_sparse_moe.experts.189.w3", "model.layers.40.block_sparse_moe.experts.190.w3", "model.layers.40.block_sparse_moe.experts.191.w3", "model.layers.40.block_sparse_moe.experts.192.w3", "model.layers.40.block_sparse_moe.experts.193.w3", "model.layers.40.block_sparse_moe.experts.194.w3", "model.layers.40.block_sparse_moe.experts.195.w3", "model.layers.40.block_sparse_moe.experts.196.w3", "model.layers.40.block_sparse_moe.experts.197.w3", "model.layers.40.block_sparse_moe.experts.198.w3", "model.layers.40.block_sparse_moe.experts.199.w3", "model.layers.40.block_sparse_moe.experts.200.w3", "model.layers.40.block_sparse_moe.experts.201.w3", "model.layers.40.block_sparse_moe.experts.202.w3", "model.layers.40.block_sparse_moe.experts.203.w3", "model.layers.40.block_sparse_moe.experts.204.w3", "model.layers.40.block_sparse_moe.experts.205.w3", "model.layers.40.block_sparse_moe.experts.206.w3", "model.layers.40.block_sparse_moe.experts.207.w3", "model.layers.40.block_sparse_moe.experts.208.w3", "model.layers.40.block_sparse_moe.experts.209.w3", "model.layers.40.block_sparse_moe.experts.210.w3", "model.layers.40.block_sparse_moe.experts.211.w3", "model.layers.40.block_sparse_moe.experts.212.w3", "model.layers.40.block_sparse_moe.experts.213.w3", "model.layers.40.block_sparse_moe.experts.214.w3", "model.layers.40.block_sparse_moe.experts.215.w3", "model.layers.40.block_sparse_moe.experts.216.w3", "model.layers.40.block_sparse_moe.experts.217.w3", "model.layers.40.block_sparse_moe.experts.218.w3", "model.layers.40.block_sparse_moe.experts.219.w3", "model.layers.40.block_sparse_moe.experts.220.w3", "model.layers.40.block_sparse_moe.experts.221.w3", "model.layers.40.block_sparse_moe.experts.222.w3", "model.layers.40.block_sparse_moe.experts.223.w3", "model.layers.40.block_sparse_moe.experts.224.w3", "model.layers.40.block_sparse_moe.experts.225.w3", "model.layers.40.block_sparse_moe.experts.226.w3", "model.layers.40.block_sparse_moe.experts.227.w3", "model.layers.40.block_sparse_moe.experts.228.w3", "model.layers.40.block_sparse_moe.experts.229.w3", "model.layers.40.block_sparse_moe.experts.230.w3", "model.layers.40.block_sparse_moe.experts.231.w3", "model.layers.40.block_sparse_moe.experts.232.w3", "model.layers.40.block_sparse_moe.experts.233.w3", "model.layers.40.block_sparse_moe.experts.234.w3", "model.layers.40.block_sparse_moe.experts.235.w3", "model.layers.40.block_sparse_moe.experts.236.w3", "model.layers.40.block_sparse_moe.experts.237.w3", "model.layers.40.block_sparse_moe.experts.238.w3", "model.layers.40.block_sparse_moe.experts.239.w3", "model.layers.40.block_sparse_moe.experts.240.w3", "model.layers.40.block_sparse_moe.experts.241.w3", "model.layers.40.block_sparse_moe.experts.242.w3", "model.layers.40.block_sparse_moe.experts.243.w3", "model.layers.40.block_sparse_moe.experts.244.w3", "model.layers.40.block_sparse_moe.experts.245.w3", "model.layers.40.block_sparse_moe.experts.246.w3", "model.layers.40.block_sparse_moe.experts.247.w3", "model.layers.40.block_sparse_moe.experts.248.w3", "model.layers.40.block_sparse_moe.experts.249.w3", "model.layers.40.block_sparse_moe.experts.250.w3", "model.layers.40.block_sparse_moe.experts.251.w3", "model.layers.40.block_sparse_moe.experts.252.w3", "model.layers.40.block_sparse_moe.experts.253.w3", "model.layers.40.block_sparse_moe.experts.254.w3", "model.layers.40.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0009531855583190807, "dbits": 2415919104 } ] }, { "idx": 204, "layers": [ "model.layers.40.block_sparse_moe.experts.0.w2", "model.layers.40.block_sparse_moe.experts.1.w2", "model.layers.40.block_sparse_moe.experts.2.w2", "model.layers.40.block_sparse_moe.experts.3.w2", "model.layers.40.block_sparse_moe.experts.4.w2", "model.layers.40.block_sparse_moe.experts.5.w2", "model.layers.40.block_sparse_moe.experts.6.w2", "model.layers.40.block_sparse_moe.experts.7.w2", "model.layers.40.block_sparse_moe.experts.8.w2", "model.layers.40.block_sparse_moe.experts.9.w2", "model.layers.40.block_sparse_moe.experts.10.w2", "model.layers.40.block_sparse_moe.experts.11.w2", "model.layers.40.block_sparse_moe.experts.12.w2", "model.layers.40.block_sparse_moe.experts.13.w2", "model.layers.40.block_sparse_moe.experts.14.w2", "model.layers.40.block_sparse_moe.experts.15.w2", "model.layers.40.block_sparse_moe.experts.16.w2", "model.layers.40.block_sparse_moe.experts.17.w2", "model.layers.40.block_sparse_moe.experts.18.w2", "model.layers.40.block_sparse_moe.experts.19.w2", "model.layers.40.block_sparse_moe.experts.20.w2", "model.layers.40.block_sparse_moe.experts.21.w2", "model.layers.40.block_sparse_moe.experts.22.w2", "model.layers.40.block_sparse_moe.experts.23.w2", "model.layers.40.block_sparse_moe.experts.24.w2", "model.layers.40.block_sparse_moe.experts.25.w2", "model.layers.40.block_sparse_moe.experts.26.w2", "model.layers.40.block_sparse_moe.experts.27.w2", "model.layers.40.block_sparse_moe.experts.28.w2", "model.layers.40.block_sparse_moe.experts.29.w2", "model.layers.40.block_sparse_moe.experts.30.w2", "model.layers.40.block_sparse_moe.experts.31.w2", "model.layers.40.block_sparse_moe.experts.32.w2", "model.layers.40.block_sparse_moe.experts.33.w2", "model.layers.40.block_sparse_moe.experts.34.w2", "model.layers.40.block_sparse_moe.experts.35.w2", "model.layers.40.block_sparse_moe.experts.36.w2", "model.layers.40.block_sparse_moe.experts.37.w2", "model.layers.40.block_sparse_moe.experts.38.w2", "model.layers.40.block_sparse_moe.experts.39.w2", "model.layers.40.block_sparse_moe.experts.40.w2", "model.layers.40.block_sparse_moe.experts.41.w2", "model.layers.40.block_sparse_moe.experts.42.w2", "model.layers.40.block_sparse_moe.experts.43.w2", "model.layers.40.block_sparse_moe.experts.44.w2", "model.layers.40.block_sparse_moe.experts.45.w2", "model.layers.40.block_sparse_moe.experts.46.w2", "model.layers.40.block_sparse_moe.experts.47.w2", "model.layers.40.block_sparse_moe.experts.48.w2", "model.layers.40.block_sparse_moe.experts.49.w2", "model.layers.40.block_sparse_moe.experts.50.w2", "model.layers.40.block_sparse_moe.experts.51.w2", "model.layers.40.block_sparse_moe.experts.52.w2", "model.layers.40.block_sparse_moe.experts.53.w2", "model.layers.40.block_sparse_moe.experts.54.w2", "model.layers.40.block_sparse_moe.experts.55.w2", "model.layers.40.block_sparse_moe.experts.56.w2", "model.layers.40.block_sparse_moe.experts.57.w2", "model.layers.40.block_sparse_moe.experts.58.w2", "model.layers.40.block_sparse_moe.experts.59.w2", "model.layers.40.block_sparse_moe.experts.60.w2", "model.layers.40.block_sparse_moe.experts.61.w2", "model.layers.40.block_sparse_moe.experts.62.w2", "model.layers.40.block_sparse_moe.experts.63.w2", "model.layers.40.block_sparse_moe.experts.64.w2", "model.layers.40.block_sparse_moe.experts.65.w2", "model.layers.40.block_sparse_moe.experts.66.w2", "model.layers.40.block_sparse_moe.experts.67.w2", "model.layers.40.block_sparse_moe.experts.68.w2", "model.layers.40.block_sparse_moe.experts.69.w2", "model.layers.40.block_sparse_moe.experts.70.w2", "model.layers.40.block_sparse_moe.experts.71.w2", "model.layers.40.block_sparse_moe.experts.72.w2", "model.layers.40.block_sparse_moe.experts.73.w2", "model.layers.40.block_sparse_moe.experts.74.w2", "model.layers.40.block_sparse_moe.experts.75.w2", "model.layers.40.block_sparse_moe.experts.76.w2", "model.layers.40.block_sparse_moe.experts.77.w2", "model.layers.40.block_sparse_moe.experts.78.w2", "model.layers.40.block_sparse_moe.experts.79.w2", "model.layers.40.block_sparse_moe.experts.80.w2", "model.layers.40.block_sparse_moe.experts.81.w2", "model.layers.40.block_sparse_moe.experts.82.w2", "model.layers.40.block_sparse_moe.experts.83.w2", "model.layers.40.block_sparse_moe.experts.84.w2", "model.layers.40.block_sparse_moe.experts.85.w2", "model.layers.40.block_sparse_moe.experts.86.w2", "model.layers.40.block_sparse_moe.experts.87.w2", "model.layers.40.block_sparse_moe.experts.88.w2", "model.layers.40.block_sparse_moe.experts.89.w2", "model.layers.40.block_sparse_moe.experts.90.w2", "model.layers.40.block_sparse_moe.experts.91.w2", "model.layers.40.block_sparse_moe.experts.92.w2", "model.layers.40.block_sparse_moe.experts.93.w2", "model.layers.40.block_sparse_moe.experts.94.w2", "model.layers.40.block_sparse_moe.experts.95.w2", "model.layers.40.block_sparse_moe.experts.96.w2", "model.layers.40.block_sparse_moe.experts.97.w2", "model.layers.40.block_sparse_moe.experts.98.w2", "model.layers.40.block_sparse_moe.experts.99.w2", "model.layers.40.block_sparse_moe.experts.100.w2", "model.layers.40.block_sparse_moe.experts.101.w2", "model.layers.40.block_sparse_moe.experts.102.w2", "model.layers.40.block_sparse_moe.experts.103.w2", "model.layers.40.block_sparse_moe.experts.104.w2", "model.layers.40.block_sparse_moe.experts.105.w2", "model.layers.40.block_sparse_moe.experts.106.w2", "model.layers.40.block_sparse_moe.experts.107.w2", "model.layers.40.block_sparse_moe.experts.108.w2", "model.layers.40.block_sparse_moe.experts.109.w2", "model.layers.40.block_sparse_moe.experts.110.w2", "model.layers.40.block_sparse_moe.experts.111.w2", "model.layers.40.block_sparse_moe.experts.112.w2", "model.layers.40.block_sparse_moe.experts.113.w2", "model.layers.40.block_sparse_moe.experts.114.w2", "model.layers.40.block_sparse_moe.experts.115.w2", "model.layers.40.block_sparse_moe.experts.116.w2", "model.layers.40.block_sparse_moe.experts.117.w2", "model.layers.40.block_sparse_moe.experts.118.w2", "model.layers.40.block_sparse_moe.experts.119.w2", "model.layers.40.block_sparse_moe.experts.120.w2", "model.layers.40.block_sparse_moe.experts.121.w2", "model.layers.40.block_sparse_moe.experts.122.w2", "model.layers.40.block_sparse_moe.experts.123.w2", "model.layers.40.block_sparse_moe.experts.124.w2", "model.layers.40.block_sparse_moe.experts.125.w2", "model.layers.40.block_sparse_moe.experts.126.w2", "model.layers.40.block_sparse_moe.experts.127.w2", "model.layers.40.block_sparse_moe.experts.128.w2", "model.layers.40.block_sparse_moe.experts.129.w2", "model.layers.40.block_sparse_moe.experts.130.w2", "model.layers.40.block_sparse_moe.experts.131.w2", "model.layers.40.block_sparse_moe.experts.132.w2", "model.layers.40.block_sparse_moe.experts.133.w2", "model.layers.40.block_sparse_moe.experts.134.w2", "model.layers.40.block_sparse_moe.experts.135.w2", "model.layers.40.block_sparse_moe.experts.136.w2", "model.layers.40.block_sparse_moe.experts.137.w2", "model.layers.40.block_sparse_moe.experts.138.w2", "model.layers.40.block_sparse_moe.experts.139.w2", "model.layers.40.block_sparse_moe.experts.140.w2", "model.layers.40.block_sparse_moe.experts.141.w2", "model.layers.40.block_sparse_moe.experts.142.w2", "model.layers.40.block_sparse_moe.experts.143.w2", "model.layers.40.block_sparse_moe.experts.144.w2", "model.layers.40.block_sparse_moe.experts.145.w2", "model.layers.40.block_sparse_moe.experts.146.w2", "model.layers.40.block_sparse_moe.experts.147.w2", "model.layers.40.block_sparse_moe.experts.148.w2", "model.layers.40.block_sparse_moe.experts.149.w2", "model.layers.40.block_sparse_moe.experts.150.w2", "model.layers.40.block_sparse_moe.experts.151.w2", "model.layers.40.block_sparse_moe.experts.152.w2", "model.layers.40.block_sparse_moe.experts.153.w2", "model.layers.40.block_sparse_moe.experts.154.w2", "model.layers.40.block_sparse_moe.experts.155.w2", "model.layers.40.block_sparse_moe.experts.156.w2", "model.layers.40.block_sparse_moe.experts.157.w2", "model.layers.40.block_sparse_moe.experts.158.w2", "model.layers.40.block_sparse_moe.experts.159.w2", "model.layers.40.block_sparse_moe.experts.160.w2", "model.layers.40.block_sparse_moe.experts.161.w2", "model.layers.40.block_sparse_moe.experts.162.w2", "model.layers.40.block_sparse_moe.experts.163.w2", "model.layers.40.block_sparse_moe.experts.164.w2", "model.layers.40.block_sparse_moe.experts.165.w2", "model.layers.40.block_sparse_moe.experts.166.w2", "model.layers.40.block_sparse_moe.experts.167.w2", "model.layers.40.block_sparse_moe.experts.168.w2", "model.layers.40.block_sparse_moe.experts.169.w2", "model.layers.40.block_sparse_moe.experts.170.w2", "model.layers.40.block_sparse_moe.experts.171.w2", "model.layers.40.block_sparse_moe.experts.172.w2", "model.layers.40.block_sparse_moe.experts.173.w2", "model.layers.40.block_sparse_moe.experts.174.w2", "model.layers.40.block_sparse_moe.experts.175.w2", "model.layers.40.block_sparse_moe.experts.176.w2", "model.layers.40.block_sparse_moe.experts.177.w2", "model.layers.40.block_sparse_moe.experts.178.w2", "model.layers.40.block_sparse_moe.experts.179.w2", "model.layers.40.block_sparse_moe.experts.180.w2", "model.layers.40.block_sparse_moe.experts.181.w2", "model.layers.40.block_sparse_moe.experts.182.w2", "model.layers.40.block_sparse_moe.experts.183.w2", "model.layers.40.block_sparse_moe.experts.184.w2", "model.layers.40.block_sparse_moe.experts.185.w2", "model.layers.40.block_sparse_moe.experts.186.w2", "model.layers.40.block_sparse_moe.experts.187.w2", "model.layers.40.block_sparse_moe.experts.188.w2", "model.layers.40.block_sparse_moe.experts.189.w2", "model.layers.40.block_sparse_moe.experts.190.w2", "model.layers.40.block_sparse_moe.experts.191.w2", "model.layers.40.block_sparse_moe.experts.192.w2", "model.layers.40.block_sparse_moe.experts.193.w2", "model.layers.40.block_sparse_moe.experts.194.w2", "model.layers.40.block_sparse_moe.experts.195.w2", "model.layers.40.block_sparse_moe.experts.196.w2", "model.layers.40.block_sparse_moe.experts.197.w2", "model.layers.40.block_sparse_moe.experts.198.w2", "model.layers.40.block_sparse_moe.experts.199.w2", "model.layers.40.block_sparse_moe.experts.200.w2", "model.layers.40.block_sparse_moe.experts.201.w2", "model.layers.40.block_sparse_moe.experts.202.w2", "model.layers.40.block_sparse_moe.experts.203.w2", "model.layers.40.block_sparse_moe.experts.204.w2", "model.layers.40.block_sparse_moe.experts.205.w2", "model.layers.40.block_sparse_moe.experts.206.w2", "model.layers.40.block_sparse_moe.experts.207.w2", "model.layers.40.block_sparse_moe.experts.208.w2", "model.layers.40.block_sparse_moe.experts.209.w2", "model.layers.40.block_sparse_moe.experts.210.w2", "model.layers.40.block_sparse_moe.experts.211.w2", "model.layers.40.block_sparse_moe.experts.212.w2", "model.layers.40.block_sparse_moe.experts.213.w2", "model.layers.40.block_sparse_moe.experts.214.w2", "model.layers.40.block_sparse_moe.experts.215.w2", "model.layers.40.block_sparse_moe.experts.216.w2", "model.layers.40.block_sparse_moe.experts.217.w2", "model.layers.40.block_sparse_moe.experts.218.w2", "model.layers.40.block_sparse_moe.experts.219.w2", "model.layers.40.block_sparse_moe.experts.220.w2", "model.layers.40.block_sparse_moe.experts.221.w2", "model.layers.40.block_sparse_moe.experts.222.w2", "model.layers.40.block_sparse_moe.experts.223.w2", "model.layers.40.block_sparse_moe.experts.224.w2", "model.layers.40.block_sparse_moe.experts.225.w2", "model.layers.40.block_sparse_moe.experts.226.w2", "model.layers.40.block_sparse_moe.experts.227.w2", "model.layers.40.block_sparse_moe.experts.228.w2", "model.layers.40.block_sparse_moe.experts.229.w2", "model.layers.40.block_sparse_moe.experts.230.w2", "model.layers.40.block_sparse_moe.experts.231.w2", "model.layers.40.block_sparse_moe.experts.232.w2", "model.layers.40.block_sparse_moe.experts.233.w2", "model.layers.40.block_sparse_moe.experts.234.w2", "model.layers.40.block_sparse_moe.experts.235.w2", "model.layers.40.block_sparse_moe.experts.236.w2", "model.layers.40.block_sparse_moe.experts.237.w2", "model.layers.40.block_sparse_moe.experts.238.w2", "model.layers.40.block_sparse_moe.experts.239.w2", "model.layers.40.block_sparse_moe.experts.240.w2", "model.layers.40.block_sparse_moe.experts.241.w2", "model.layers.40.block_sparse_moe.experts.242.w2", "model.layers.40.block_sparse_moe.experts.243.w2", "model.layers.40.block_sparse_moe.experts.244.w2", "model.layers.40.block_sparse_moe.experts.245.w2", "model.layers.40.block_sparse_moe.experts.246.w2", "model.layers.40.block_sparse_moe.experts.247.w2", "model.layers.40.block_sparse_moe.experts.248.w2", "model.layers.40.block_sparse_moe.experts.249.w2", "model.layers.40.block_sparse_moe.experts.250.w2", "model.layers.40.block_sparse_moe.experts.251.w2", "model.layers.40.block_sparse_moe.experts.252.w2", "model.layers.40.block_sparse_moe.experts.253.w2", "model.layers.40.block_sparse_moe.experts.254.w2", "model.layers.40.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0009468670934438816, "dbits": 1207959552 } ] }, { "idx": 205, "layers": [ "model.layers.41.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0002557080239057319, "dbits": 18874368 } ] }, { "idx": 206, "layers": [ "model.layers.41.self_attn.k_proj", "model.layers.41.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0059758517891168594, "dbits": 6291456 } ] }, { "idx": 207, "layers": [ "model.layers.41.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0018866654485464318, "dbits": 18874368 } ] }, { "idx": 208, "layers": [ "model.layers.41.block_sparse_moe.experts.0.w1", "model.layers.41.block_sparse_moe.experts.1.w1", "model.layers.41.block_sparse_moe.experts.2.w1", "model.layers.41.block_sparse_moe.experts.3.w1", "model.layers.41.block_sparse_moe.experts.4.w1", "model.layers.41.block_sparse_moe.experts.5.w1", "model.layers.41.block_sparse_moe.experts.6.w1", "model.layers.41.block_sparse_moe.experts.7.w1", "model.layers.41.block_sparse_moe.experts.8.w1", "model.layers.41.block_sparse_moe.experts.9.w1", "model.layers.41.block_sparse_moe.experts.10.w1", "model.layers.41.block_sparse_moe.experts.11.w1", "model.layers.41.block_sparse_moe.experts.12.w1", "model.layers.41.block_sparse_moe.experts.13.w1", "model.layers.41.block_sparse_moe.experts.14.w1", "model.layers.41.block_sparse_moe.experts.15.w1", "model.layers.41.block_sparse_moe.experts.16.w1", "model.layers.41.block_sparse_moe.experts.17.w1", "model.layers.41.block_sparse_moe.experts.18.w1", "model.layers.41.block_sparse_moe.experts.19.w1", "model.layers.41.block_sparse_moe.experts.20.w1", "model.layers.41.block_sparse_moe.experts.21.w1", "model.layers.41.block_sparse_moe.experts.22.w1", "model.layers.41.block_sparse_moe.experts.23.w1", "model.layers.41.block_sparse_moe.experts.24.w1", "model.layers.41.block_sparse_moe.experts.25.w1", "model.layers.41.block_sparse_moe.experts.26.w1", "model.layers.41.block_sparse_moe.experts.27.w1", "model.layers.41.block_sparse_moe.experts.28.w1", "model.layers.41.block_sparse_moe.experts.29.w1", "model.layers.41.block_sparse_moe.experts.30.w1", "model.layers.41.block_sparse_moe.experts.31.w1", "model.layers.41.block_sparse_moe.experts.32.w1", "model.layers.41.block_sparse_moe.experts.33.w1", "model.layers.41.block_sparse_moe.experts.34.w1", "model.layers.41.block_sparse_moe.experts.35.w1", "model.layers.41.block_sparse_moe.experts.36.w1", "model.layers.41.block_sparse_moe.experts.37.w1", "model.layers.41.block_sparse_moe.experts.38.w1", "model.layers.41.block_sparse_moe.experts.39.w1", "model.layers.41.block_sparse_moe.experts.40.w1", "model.layers.41.block_sparse_moe.experts.41.w1", "model.layers.41.block_sparse_moe.experts.42.w1", "model.layers.41.block_sparse_moe.experts.43.w1", "model.layers.41.block_sparse_moe.experts.44.w1", "model.layers.41.block_sparse_moe.experts.45.w1", "model.layers.41.block_sparse_moe.experts.46.w1", "model.layers.41.block_sparse_moe.experts.47.w1", "model.layers.41.block_sparse_moe.experts.48.w1", "model.layers.41.block_sparse_moe.experts.49.w1", "model.layers.41.block_sparse_moe.experts.50.w1", "model.layers.41.block_sparse_moe.experts.51.w1", "model.layers.41.block_sparse_moe.experts.52.w1", "model.layers.41.block_sparse_moe.experts.53.w1", "model.layers.41.block_sparse_moe.experts.54.w1", "model.layers.41.block_sparse_moe.experts.55.w1", "model.layers.41.block_sparse_moe.experts.56.w1", "model.layers.41.block_sparse_moe.experts.57.w1", "model.layers.41.block_sparse_moe.experts.58.w1", "model.layers.41.block_sparse_moe.experts.59.w1", "model.layers.41.block_sparse_moe.experts.60.w1", "model.layers.41.block_sparse_moe.experts.61.w1", "model.layers.41.block_sparse_moe.experts.62.w1", "model.layers.41.block_sparse_moe.experts.63.w1", "model.layers.41.block_sparse_moe.experts.64.w1", "model.layers.41.block_sparse_moe.experts.65.w1", "model.layers.41.block_sparse_moe.experts.66.w1", "model.layers.41.block_sparse_moe.experts.67.w1", "model.layers.41.block_sparse_moe.experts.68.w1", "model.layers.41.block_sparse_moe.experts.69.w1", "model.layers.41.block_sparse_moe.experts.70.w1", "model.layers.41.block_sparse_moe.experts.71.w1", "model.layers.41.block_sparse_moe.experts.72.w1", "model.layers.41.block_sparse_moe.experts.73.w1", "model.layers.41.block_sparse_moe.experts.74.w1", "model.layers.41.block_sparse_moe.experts.75.w1", "model.layers.41.block_sparse_moe.experts.76.w1", "model.layers.41.block_sparse_moe.experts.77.w1", "model.layers.41.block_sparse_moe.experts.78.w1", "model.layers.41.block_sparse_moe.experts.79.w1", "model.layers.41.block_sparse_moe.experts.80.w1", "model.layers.41.block_sparse_moe.experts.81.w1", "model.layers.41.block_sparse_moe.experts.82.w1", "model.layers.41.block_sparse_moe.experts.83.w1", "model.layers.41.block_sparse_moe.experts.84.w1", "model.layers.41.block_sparse_moe.experts.85.w1", "model.layers.41.block_sparse_moe.experts.86.w1", "model.layers.41.block_sparse_moe.experts.87.w1", "model.layers.41.block_sparse_moe.experts.88.w1", "model.layers.41.block_sparse_moe.experts.89.w1", "model.layers.41.block_sparse_moe.experts.90.w1", "model.layers.41.block_sparse_moe.experts.91.w1", "model.layers.41.block_sparse_moe.experts.92.w1", "model.layers.41.block_sparse_moe.experts.93.w1", "model.layers.41.block_sparse_moe.experts.94.w1", "model.layers.41.block_sparse_moe.experts.95.w1", "model.layers.41.block_sparse_moe.experts.96.w1", "model.layers.41.block_sparse_moe.experts.97.w1", "model.layers.41.block_sparse_moe.experts.98.w1", "model.layers.41.block_sparse_moe.experts.99.w1", "model.layers.41.block_sparse_moe.experts.100.w1", "model.layers.41.block_sparse_moe.experts.101.w1", "model.layers.41.block_sparse_moe.experts.102.w1", "model.layers.41.block_sparse_moe.experts.103.w1", "model.layers.41.block_sparse_moe.experts.104.w1", "model.layers.41.block_sparse_moe.experts.105.w1", "model.layers.41.block_sparse_moe.experts.106.w1", "model.layers.41.block_sparse_moe.experts.107.w1", "model.layers.41.block_sparse_moe.experts.108.w1", "model.layers.41.block_sparse_moe.experts.109.w1", "model.layers.41.block_sparse_moe.experts.110.w1", "model.layers.41.block_sparse_moe.experts.111.w1", "model.layers.41.block_sparse_moe.experts.112.w1", "model.layers.41.block_sparse_moe.experts.113.w1", "model.layers.41.block_sparse_moe.experts.114.w1", "model.layers.41.block_sparse_moe.experts.115.w1", "model.layers.41.block_sparse_moe.experts.116.w1", "model.layers.41.block_sparse_moe.experts.117.w1", "model.layers.41.block_sparse_moe.experts.118.w1", "model.layers.41.block_sparse_moe.experts.119.w1", "model.layers.41.block_sparse_moe.experts.120.w1", "model.layers.41.block_sparse_moe.experts.121.w1", "model.layers.41.block_sparse_moe.experts.122.w1", "model.layers.41.block_sparse_moe.experts.123.w1", "model.layers.41.block_sparse_moe.experts.124.w1", "model.layers.41.block_sparse_moe.experts.125.w1", "model.layers.41.block_sparse_moe.experts.126.w1", "model.layers.41.block_sparse_moe.experts.127.w1", "model.layers.41.block_sparse_moe.experts.128.w1", "model.layers.41.block_sparse_moe.experts.129.w1", "model.layers.41.block_sparse_moe.experts.130.w1", "model.layers.41.block_sparse_moe.experts.131.w1", "model.layers.41.block_sparse_moe.experts.132.w1", "model.layers.41.block_sparse_moe.experts.133.w1", "model.layers.41.block_sparse_moe.experts.134.w1", "model.layers.41.block_sparse_moe.experts.135.w1", "model.layers.41.block_sparse_moe.experts.136.w1", "model.layers.41.block_sparse_moe.experts.137.w1", "model.layers.41.block_sparse_moe.experts.138.w1", "model.layers.41.block_sparse_moe.experts.139.w1", "model.layers.41.block_sparse_moe.experts.140.w1", "model.layers.41.block_sparse_moe.experts.141.w1", "model.layers.41.block_sparse_moe.experts.142.w1", "model.layers.41.block_sparse_moe.experts.143.w1", "model.layers.41.block_sparse_moe.experts.144.w1", "model.layers.41.block_sparse_moe.experts.145.w1", "model.layers.41.block_sparse_moe.experts.146.w1", "model.layers.41.block_sparse_moe.experts.147.w1", "model.layers.41.block_sparse_moe.experts.148.w1", "model.layers.41.block_sparse_moe.experts.149.w1", "model.layers.41.block_sparse_moe.experts.150.w1", "model.layers.41.block_sparse_moe.experts.151.w1", "model.layers.41.block_sparse_moe.experts.152.w1", "model.layers.41.block_sparse_moe.experts.153.w1", "model.layers.41.block_sparse_moe.experts.154.w1", "model.layers.41.block_sparse_moe.experts.155.w1", "model.layers.41.block_sparse_moe.experts.156.w1", "model.layers.41.block_sparse_moe.experts.157.w1", "model.layers.41.block_sparse_moe.experts.158.w1", "model.layers.41.block_sparse_moe.experts.159.w1", "model.layers.41.block_sparse_moe.experts.160.w1", "model.layers.41.block_sparse_moe.experts.161.w1", "model.layers.41.block_sparse_moe.experts.162.w1", "model.layers.41.block_sparse_moe.experts.163.w1", "model.layers.41.block_sparse_moe.experts.164.w1", "model.layers.41.block_sparse_moe.experts.165.w1", "model.layers.41.block_sparse_moe.experts.166.w1", "model.layers.41.block_sparse_moe.experts.167.w1", "model.layers.41.block_sparse_moe.experts.168.w1", "model.layers.41.block_sparse_moe.experts.169.w1", "model.layers.41.block_sparse_moe.experts.170.w1", "model.layers.41.block_sparse_moe.experts.171.w1", "model.layers.41.block_sparse_moe.experts.172.w1", "model.layers.41.block_sparse_moe.experts.173.w1", "model.layers.41.block_sparse_moe.experts.174.w1", "model.layers.41.block_sparse_moe.experts.175.w1", "model.layers.41.block_sparse_moe.experts.176.w1", "model.layers.41.block_sparse_moe.experts.177.w1", "model.layers.41.block_sparse_moe.experts.178.w1", "model.layers.41.block_sparse_moe.experts.179.w1", "model.layers.41.block_sparse_moe.experts.180.w1", "model.layers.41.block_sparse_moe.experts.181.w1", "model.layers.41.block_sparse_moe.experts.182.w1", "model.layers.41.block_sparse_moe.experts.183.w1", "model.layers.41.block_sparse_moe.experts.184.w1", "model.layers.41.block_sparse_moe.experts.185.w1", "model.layers.41.block_sparse_moe.experts.186.w1", "model.layers.41.block_sparse_moe.experts.187.w1", "model.layers.41.block_sparse_moe.experts.188.w1", "model.layers.41.block_sparse_moe.experts.189.w1", "model.layers.41.block_sparse_moe.experts.190.w1", "model.layers.41.block_sparse_moe.experts.191.w1", "model.layers.41.block_sparse_moe.experts.192.w1", "model.layers.41.block_sparse_moe.experts.193.w1", "model.layers.41.block_sparse_moe.experts.194.w1", "model.layers.41.block_sparse_moe.experts.195.w1", "model.layers.41.block_sparse_moe.experts.196.w1", "model.layers.41.block_sparse_moe.experts.197.w1", "model.layers.41.block_sparse_moe.experts.198.w1", "model.layers.41.block_sparse_moe.experts.199.w1", "model.layers.41.block_sparse_moe.experts.200.w1", "model.layers.41.block_sparse_moe.experts.201.w1", "model.layers.41.block_sparse_moe.experts.202.w1", "model.layers.41.block_sparse_moe.experts.203.w1", "model.layers.41.block_sparse_moe.experts.204.w1", "model.layers.41.block_sparse_moe.experts.205.w1", "model.layers.41.block_sparse_moe.experts.206.w1", "model.layers.41.block_sparse_moe.experts.207.w1", "model.layers.41.block_sparse_moe.experts.208.w1", "model.layers.41.block_sparse_moe.experts.209.w1", "model.layers.41.block_sparse_moe.experts.210.w1", "model.layers.41.block_sparse_moe.experts.211.w1", "model.layers.41.block_sparse_moe.experts.212.w1", "model.layers.41.block_sparse_moe.experts.213.w1", "model.layers.41.block_sparse_moe.experts.214.w1", "model.layers.41.block_sparse_moe.experts.215.w1", "model.layers.41.block_sparse_moe.experts.216.w1", "model.layers.41.block_sparse_moe.experts.217.w1", "model.layers.41.block_sparse_moe.experts.218.w1", "model.layers.41.block_sparse_moe.experts.219.w1", "model.layers.41.block_sparse_moe.experts.220.w1", "model.layers.41.block_sparse_moe.experts.221.w1", "model.layers.41.block_sparse_moe.experts.222.w1", "model.layers.41.block_sparse_moe.experts.223.w1", "model.layers.41.block_sparse_moe.experts.224.w1", "model.layers.41.block_sparse_moe.experts.225.w1", "model.layers.41.block_sparse_moe.experts.226.w1", "model.layers.41.block_sparse_moe.experts.227.w1", "model.layers.41.block_sparse_moe.experts.228.w1", "model.layers.41.block_sparse_moe.experts.229.w1", "model.layers.41.block_sparse_moe.experts.230.w1", "model.layers.41.block_sparse_moe.experts.231.w1", "model.layers.41.block_sparse_moe.experts.232.w1", "model.layers.41.block_sparse_moe.experts.233.w1", "model.layers.41.block_sparse_moe.experts.234.w1", "model.layers.41.block_sparse_moe.experts.235.w1", "model.layers.41.block_sparse_moe.experts.236.w1", "model.layers.41.block_sparse_moe.experts.237.w1", "model.layers.41.block_sparse_moe.experts.238.w1", "model.layers.41.block_sparse_moe.experts.239.w1", "model.layers.41.block_sparse_moe.experts.240.w1", "model.layers.41.block_sparse_moe.experts.241.w1", "model.layers.41.block_sparse_moe.experts.242.w1", "model.layers.41.block_sparse_moe.experts.243.w1", "model.layers.41.block_sparse_moe.experts.244.w1", "model.layers.41.block_sparse_moe.experts.245.w1", "model.layers.41.block_sparse_moe.experts.246.w1", "model.layers.41.block_sparse_moe.experts.247.w1", "model.layers.41.block_sparse_moe.experts.248.w1", "model.layers.41.block_sparse_moe.experts.249.w1", "model.layers.41.block_sparse_moe.experts.250.w1", "model.layers.41.block_sparse_moe.experts.251.w1", "model.layers.41.block_sparse_moe.experts.252.w1", "model.layers.41.block_sparse_moe.experts.253.w1", "model.layers.41.block_sparse_moe.experts.254.w1", "model.layers.41.block_sparse_moe.experts.255.w1", "model.layers.41.block_sparse_moe.experts.0.w3", "model.layers.41.block_sparse_moe.experts.1.w3", "model.layers.41.block_sparse_moe.experts.2.w3", "model.layers.41.block_sparse_moe.experts.3.w3", "model.layers.41.block_sparse_moe.experts.4.w3", "model.layers.41.block_sparse_moe.experts.5.w3", "model.layers.41.block_sparse_moe.experts.6.w3", "model.layers.41.block_sparse_moe.experts.7.w3", "model.layers.41.block_sparse_moe.experts.8.w3", "model.layers.41.block_sparse_moe.experts.9.w3", "model.layers.41.block_sparse_moe.experts.10.w3", "model.layers.41.block_sparse_moe.experts.11.w3", "model.layers.41.block_sparse_moe.experts.12.w3", "model.layers.41.block_sparse_moe.experts.13.w3", "model.layers.41.block_sparse_moe.experts.14.w3", "model.layers.41.block_sparse_moe.experts.15.w3", "model.layers.41.block_sparse_moe.experts.16.w3", "model.layers.41.block_sparse_moe.experts.17.w3", "model.layers.41.block_sparse_moe.experts.18.w3", "model.layers.41.block_sparse_moe.experts.19.w3", "model.layers.41.block_sparse_moe.experts.20.w3", "model.layers.41.block_sparse_moe.experts.21.w3", "model.layers.41.block_sparse_moe.experts.22.w3", "model.layers.41.block_sparse_moe.experts.23.w3", "model.layers.41.block_sparse_moe.experts.24.w3", "model.layers.41.block_sparse_moe.experts.25.w3", "model.layers.41.block_sparse_moe.experts.26.w3", "model.layers.41.block_sparse_moe.experts.27.w3", "model.layers.41.block_sparse_moe.experts.28.w3", "model.layers.41.block_sparse_moe.experts.29.w3", "model.layers.41.block_sparse_moe.experts.30.w3", "model.layers.41.block_sparse_moe.experts.31.w3", "model.layers.41.block_sparse_moe.experts.32.w3", "model.layers.41.block_sparse_moe.experts.33.w3", "model.layers.41.block_sparse_moe.experts.34.w3", "model.layers.41.block_sparse_moe.experts.35.w3", "model.layers.41.block_sparse_moe.experts.36.w3", "model.layers.41.block_sparse_moe.experts.37.w3", "model.layers.41.block_sparse_moe.experts.38.w3", "model.layers.41.block_sparse_moe.experts.39.w3", "model.layers.41.block_sparse_moe.experts.40.w3", "model.layers.41.block_sparse_moe.experts.41.w3", "model.layers.41.block_sparse_moe.experts.42.w3", "model.layers.41.block_sparse_moe.experts.43.w3", "model.layers.41.block_sparse_moe.experts.44.w3", "model.layers.41.block_sparse_moe.experts.45.w3", "model.layers.41.block_sparse_moe.experts.46.w3", "model.layers.41.block_sparse_moe.experts.47.w3", "model.layers.41.block_sparse_moe.experts.48.w3", "model.layers.41.block_sparse_moe.experts.49.w3", "model.layers.41.block_sparse_moe.experts.50.w3", "model.layers.41.block_sparse_moe.experts.51.w3", "model.layers.41.block_sparse_moe.experts.52.w3", "model.layers.41.block_sparse_moe.experts.53.w3", "model.layers.41.block_sparse_moe.experts.54.w3", "model.layers.41.block_sparse_moe.experts.55.w3", "model.layers.41.block_sparse_moe.experts.56.w3", "model.layers.41.block_sparse_moe.experts.57.w3", "model.layers.41.block_sparse_moe.experts.58.w3", "model.layers.41.block_sparse_moe.experts.59.w3", "model.layers.41.block_sparse_moe.experts.60.w3", "model.layers.41.block_sparse_moe.experts.61.w3", "model.layers.41.block_sparse_moe.experts.62.w3", "model.layers.41.block_sparse_moe.experts.63.w3", "model.layers.41.block_sparse_moe.experts.64.w3", "model.layers.41.block_sparse_moe.experts.65.w3", "model.layers.41.block_sparse_moe.experts.66.w3", "model.layers.41.block_sparse_moe.experts.67.w3", "model.layers.41.block_sparse_moe.experts.68.w3", "model.layers.41.block_sparse_moe.experts.69.w3", "model.layers.41.block_sparse_moe.experts.70.w3", "model.layers.41.block_sparse_moe.experts.71.w3", "model.layers.41.block_sparse_moe.experts.72.w3", "model.layers.41.block_sparse_moe.experts.73.w3", "model.layers.41.block_sparse_moe.experts.74.w3", "model.layers.41.block_sparse_moe.experts.75.w3", "model.layers.41.block_sparse_moe.experts.76.w3", "model.layers.41.block_sparse_moe.experts.77.w3", "model.layers.41.block_sparse_moe.experts.78.w3", "model.layers.41.block_sparse_moe.experts.79.w3", "model.layers.41.block_sparse_moe.experts.80.w3", "model.layers.41.block_sparse_moe.experts.81.w3", "model.layers.41.block_sparse_moe.experts.82.w3", "model.layers.41.block_sparse_moe.experts.83.w3", "model.layers.41.block_sparse_moe.experts.84.w3", "model.layers.41.block_sparse_moe.experts.85.w3", "model.layers.41.block_sparse_moe.experts.86.w3", "model.layers.41.block_sparse_moe.experts.87.w3", "model.layers.41.block_sparse_moe.experts.88.w3", "model.layers.41.block_sparse_moe.experts.89.w3", "model.layers.41.block_sparse_moe.experts.90.w3", "model.layers.41.block_sparse_moe.experts.91.w3", "model.layers.41.block_sparse_moe.experts.92.w3", "model.layers.41.block_sparse_moe.experts.93.w3", "model.layers.41.block_sparse_moe.experts.94.w3", "model.layers.41.block_sparse_moe.experts.95.w3", "model.layers.41.block_sparse_moe.experts.96.w3", "model.layers.41.block_sparse_moe.experts.97.w3", "model.layers.41.block_sparse_moe.experts.98.w3", "model.layers.41.block_sparse_moe.experts.99.w3", "model.layers.41.block_sparse_moe.experts.100.w3", "model.layers.41.block_sparse_moe.experts.101.w3", "model.layers.41.block_sparse_moe.experts.102.w3", "model.layers.41.block_sparse_moe.experts.103.w3", "model.layers.41.block_sparse_moe.experts.104.w3", "model.layers.41.block_sparse_moe.experts.105.w3", "model.layers.41.block_sparse_moe.experts.106.w3", "model.layers.41.block_sparse_moe.experts.107.w3", "model.layers.41.block_sparse_moe.experts.108.w3", "model.layers.41.block_sparse_moe.experts.109.w3", "model.layers.41.block_sparse_moe.experts.110.w3", "model.layers.41.block_sparse_moe.experts.111.w3", "model.layers.41.block_sparse_moe.experts.112.w3", "model.layers.41.block_sparse_moe.experts.113.w3", "model.layers.41.block_sparse_moe.experts.114.w3", "model.layers.41.block_sparse_moe.experts.115.w3", "model.layers.41.block_sparse_moe.experts.116.w3", "model.layers.41.block_sparse_moe.experts.117.w3", "model.layers.41.block_sparse_moe.experts.118.w3", "model.layers.41.block_sparse_moe.experts.119.w3", "model.layers.41.block_sparse_moe.experts.120.w3", "model.layers.41.block_sparse_moe.experts.121.w3", "model.layers.41.block_sparse_moe.experts.122.w3", "model.layers.41.block_sparse_moe.experts.123.w3", "model.layers.41.block_sparse_moe.experts.124.w3", "model.layers.41.block_sparse_moe.experts.125.w3", "model.layers.41.block_sparse_moe.experts.126.w3", "model.layers.41.block_sparse_moe.experts.127.w3", "model.layers.41.block_sparse_moe.experts.128.w3", "model.layers.41.block_sparse_moe.experts.129.w3", "model.layers.41.block_sparse_moe.experts.130.w3", "model.layers.41.block_sparse_moe.experts.131.w3", "model.layers.41.block_sparse_moe.experts.132.w3", "model.layers.41.block_sparse_moe.experts.133.w3", "model.layers.41.block_sparse_moe.experts.134.w3", "model.layers.41.block_sparse_moe.experts.135.w3", "model.layers.41.block_sparse_moe.experts.136.w3", "model.layers.41.block_sparse_moe.experts.137.w3", "model.layers.41.block_sparse_moe.experts.138.w3", "model.layers.41.block_sparse_moe.experts.139.w3", "model.layers.41.block_sparse_moe.experts.140.w3", "model.layers.41.block_sparse_moe.experts.141.w3", "model.layers.41.block_sparse_moe.experts.142.w3", "model.layers.41.block_sparse_moe.experts.143.w3", "model.layers.41.block_sparse_moe.experts.144.w3", "model.layers.41.block_sparse_moe.experts.145.w3", "model.layers.41.block_sparse_moe.experts.146.w3", "model.layers.41.block_sparse_moe.experts.147.w3", "model.layers.41.block_sparse_moe.experts.148.w3", "model.layers.41.block_sparse_moe.experts.149.w3", "model.layers.41.block_sparse_moe.experts.150.w3", "model.layers.41.block_sparse_moe.experts.151.w3", "model.layers.41.block_sparse_moe.experts.152.w3", "model.layers.41.block_sparse_moe.experts.153.w3", "model.layers.41.block_sparse_moe.experts.154.w3", "model.layers.41.block_sparse_moe.experts.155.w3", "model.layers.41.block_sparse_moe.experts.156.w3", "model.layers.41.block_sparse_moe.experts.157.w3", "model.layers.41.block_sparse_moe.experts.158.w3", "model.layers.41.block_sparse_moe.experts.159.w3", "model.layers.41.block_sparse_moe.experts.160.w3", "model.layers.41.block_sparse_moe.experts.161.w3", "model.layers.41.block_sparse_moe.experts.162.w3", "model.layers.41.block_sparse_moe.experts.163.w3", "model.layers.41.block_sparse_moe.experts.164.w3", "model.layers.41.block_sparse_moe.experts.165.w3", "model.layers.41.block_sparse_moe.experts.166.w3", "model.layers.41.block_sparse_moe.experts.167.w3", "model.layers.41.block_sparse_moe.experts.168.w3", "model.layers.41.block_sparse_moe.experts.169.w3", "model.layers.41.block_sparse_moe.experts.170.w3", "model.layers.41.block_sparse_moe.experts.171.w3", "model.layers.41.block_sparse_moe.experts.172.w3", "model.layers.41.block_sparse_moe.experts.173.w3", "model.layers.41.block_sparse_moe.experts.174.w3", "model.layers.41.block_sparse_moe.experts.175.w3", "model.layers.41.block_sparse_moe.experts.176.w3", "model.layers.41.block_sparse_moe.experts.177.w3", "model.layers.41.block_sparse_moe.experts.178.w3", "model.layers.41.block_sparse_moe.experts.179.w3", "model.layers.41.block_sparse_moe.experts.180.w3", "model.layers.41.block_sparse_moe.experts.181.w3", "model.layers.41.block_sparse_moe.experts.182.w3", "model.layers.41.block_sparse_moe.experts.183.w3", "model.layers.41.block_sparse_moe.experts.184.w3", "model.layers.41.block_sparse_moe.experts.185.w3", "model.layers.41.block_sparse_moe.experts.186.w3", "model.layers.41.block_sparse_moe.experts.187.w3", "model.layers.41.block_sparse_moe.experts.188.w3", "model.layers.41.block_sparse_moe.experts.189.w3", "model.layers.41.block_sparse_moe.experts.190.w3", "model.layers.41.block_sparse_moe.experts.191.w3", "model.layers.41.block_sparse_moe.experts.192.w3", "model.layers.41.block_sparse_moe.experts.193.w3", "model.layers.41.block_sparse_moe.experts.194.w3", "model.layers.41.block_sparse_moe.experts.195.w3", "model.layers.41.block_sparse_moe.experts.196.w3", "model.layers.41.block_sparse_moe.experts.197.w3", "model.layers.41.block_sparse_moe.experts.198.w3", "model.layers.41.block_sparse_moe.experts.199.w3", "model.layers.41.block_sparse_moe.experts.200.w3", "model.layers.41.block_sparse_moe.experts.201.w3", "model.layers.41.block_sparse_moe.experts.202.w3", "model.layers.41.block_sparse_moe.experts.203.w3", "model.layers.41.block_sparse_moe.experts.204.w3", "model.layers.41.block_sparse_moe.experts.205.w3", "model.layers.41.block_sparse_moe.experts.206.w3", "model.layers.41.block_sparse_moe.experts.207.w3", "model.layers.41.block_sparse_moe.experts.208.w3", "model.layers.41.block_sparse_moe.experts.209.w3", "model.layers.41.block_sparse_moe.experts.210.w3", "model.layers.41.block_sparse_moe.experts.211.w3", "model.layers.41.block_sparse_moe.experts.212.w3", "model.layers.41.block_sparse_moe.experts.213.w3", "model.layers.41.block_sparse_moe.experts.214.w3", "model.layers.41.block_sparse_moe.experts.215.w3", "model.layers.41.block_sparse_moe.experts.216.w3", "model.layers.41.block_sparse_moe.experts.217.w3", "model.layers.41.block_sparse_moe.experts.218.w3", "model.layers.41.block_sparse_moe.experts.219.w3", "model.layers.41.block_sparse_moe.experts.220.w3", "model.layers.41.block_sparse_moe.experts.221.w3", "model.layers.41.block_sparse_moe.experts.222.w3", "model.layers.41.block_sparse_moe.experts.223.w3", "model.layers.41.block_sparse_moe.experts.224.w3", "model.layers.41.block_sparse_moe.experts.225.w3", "model.layers.41.block_sparse_moe.experts.226.w3", "model.layers.41.block_sparse_moe.experts.227.w3", "model.layers.41.block_sparse_moe.experts.228.w3", "model.layers.41.block_sparse_moe.experts.229.w3", "model.layers.41.block_sparse_moe.experts.230.w3", "model.layers.41.block_sparse_moe.experts.231.w3", "model.layers.41.block_sparse_moe.experts.232.w3", "model.layers.41.block_sparse_moe.experts.233.w3", "model.layers.41.block_sparse_moe.experts.234.w3", "model.layers.41.block_sparse_moe.experts.235.w3", "model.layers.41.block_sparse_moe.experts.236.w3", "model.layers.41.block_sparse_moe.experts.237.w3", "model.layers.41.block_sparse_moe.experts.238.w3", "model.layers.41.block_sparse_moe.experts.239.w3", "model.layers.41.block_sparse_moe.experts.240.w3", "model.layers.41.block_sparse_moe.experts.241.w3", "model.layers.41.block_sparse_moe.experts.242.w3", "model.layers.41.block_sparse_moe.experts.243.w3", "model.layers.41.block_sparse_moe.experts.244.w3", "model.layers.41.block_sparse_moe.experts.245.w3", "model.layers.41.block_sparse_moe.experts.246.w3", "model.layers.41.block_sparse_moe.experts.247.w3", "model.layers.41.block_sparse_moe.experts.248.w3", "model.layers.41.block_sparse_moe.experts.249.w3", "model.layers.41.block_sparse_moe.experts.250.w3", "model.layers.41.block_sparse_moe.experts.251.w3", "model.layers.41.block_sparse_moe.experts.252.w3", "model.layers.41.block_sparse_moe.experts.253.w3", "model.layers.41.block_sparse_moe.experts.254.w3", "model.layers.41.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.00011868290603161968, "dbits": 2415919104 } ] }, { "idx": 209, "layers": [ "model.layers.41.block_sparse_moe.experts.0.w2", "model.layers.41.block_sparse_moe.experts.1.w2", "model.layers.41.block_sparse_moe.experts.2.w2", "model.layers.41.block_sparse_moe.experts.3.w2", "model.layers.41.block_sparse_moe.experts.4.w2", "model.layers.41.block_sparse_moe.experts.5.w2", "model.layers.41.block_sparse_moe.experts.6.w2", "model.layers.41.block_sparse_moe.experts.7.w2", "model.layers.41.block_sparse_moe.experts.8.w2", "model.layers.41.block_sparse_moe.experts.9.w2", "model.layers.41.block_sparse_moe.experts.10.w2", "model.layers.41.block_sparse_moe.experts.11.w2", "model.layers.41.block_sparse_moe.experts.12.w2", "model.layers.41.block_sparse_moe.experts.13.w2", "model.layers.41.block_sparse_moe.experts.14.w2", "model.layers.41.block_sparse_moe.experts.15.w2", "model.layers.41.block_sparse_moe.experts.16.w2", "model.layers.41.block_sparse_moe.experts.17.w2", "model.layers.41.block_sparse_moe.experts.18.w2", "model.layers.41.block_sparse_moe.experts.19.w2", "model.layers.41.block_sparse_moe.experts.20.w2", "model.layers.41.block_sparse_moe.experts.21.w2", "model.layers.41.block_sparse_moe.experts.22.w2", "model.layers.41.block_sparse_moe.experts.23.w2", "model.layers.41.block_sparse_moe.experts.24.w2", "model.layers.41.block_sparse_moe.experts.25.w2", "model.layers.41.block_sparse_moe.experts.26.w2", "model.layers.41.block_sparse_moe.experts.27.w2", "model.layers.41.block_sparse_moe.experts.28.w2", "model.layers.41.block_sparse_moe.experts.29.w2", "model.layers.41.block_sparse_moe.experts.30.w2", "model.layers.41.block_sparse_moe.experts.31.w2", "model.layers.41.block_sparse_moe.experts.32.w2", "model.layers.41.block_sparse_moe.experts.33.w2", "model.layers.41.block_sparse_moe.experts.34.w2", "model.layers.41.block_sparse_moe.experts.35.w2", "model.layers.41.block_sparse_moe.experts.36.w2", "model.layers.41.block_sparse_moe.experts.37.w2", "model.layers.41.block_sparse_moe.experts.38.w2", "model.layers.41.block_sparse_moe.experts.39.w2", "model.layers.41.block_sparse_moe.experts.40.w2", "model.layers.41.block_sparse_moe.experts.41.w2", "model.layers.41.block_sparse_moe.experts.42.w2", "model.layers.41.block_sparse_moe.experts.43.w2", "model.layers.41.block_sparse_moe.experts.44.w2", "model.layers.41.block_sparse_moe.experts.45.w2", "model.layers.41.block_sparse_moe.experts.46.w2", "model.layers.41.block_sparse_moe.experts.47.w2", "model.layers.41.block_sparse_moe.experts.48.w2", "model.layers.41.block_sparse_moe.experts.49.w2", "model.layers.41.block_sparse_moe.experts.50.w2", "model.layers.41.block_sparse_moe.experts.51.w2", "model.layers.41.block_sparse_moe.experts.52.w2", "model.layers.41.block_sparse_moe.experts.53.w2", "model.layers.41.block_sparse_moe.experts.54.w2", "model.layers.41.block_sparse_moe.experts.55.w2", "model.layers.41.block_sparse_moe.experts.56.w2", "model.layers.41.block_sparse_moe.experts.57.w2", "model.layers.41.block_sparse_moe.experts.58.w2", "model.layers.41.block_sparse_moe.experts.59.w2", "model.layers.41.block_sparse_moe.experts.60.w2", "model.layers.41.block_sparse_moe.experts.61.w2", "model.layers.41.block_sparse_moe.experts.62.w2", "model.layers.41.block_sparse_moe.experts.63.w2", "model.layers.41.block_sparse_moe.experts.64.w2", "model.layers.41.block_sparse_moe.experts.65.w2", "model.layers.41.block_sparse_moe.experts.66.w2", "model.layers.41.block_sparse_moe.experts.67.w2", "model.layers.41.block_sparse_moe.experts.68.w2", "model.layers.41.block_sparse_moe.experts.69.w2", "model.layers.41.block_sparse_moe.experts.70.w2", "model.layers.41.block_sparse_moe.experts.71.w2", "model.layers.41.block_sparse_moe.experts.72.w2", "model.layers.41.block_sparse_moe.experts.73.w2", "model.layers.41.block_sparse_moe.experts.74.w2", "model.layers.41.block_sparse_moe.experts.75.w2", "model.layers.41.block_sparse_moe.experts.76.w2", "model.layers.41.block_sparse_moe.experts.77.w2", "model.layers.41.block_sparse_moe.experts.78.w2", "model.layers.41.block_sparse_moe.experts.79.w2", "model.layers.41.block_sparse_moe.experts.80.w2", "model.layers.41.block_sparse_moe.experts.81.w2", "model.layers.41.block_sparse_moe.experts.82.w2", "model.layers.41.block_sparse_moe.experts.83.w2", "model.layers.41.block_sparse_moe.experts.84.w2", "model.layers.41.block_sparse_moe.experts.85.w2", "model.layers.41.block_sparse_moe.experts.86.w2", "model.layers.41.block_sparse_moe.experts.87.w2", "model.layers.41.block_sparse_moe.experts.88.w2", "model.layers.41.block_sparse_moe.experts.89.w2", "model.layers.41.block_sparse_moe.experts.90.w2", "model.layers.41.block_sparse_moe.experts.91.w2", "model.layers.41.block_sparse_moe.experts.92.w2", "model.layers.41.block_sparse_moe.experts.93.w2", "model.layers.41.block_sparse_moe.experts.94.w2", "model.layers.41.block_sparse_moe.experts.95.w2", "model.layers.41.block_sparse_moe.experts.96.w2", "model.layers.41.block_sparse_moe.experts.97.w2", "model.layers.41.block_sparse_moe.experts.98.w2", "model.layers.41.block_sparse_moe.experts.99.w2", "model.layers.41.block_sparse_moe.experts.100.w2", "model.layers.41.block_sparse_moe.experts.101.w2", "model.layers.41.block_sparse_moe.experts.102.w2", "model.layers.41.block_sparse_moe.experts.103.w2", "model.layers.41.block_sparse_moe.experts.104.w2", "model.layers.41.block_sparse_moe.experts.105.w2", "model.layers.41.block_sparse_moe.experts.106.w2", "model.layers.41.block_sparse_moe.experts.107.w2", "model.layers.41.block_sparse_moe.experts.108.w2", "model.layers.41.block_sparse_moe.experts.109.w2", "model.layers.41.block_sparse_moe.experts.110.w2", "model.layers.41.block_sparse_moe.experts.111.w2", "model.layers.41.block_sparse_moe.experts.112.w2", "model.layers.41.block_sparse_moe.experts.113.w2", "model.layers.41.block_sparse_moe.experts.114.w2", "model.layers.41.block_sparse_moe.experts.115.w2", "model.layers.41.block_sparse_moe.experts.116.w2", "model.layers.41.block_sparse_moe.experts.117.w2", "model.layers.41.block_sparse_moe.experts.118.w2", "model.layers.41.block_sparse_moe.experts.119.w2", "model.layers.41.block_sparse_moe.experts.120.w2", "model.layers.41.block_sparse_moe.experts.121.w2", "model.layers.41.block_sparse_moe.experts.122.w2", "model.layers.41.block_sparse_moe.experts.123.w2", "model.layers.41.block_sparse_moe.experts.124.w2", "model.layers.41.block_sparse_moe.experts.125.w2", "model.layers.41.block_sparse_moe.experts.126.w2", "model.layers.41.block_sparse_moe.experts.127.w2", "model.layers.41.block_sparse_moe.experts.128.w2", "model.layers.41.block_sparse_moe.experts.129.w2", "model.layers.41.block_sparse_moe.experts.130.w2", "model.layers.41.block_sparse_moe.experts.131.w2", "model.layers.41.block_sparse_moe.experts.132.w2", "model.layers.41.block_sparse_moe.experts.133.w2", "model.layers.41.block_sparse_moe.experts.134.w2", "model.layers.41.block_sparse_moe.experts.135.w2", "model.layers.41.block_sparse_moe.experts.136.w2", "model.layers.41.block_sparse_moe.experts.137.w2", "model.layers.41.block_sparse_moe.experts.138.w2", "model.layers.41.block_sparse_moe.experts.139.w2", "model.layers.41.block_sparse_moe.experts.140.w2", "model.layers.41.block_sparse_moe.experts.141.w2", "model.layers.41.block_sparse_moe.experts.142.w2", "model.layers.41.block_sparse_moe.experts.143.w2", "model.layers.41.block_sparse_moe.experts.144.w2", "model.layers.41.block_sparse_moe.experts.145.w2", "model.layers.41.block_sparse_moe.experts.146.w2", "model.layers.41.block_sparse_moe.experts.147.w2", "model.layers.41.block_sparse_moe.experts.148.w2", "model.layers.41.block_sparse_moe.experts.149.w2", "model.layers.41.block_sparse_moe.experts.150.w2", "model.layers.41.block_sparse_moe.experts.151.w2", "model.layers.41.block_sparse_moe.experts.152.w2", "model.layers.41.block_sparse_moe.experts.153.w2", "model.layers.41.block_sparse_moe.experts.154.w2", "model.layers.41.block_sparse_moe.experts.155.w2", "model.layers.41.block_sparse_moe.experts.156.w2", "model.layers.41.block_sparse_moe.experts.157.w2", "model.layers.41.block_sparse_moe.experts.158.w2", "model.layers.41.block_sparse_moe.experts.159.w2", "model.layers.41.block_sparse_moe.experts.160.w2", "model.layers.41.block_sparse_moe.experts.161.w2", "model.layers.41.block_sparse_moe.experts.162.w2", "model.layers.41.block_sparse_moe.experts.163.w2", "model.layers.41.block_sparse_moe.experts.164.w2", "model.layers.41.block_sparse_moe.experts.165.w2", "model.layers.41.block_sparse_moe.experts.166.w2", "model.layers.41.block_sparse_moe.experts.167.w2", "model.layers.41.block_sparse_moe.experts.168.w2", "model.layers.41.block_sparse_moe.experts.169.w2", "model.layers.41.block_sparse_moe.experts.170.w2", "model.layers.41.block_sparse_moe.experts.171.w2", "model.layers.41.block_sparse_moe.experts.172.w2", "model.layers.41.block_sparse_moe.experts.173.w2", "model.layers.41.block_sparse_moe.experts.174.w2", "model.layers.41.block_sparse_moe.experts.175.w2", "model.layers.41.block_sparse_moe.experts.176.w2", "model.layers.41.block_sparse_moe.experts.177.w2", "model.layers.41.block_sparse_moe.experts.178.w2", "model.layers.41.block_sparse_moe.experts.179.w2", "model.layers.41.block_sparse_moe.experts.180.w2", "model.layers.41.block_sparse_moe.experts.181.w2", "model.layers.41.block_sparse_moe.experts.182.w2", "model.layers.41.block_sparse_moe.experts.183.w2", "model.layers.41.block_sparse_moe.experts.184.w2", "model.layers.41.block_sparse_moe.experts.185.w2", "model.layers.41.block_sparse_moe.experts.186.w2", "model.layers.41.block_sparse_moe.experts.187.w2", "model.layers.41.block_sparse_moe.experts.188.w2", "model.layers.41.block_sparse_moe.experts.189.w2", "model.layers.41.block_sparse_moe.experts.190.w2", "model.layers.41.block_sparse_moe.experts.191.w2", "model.layers.41.block_sparse_moe.experts.192.w2", "model.layers.41.block_sparse_moe.experts.193.w2", "model.layers.41.block_sparse_moe.experts.194.w2", "model.layers.41.block_sparse_moe.experts.195.w2", "model.layers.41.block_sparse_moe.experts.196.w2", "model.layers.41.block_sparse_moe.experts.197.w2", "model.layers.41.block_sparse_moe.experts.198.w2", "model.layers.41.block_sparse_moe.experts.199.w2", "model.layers.41.block_sparse_moe.experts.200.w2", "model.layers.41.block_sparse_moe.experts.201.w2", "model.layers.41.block_sparse_moe.experts.202.w2", "model.layers.41.block_sparse_moe.experts.203.w2", "model.layers.41.block_sparse_moe.experts.204.w2", "model.layers.41.block_sparse_moe.experts.205.w2", "model.layers.41.block_sparse_moe.experts.206.w2", "model.layers.41.block_sparse_moe.experts.207.w2", "model.layers.41.block_sparse_moe.experts.208.w2", "model.layers.41.block_sparse_moe.experts.209.w2", "model.layers.41.block_sparse_moe.experts.210.w2", "model.layers.41.block_sparse_moe.experts.211.w2", "model.layers.41.block_sparse_moe.experts.212.w2", "model.layers.41.block_sparse_moe.experts.213.w2", "model.layers.41.block_sparse_moe.experts.214.w2", "model.layers.41.block_sparse_moe.experts.215.w2", "model.layers.41.block_sparse_moe.experts.216.w2", "model.layers.41.block_sparse_moe.experts.217.w2", "model.layers.41.block_sparse_moe.experts.218.w2", "model.layers.41.block_sparse_moe.experts.219.w2", "model.layers.41.block_sparse_moe.experts.220.w2", "model.layers.41.block_sparse_moe.experts.221.w2", "model.layers.41.block_sparse_moe.experts.222.w2", "model.layers.41.block_sparse_moe.experts.223.w2", "model.layers.41.block_sparse_moe.experts.224.w2", "model.layers.41.block_sparse_moe.experts.225.w2", "model.layers.41.block_sparse_moe.experts.226.w2", "model.layers.41.block_sparse_moe.experts.227.w2", "model.layers.41.block_sparse_moe.experts.228.w2", "model.layers.41.block_sparse_moe.experts.229.w2", "model.layers.41.block_sparse_moe.experts.230.w2", "model.layers.41.block_sparse_moe.experts.231.w2", "model.layers.41.block_sparse_moe.experts.232.w2", "model.layers.41.block_sparse_moe.experts.233.w2", "model.layers.41.block_sparse_moe.experts.234.w2", "model.layers.41.block_sparse_moe.experts.235.w2", "model.layers.41.block_sparse_moe.experts.236.w2", "model.layers.41.block_sparse_moe.experts.237.w2", "model.layers.41.block_sparse_moe.experts.238.w2", "model.layers.41.block_sparse_moe.experts.239.w2", "model.layers.41.block_sparse_moe.experts.240.w2", "model.layers.41.block_sparse_moe.experts.241.w2", "model.layers.41.block_sparse_moe.experts.242.w2", "model.layers.41.block_sparse_moe.experts.243.w2", "model.layers.41.block_sparse_moe.experts.244.w2", "model.layers.41.block_sparse_moe.experts.245.w2", "model.layers.41.block_sparse_moe.experts.246.w2", "model.layers.41.block_sparse_moe.experts.247.w2", "model.layers.41.block_sparse_moe.experts.248.w2", "model.layers.41.block_sparse_moe.experts.249.w2", "model.layers.41.block_sparse_moe.experts.250.w2", "model.layers.41.block_sparse_moe.experts.251.w2", "model.layers.41.block_sparse_moe.experts.252.w2", "model.layers.41.block_sparse_moe.experts.253.w2", "model.layers.41.block_sparse_moe.experts.254.w2", "model.layers.41.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.0001208033412694598, "dbits": 1207959552 } ] }, { "idx": 210, "layers": [ "model.layers.42.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0009879600256681442, "dbits": 18874368 } ] }, { "idx": 211, "layers": [ "model.layers.42.self_attn.k_proj", "model.layers.42.self_attn.v_proj" ], "candidates": [ { "dkld": 0.007787576317787148, "dbits": 6291456 } ] }, { "idx": 212, "layers": [ "model.layers.42.self_attn.o_proj" ], "candidates": [ { "dkld": -0.00920776426792147, "dbits": 18874368 } ] }, { "idx": 213, "layers": [ "model.layers.42.block_sparse_moe.experts.0.w1", "model.layers.42.block_sparse_moe.experts.1.w1", "model.layers.42.block_sparse_moe.experts.2.w1", "model.layers.42.block_sparse_moe.experts.3.w1", "model.layers.42.block_sparse_moe.experts.4.w1", "model.layers.42.block_sparse_moe.experts.5.w1", "model.layers.42.block_sparse_moe.experts.6.w1", "model.layers.42.block_sparse_moe.experts.7.w1", "model.layers.42.block_sparse_moe.experts.8.w1", "model.layers.42.block_sparse_moe.experts.9.w1", "model.layers.42.block_sparse_moe.experts.10.w1", "model.layers.42.block_sparse_moe.experts.11.w1", "model.layers.42.block_sparse_moe.experts.12.w1", "model.layers.42.block_sparse_moe.experts.13.w1", "model.layers.42.block_sparse_moe.experts.14.w1", "model.layers.42.block_sparse_moe.experts.15.w1", "model.layers.42.block_sparse_moe.experts.16.w1", "model.layers.42.block_sparse_moe.experts.17.w1", "model.layers.42.block_sparse_moe.experts.18.w1", "model.layers.42.block_sparse_moe.experts.19.w1", "model.layers.42.block_sparse_moe.experts.20.w1", "model.layers.42.block_sparse_moe.experts.21.w1", "model.layers.42.block_sparse_moe.experts.22.w1", "model.layers.42.block_sparse_moe.experts.23.w1", "model.layers.42.block_sparse_moe.experts.24.w1", "model.layers.42.block_sparse_moe.experts.25.w1", "model.layers.42.block_sparse_moe.experts.26.w1", "model.layers.42.block_sparse_moe.experts.27.w1", "model.layers.42.block_sparse_moe.experts.28.w1", "model.layers.42.block_sparse_moe.experts.29.w1", "model.layers.42.block_sparse_moe.experts.30.w1", "model.layers.42.block_sparse_moe.experts.31.w1", "model.layers.42.block_sparse_moe.experts.32.w1", "model.layers.42.block_sparse_moe.experts.33.w1", "model.layers.42.block_sparse_moe.experts.34.w1", "model.layers.42.block_sparse_moe.experts.35.w1", "model.layers.42.block_sparse_moe.experts.36.w1", "model.layers.42.block_sparse_moe.experts.37.w1", "model.layers.42.block_sparse_moe.experts.38.w1", "model.layers.42.block_sparse_moe.experts.39.w1", "model.layers.42.block_sparse_moe.experts.40.w1", "model.layers.42.block_sparse_moe.experts.41.w1", "model.layers.42.block_sparse_moe.experts.42.w1", "model.layers.42.block_sparse_moe.experts.43.w1", "model.layers.42.block_sparse_moe.experts.44.w1", "model.layers.42.block_sparse_moe.experts.45.w1", "model.layers.42.block_sparse_moe.experts.46.w1", "model.layers.42.block_sparse_moe.experts.47.w1", "model.layers.42.block_sparse_moe.experts.48.w1", "model.layers.42.block_sparse_moe.experts.49.w1", "model.layers.42.block_sparse_moe.experts.50.w1", "model.layers.42.block_sparse_moe.experts.51.w1", "model.layers.42.block_sparse_moe.experts.52.w1", "model.layers.42.block_sparse_moe.experts.53.w1", "model.layers.42.block_sparse_moe.experts.54.w1", "model.layers.42.block_sparse_moe.experts.55.w1", "model.layers.42.block_sparse_moe.experts.56.w1", "model.layers.42.block_sparse_moe.experts.57.w1", "model.layers.42.block_sparse_moe.experts.58.w1", "model.layers.42.block_sparse_moe.experts.59.w1", "model.layers.42.block_sparse_moe.experts.60.w1", "model.layers.42.block_sparse_moe.experts.61.w1", "model.layers.42.block_sparse_moe.experts.62.w1", "model.layers.42.block_sparse_moe.experts.63.w1", "model.layers.42.block_sparse_moe.experts.64.w1", "model.layers.42.block_sparse_moe.experts.65.w1", "model.layers.42.block_sparse_moe.experts.66.w1", "model.layers.42.block_sparse_moe.experts.67.w1", "model.layers.42.block_sparse_moe.experts.68.w1", "model.layers.42.block_sparse_moe.experts.69.w1", "model.layers.42.block_sparse_moe.experts.70.w1", "model.layers.42.block_sparse_moe.experts.71.w1", "model.layers.42.block_sparse_moe.experts.72.w1", "model.layers.42.block_sparse_moe.experts.73.w1", "model.layers.42.block_sparse_moe.experts.74.w1", "model.layers.42.block_sparse_moe.experts.75.w1", "model.layers.42.block_sparse_moe.experts.76.w1", "model.layers.42.block_sparse_moe.experts.77.w1", "model.layers.42.block_sparse_moe.experts.78.w1", "model.layers.42.block_sparse_moe.experts.79.w1", "model.layers.42.block_sparse_moe.experts.80.w1", "model.layers.42.block_sparse_moe.experts.81.w1", "model.layers.42.block_sparse_moe.experts.82.w1", "model.layers.42.block_sparse_moe.experts.83.w1", "model.layers.42.block_sparse_moe.experts.84.w1", "model.layers.42.block_sparse_moe.experts.85.w1", "model.layers.42.block_sparse_moe.experts.86.w1", "model.layers.42.block_sparse_moe.experts.87.w1", "model.layers.42.block_sparse_moe.experts.88.w1", "model.layers.42.block_sparse_moe.experts.89.w1", "model.layers.42.block_sparse_moe.experts.90.w1", "model.layers.42.block_sparse_moe.experts.91.w1", "model.layers.42.block_sparse_moe.experts.92.w1", "model.layers.42.block_sparse_moe.experts.93.w1", "model.layers.42.block_sparse_moe.experts.94.w1", "model.layers.42.block_sparse_moe.experts.95.w1", "model.layers.42.block_sparse_moe.experts.96.w1", "model.layers.42.block_sparse_moe.experts.97.w1", "model.layers.42.block_sparse_moe.experts.98.w1", "model.layers.42.block_sparse_moe.experts.99.w1", "model.layers.42.block_sparse_moe.experts.100.w1", "model.layers.42.block_sparse_moe.experts.101.w1", "model.layers.42.block_sparse_moe.experts.102.w1", "model.layers.42.block_sparse_moe.experts.103.w1", "model.layers.42.block_sparse_moe.experts.104.w1", "model.layers.42.block_sparse_moe.experts.105.w1", "model.layers.42.block_sparse_moe.experts.106.w1", "model.layers.42.block_sparse_moe.experts.107.w1", "model.layers.42.block_sparse_moe.experts.108.w1", "model.layers.42.block_sparse_moe.experts.109.w1", "model.layers.42.block_sparse_moe.experts.110.w1", "model.layers.42.block_sparse_moe.experts.111.w1", "model.layers.42.block_sparse_moe.experts.112.w1", "model.layers.42.block_sparse_moe.experts.113.w1", "model.layers.42.block_sparse_moe.experts.114.w1", "model.layers.42.block_sparse_moe.experts.115.w1", "model.layers.42.block_sparse_moe.experts.116.w1", "model.layers.42.block_sparse_moe.experts.117.w1", "model.layers.42.block_sparse_moe.experts.118.w1", "model.layers.42.block_sparse_moe.experts.119.w1", "model.layers.42.block_sparse_moe.experts.120.w1", "model.layers.42.block_sparse_moe.experts.121.w1", "model.layers.42.block_sparse_moe.experts.122.w1", "model.layers.42.block_sparse_moe.experts.123.w1", "model.layers.42.block_sparse_moe.experts.124.w1", "model.layers.42.block_sparse_moe.experts.125.w1", "model.layers.42.block_sparse_moe.experts.126.w1", "model.layers.42.block_sparse_moe.experts.127.w1", "model.layers.42.block_sparse_moe.experts.128.w1", "model.layers.42.block_sparse_moe.experts.129.w1", "model.layers.42.block_sparse_moe.experts.130.w1", "model.layers.42.block_sparse_moe.experts.131.w1", "model.layers.42.block_sparse_moe.experts.132.w1", "model.layers.42.block_sparse_moe.experts.133.w1", "model.layers.42.block_sparse_moe.experts.134.w1", "model.layers.42.block_sparse_moe.experts.135.w1", "model.layers.42.block_sparse_moe.experts.136.w1", "model.layers.42.block_sparse_moe.experts.137.w1", "model.layers.42.block_sparse_moe.experts.138.w1", "model.layers.42.block_sparse_moe.experts.139.w1", "model.layers.42.block_sparse_moe.experts.140.w1", "model.layers.42.block_sparse_moe.experts.141.w1", "model.layers.42.block_sparse_moe.experts.142.w1", "model.layers.42.block_sparse_moe.experts.143.w1", "model.layers.42.block_sparse_moe.experts.144.w1", "model.layers.42.block_sparse_moe.experts.145.w1", "model.layers.42.block_sparse_moe.experts.146.w1", "model.layers.42.block_sparse_moe.experts.147.w1", "model.layers.42.block_sparse_moe.experts.148.w1", "model.layers.42.block_sparse_moe.experts.149.w1", "model.layers.42.block_sparse_moe.experts.150.w1", "model.layers.42.block_sparse_moe.experts.151.w1", "model.layers.42.block_sparse_moe.experts.152.w1", "model.layers.42.block_sparse_moe.experts.153.w1", "model.layers.42.block_sparse_moe.experts.154.w1", "model.layers.42.block_sparse_moe.experts.155.w1", "model.layers.42.block_sparse_moe.experts.156.w1", "model.layers.42.block_sparse_moe.experts.157.w1", "model.layers.42.block_sparse_moe.experts.158.w1", "model.layers.42.block_sparse_moe.experts.159.w1", "model.layers.42.block_sparse_moe.experts.160.w1", "model.layers.42.block_sparse_moe.experts.161.w1", "model.layers.42.block_sparse_moe.experts.162.w1", "model.layers.42.block_sparse_moe.experts.163.w1", "model.layers.42.block_sparse_moe.experts.164.w1", "model.layers.42.block_sparse_moe.experts.165.w1", "model.layers.42.block_sparse_moe.experts.166.w1", "model.layers.42.block_sparse_moe.experts.167.w1", "model.layers.42.block_sparse_moe.experts.168.w1", "model.layers.42.block_sparse_moe.experts.169.w1", "model.layers.42.block_sparse_moe.experts.170.w1", "model.layers.42.block_sparse_moe.experts.171.w1", "model.layers.42.block_sparse_moe.experts.172.w1", "model.layers.42.block_sparse_moe.experts.173.w1", "model.layers.42.block_sparse_moe.experts.174.w1", "model.layers.42.block_sparse_moe.experts.175.w1", "model.layers.42.block_sparse_moe.experts.176.w1", "model.layers.42.block_sparse_moe.experts.177.w1", "model.layers.42.block_sparse_moe.experts.178.w1", "model.layers.42.block_sparse_moe.experts.179.w1", "model.layers.42.block_sparse_moe.experts.180.w1", "model.layers.42.block_sparse_moe.experts.181.w1", "model.layers.42.block_sparse_moe.experts.182.w1", "model.layers.42.block_sparse_moe.experts.183.w1", "model.layers.42.block_sparse_moe.experts.184.w1", "model.layers.42.block_sparse_moe.experts.185.w1", "model.layers.42.block_sparse_moe.experts.186.w1", "model.layers.42.block_sparse_moe.experts.187.w1", "model.layers.42.block_sparse_moe.experts.188.w1", "model.layers.42.block_sparse_moe.experts.189.w1", "model.layers.42.block_sparse_moe.experts.190.w1", "model.layers.42.block_sparse_moe.experts.191.w1", "model.layers.42.block_sparse_moe.experts.192.w1", "model.layers.42.block_sparse_moe.experts.193.w1", "model.layers.42.block_sparse_moe.experts.194.w1", "model.layers.42.block_sparse_moe.experts.195.w1", "model.layers.42.block_sparse_moe.experts.196.w1", "model.layers.42.block_sparse_moe.experts.197.w1", "model.layers.42.block_sparse_moe.experts.198.w1", "model.layers.42.block_sparse_moe.experts.199.w1", "model.layers.42.block_sparse_moe.experts.200.w1", "model.layers.42.block_sparse_moe.experts.201.w1", "model.layers.42.block_sparse_moe.experts.202.w1", "model.layers.42.block_sparse_moe.experts.203.w1", "model.layers.42.block_sparse_moe.experts.204.w1", "model.layers.42.block_sparse_moe.experts.205.w1", "model.layers.42.block_sparse_moe.experts.206.w1", "model.layers.42.block_sparse_moe.experts.207.w1", "model.layers.42.block_sparse_moe.experts.208.w1", "model.layers.42.block_sparse_moe.experts.209.w1", "model.layers.42.block_sparse_moe.experts.210.w1", "model.layers.42.block_sparse_moe.experts.211.w1", "model.layers.42.block_sparse_moe.experts.212.w1", "model.layers.42.block_sparse_moe.experts.213.w1", "model.layers.42.block_sparse_moe.experts.214.w1", "model.layers.42.block_sparse_moe.experts.215.w1", "model.layers.42.block_sparse_moe.experts.216.w1", "model.layers.42.block_sparse_moe.experts.217.w1", "model.layers.42.block_sparse_moe.experts.218.w1", "model.layers.42.block_sparse_moe.experts.219.w1", "model.layers.42.block_sparse_moe.experts.220.w1", "model.layers.42.block_sparse_moe.experts.221.w1", "model.layers.42.block_sparse_moe.experts.222.w1", "model.layers.42.block_sparse_moe.experts.223.w1", "model.layers.42.block_sparse_moe.experts.224.w1", "model.layers.42.block_sparse_moe.experts.225.w1", "model.layers.42.block_sparse_moe.experts.226.w1", "model.layers.42.block_sparse_moe.experts.227.w1", "model.layers.42.block_sparse_moe.experts.228.w1", "model.layers.42.block_sparse_moe.experts.229.w1", "model.layers.42.block_sparse_moe.experts.230.w1", "model.layers.42.block_sparse_moe.experts.231.w1", "model.layers.42.block_sparse_moe.experts.232.w1", "model.layers.42.block_sparse_moe.experts.233.w1", "model.layers.42.block_sparse_moe.experts.234.w1", "model.layers.42.block_sparse_moe.experts.235.w1", "model.layers.42.block_sparse_moe.experts.236.w1", "model.layers.42.block_sparse_moe.experts.237.w1", "model.layers.42.block_sparse_moe.experts.238.w1", "model.layers.42.block_sparse_moe.experts.239.w1", "model.layers.42.block_sparse_moe.experts.240.w1", "model.layers.42.block_sparse_moe.experts.241.w1", "model.layers.42.block_sparse_moe.experts.242.w1", "model.layers.42.block_sparse_moe.experts.243.w1", "model.layers.42.block_sparse_moe.experts.244.w1", "model.layers.42.block_sparse_moe.experts.245.w1", "model.layers.42.block_sparse_moe.experts.246.w1", "model.layers.42.block_sparse_moe.experts.247.w1", "model.layers.42.block_sparse_moe.experts.248.w1", "model.layers.42.block_sparse_moe.experts.249.w1", "model.layers.42.block_sparse_moe.experts.250.w1", "model.layers.42.block_sparse_moe.experts.251.w1", "model.layers.42.block_sparse_moe.experts.252.w1", "model.layers.42.block_sparse_moe.experts.253.w1", "model.layers.42.block_sparse_moe.experts.254.w1", "model.layers.42.block_sparse_moe.experts.255.w1", "model.layers.42.block_sparse_moe.experts.0.w3", "model.layers.42.block_sparse_moe.experts.1.w3", "model.layers.42.block_sparse_moe.experts.2.w3", "model.layers.42.block_sparse_moe.experts.3.w3", "model.layers.42.block_sparse_moe.experts.4.w3", "model.layers.42.block_sparse_moe.experts.5.w3", "model.layers.42.block_sparse_moe.experts.6.w3", "model.layers.42.block_sparse_moe.experts.7.w3", "model.layers.42.block_sparse_moe.experts.8.w3", "model.layers.42.block_sparse_moe.experts.9.w3", "model.layers.42.block_sparse_moe.experts.10.w3", "model.layers.42.block_sparse_moe.experts.11.w3", "model.layers.42.block_sparse_moe.experts.12.w3", "model.layers.42.block_sparse_moe.experts.13.w3", "model.layers.42.block_sparse_moe.experts.14.w3", "model.layers.42.block_sparse_moe.experts.15.w3", "model.layers.42.block_sparse_moe.experts.16.w3", "model.layers.42.block_sparse_moe.experts.17.w3", "model.layers.42.block_sparse_moe.experts.18.w3", "model.layers.42.block_sparse_moe.experts.19.w3", "model.layers.42.block_sparse_moe.experts.20.w3", "model.layers.42.block_sparse_moe.experts.21.w3", "model.layers.42.block_sparse_moe.experts.22.w3", "model.layers.42.block_sparse_moe.experts.23.w3", "model.layers.42.block_sparse_moe.experts.24.w3", "model.layers.42.block_sparse_moe.experts.25.w3", "model.layers.42.block_sparse_moe.experts.26.w3", "model.layers.42.block_sparse_moe.experts.27.w3", "model.layers.42.block_sparse_moe.experts.28.w3", "model.layers.42.block_sparse_moe.experts.29.w3", "model.layers.42.block_sparse_moe.experts.30.w3", "model.layers.42.block_sparse_moe.experts.31.w3", "model.layers.42.block_sparse_moe.experts.32.w3", "model.layers.42.block_sparse_moe.experts.33.w3", "model.layers.42.block_sparse_moe.experts.34.w3", "model.layers.42.block_sparse_moe.experts.35.w3", "model.layers.42.block_sparse_moe.experts.36.w3", "model.layers.42.block_sparse_moe.experts.37.w3", "model.layers.42.block_sparse_moe.experts.38.w3", "model.layers.42.block_sparse_moe.experts.39.w3", "model.layers.42.block_sparse_moe.experts.40.w3", "model.layers.42.block_sparse_moe.experts.41.w3", "model.layers.42.block_sparse_moe.experts.42.w3", "model.layers.42.block_sparse_moe.experts.43.w3", "model.layers.42.block_sparse_moe.experts.44.w3", "model.layers.42.block_sparse_moe.experts.45.w3", "model.layers.42.block_sparse_moe.experts.46.w3", "model.layers.42.block_sparse_moe.experts.47.w3", "model.layers.42.block_sparse_moe.experts.48.w3", "model.layers.42.block_sparse_moe.experts.49.w3", "model.layers.42.block_sparse_moe.experts.50.w3", "model.layers.42.block_sparse_moe.experts.51.w3", "model.layers.42.block_sparse_moe.experts.52.w3", "model.layers.42.block_sparse_moe.experts.53.w3", "model.layers.42.block_sparse_moe.experts.54.w3", "model.layers.42.block_sparse_moe.experts.55.w3", "model.layers.42.block_sparse_moe.experts.56.w3", "model.layers.42.block_sparse_moe.experts.57.w3", "model.layers.42.block_sparse_moe.experts.58.w3", "model.layers.42.block_sparse_moe.experts.59.w3", "model.layers.42.block_sparse_moe.experts.60.w3", "model.layers.42.block_sparse_moe.experts.61.w3", "model.layers.42.block_sparse_moe.experts.62.w3", "model.layers.42.block_sparse_moe.experts.63.w3", "model.layers.42.block_sparse_moe.experts.64.w3", "model.layers.42.block_sparse_moe.experts.65.w3", "model.layers.42.block_sparse_moe.experts.66.w3", "model.layers.42.block_sparse_moe.experts.67.w3", "model.layers.42.block_sparse_moe.experts.68.w3", "model.layers.42.block_sparse_moe.experts.69.w3", "model.layers.42.block_sparse_moe.experts.70.w3", "model.layers.42.block_sparse_moe.experts.71.w3", "model.layers.42.block_sparse_moe.experts.72.w3", "model.layers.42.block_sparse_moe.experts.73.w3", "model.layers.42.block_sparse_moe.experts.74.w3", "model.layers.42.block_sparse_moe.experts.75.w3", "model.layers.42.block_sparse_moe.experts.76.w3", "model.layers.42.block_sparse_moe.experts.77.w3", "model.layers.42.block_sparse_moe.experts.78.w3", "model.layers.42.block_sparse_moe.experts.79.w3", "model.layers.42.block_sparse_moe.experts.80.w3", "model.layers.42.block_sparse_moe.experts.81.w3", "model.layers.42.block_sparse_moe.experts.82.w3", "model.layers.42.block_sparse_moe.experts.83.w3", "model.layers.42.block_sparse_moe.experts.84.w3", "model.layers.42.block_sparse_moe.experts.85.w3", "model.layers.42.block_sparse_moe.experts.86.w3", "model.layers.42.block_sparse_moe.experts.87.w3", "model.layers.42.block_sparse_moe.experts.88.w3", "model.layers.42.block_sparse_moe.experts.89.w3", "model.layers.42.block_sparse_moe.experts.90.w3", "model.layers.42.block_sparse_moe.experts.91.w3", "model.layers.42.block_sparse_moe.experts.92.w3", "model.layers.42.block_sparse_moe.experts.93.w3", "model.layers.42.block_sparse_moe.experts.94.w3", "model.layers.42.block_sparse_moe.experts.95.w3", "model.layers.42.block_sparse_moe.experts.96.w3", "model.layers.42.block_sparse_moe.experts.97.w3", "model.layers.42.block_sparse_moe.experts.98.w3", "model.layers.42.block_sparse_moe.experts.99.w3", "model.layers.42.block_sparse_moe.experts.100.w3", "model.layers.42.block_sparse_moe.experts.101.w3", "model.layers.42.block_sparse_moe.experts.102.w3", "model.layers.42.block_sparse_moe.experts.103.w3", "model.layers.42.block_sparse_moe.experts.104.w3", "model.layers.42.block_sparse_moe.experts.105.w3", "model.layers.42.block_sparse_moe.experts.106.w3", "model.layers.42.block_sparse_moe.experts.107.w3", "model.layers.42.block_sparse_moe.experts.108.w3", "model.layers.42.block_sparse_moe.experts.109.w3", "model.layers.42.block_sparse_moe.experts.110.w3", "model.layers.42.block_sparse_moe.experts.111.w3", "model.layers.42.block_sparse_moe.experts.112.w3", "model.layers.42.block_sparse_moe.experts.113.w3", "model.layers.42.block_sparse_moe.experts.114.w3", "model.layers.42.block_sparse_moe.experts.115.w3", "model.layers.42.block_sparse_moe.experts.116.w3", "model.layers.42.block_sparse_moe.experts.117.w3", "model.layers.42.block_sparse_moe.experts.118.w3", "model.layers.42.block_sparse_moe.experts.119.w3", "model.layers.42.block_sparse_moe.experts.120.w3", "model.layers.42.block_sparse_moe.experts.121.w3", "model.layers.42.block_sparse_moe.experts.122.w3", "model.layers.42.block_sparse_moe.experts.123.w3", "model.layers.42.block_sparse_moe.experts.124.w3", "model.layers.42.block_sparse_moe.experts.125.w3", "model.layers.42.block_sparse_moe.experts.126.w3", "model.layers.42.block_sparse_moe.experts.127.w3", "model.layers.42.block_sparse_moe.experts.128.w3", "model.layers.42.block_sparse_moe.experts.129.w3", "model.layers.42.block_sparse_moe.experts.130.w3", "model.layers.42.block_sparse_moe.experts.131.w3", "model.layers.42.block_sparse_moe.experts.132.w3", "model.layers.42.block_sparse_moe.experts.133.w3", "model.layers.42.block_sparse_moe.experts.134.w3", "model.layers.42.block_sparse_moe.experts.135.w3", "model.layers.42.block_sparse_moe.experts.136.w3", "model.layers.42.block_sparse_moe.experts.137.w3", "model.layers.42.block_sparse_moe.experts.138.w3", "model.layers.42.block_sparse_moe.experts.139.w3", "model.layers.42.block_sparse_moe.experts.140.w3", "model.layers.42.block_sparse_moe.experts.141.w3", "model.layers.42.block_sparse_moe.experts.142.w3", "model.layers.42.block_sparse_moe.experts.143.w3", "model.layers.42.block_sparse_moe.experts.144.w3", "model.layers.42.block_sparse_moe.experts.145.w3", "model.layers.42.block_sparse_moe.experts.146.w3", "model.layers.42.block_sparse_moe.experts.147.w3", "model.layers.42.block_sparse_moe.experts.148.w3", "model.layers.42.block_sparse_moe.experts.149.w3", "model.layers.42.block_sparse_moe.experts.150.w3", "model.layers.42.block_sparse_moe.experts.151.w3", "model.layers.42.block_sparse_moe.experts.152.w3", "model.layers.42.block_sparse_moe.experts.153.w3", "model.layers.42.block_sparse_moe.experts.154.w3", "model.layers.42.block_sparse_moe.experts.155.w3", "model.layers.42.block_sparse_moe.experts.156.w3", "model.layers.42.block_sparse_moe.experts.157.w3", "model.layers.42.block_sparse_moe.experts.158.w3", "model.layers.42.block_sparse_moe.experts.159.w3", "model.layers.42.block_sparse_moe.experts.160.w3", "model.layers.42.block_sparse_moe.experts.161.w3", "model.layers.42.block_sparse_moe.experts.162.w3", "model.layers.42.block_sparse_moe.experts.163.w3", "model.layers.42.block_sparse_moe.experts.164.w3", "model.layers.42.block_sparse_moe.experts.165.w3", "model.layers.42.block_sparse_moe.experts.166.w3", "model.layers.42.block_sparse_moe.experts.167.w3", "model.layers.42.block_sparse_moe.experts.168.w3", "model.layers.42.block_sparse_moe.experts.169.w3", "model.layers.42.block_sparse_moe.experts.170.w3", "model.layers.42.block_sparse_moe.experts.171.w3", "model.layers.42.block_sparse_moe.experts.172.w3", "model.layers.42.block_sparse_moe.experts.173.w3", "model.layers.42.block_sparse_moe.experts.174.w3", "model.layers.42.block_sparse_moe.experts.175.w3", "model.layers.42.block_sparse_moe.experts.176.w3", "model.layers.42.block_sparse_moe.experts.177.w3", "model.layers.42.block_sparse_moe.experts.178.w3", "model.layers.42.block_sparse_moe.experts.179.w3", "model.layers.42.block_sparse_moe.experts.180.w3", "model.layers.42.block_sparse_moe.experts.181.w3", "model.layers.42.block_sparse_moe.experts.182.w3", "model.layers.42.block_sparse_moe.experts.183.w3", "model.layers.42.block_sparse_moe.experts.184.w3", "model.layers.42.block_sparse_moe.experts.185.w3", "model.layers.42.block_sparse_moe.experts.186.w3", "model.layers.42.block_sparse_moe.experts.187.w3", "model.layers.42.block_sparse_moe.experts.188.w3", "model.layers.42.block_sparse_moe.experts.189.w3", "model.layers.42.block_sparse_moe.experts.190.w3", "model.layers.42.block_sparse_moe.experts.191.w3", "model.layers.42.block_sparse_moe.experts.192.w3", "model.layers.42.block_sparse_moe.experts.193.w3", "model.layers.42.block_sparse_moe.experts.194.w3", "model.layers.42.block_sparse_moe.experts.195.w3", "model.layers.42.block_sparse_moe.experts.196.w3", "model.layers.42.block_sparse_moe.experts.197.w3", "model.layers.42.block_sparse_moe.experts.198.w3", "model.layers.42.block_sparse_moe.experts.199.w3", "model.layers.42.block_sparse_moe.experts.200.w3", "model.layers.42.block_sparse_moe.experts.201.w3", "model.layers.42.block_sparse_moe.experts.202.w3", "model.layers.42.block_sparse_moe.experts.203.w3", "model.layers.42.block_sparse_moe.experts.204.w3", "model.layers.42.block_sparse_moe.experts.205.w3", "model.layers.42.block_sparse_moe.experts.206.w3", "model.layers.42.block_sparse_moe.experts.207.w3", "model.layers.42.block_sparse_moe.experts.208.w3", "model.layers.42.block_sparse_moe.experts.209.w3", "model.layers.42.block_sparse_moe.experts.210.w3", "model.layers.42.block_sparse_moe.experts.211.w3", "model.layers.42.block_sparse_moe.experts.212.w3", "model.layers.42.block_sparse_moe.experts.213.w3", "model.layers.42.block_sparse_moe.experts.214.w3", "model.layers.42.block_sparse_moe.experts.215.w3", "model.layers.42.block_sparse_moe.experts.216.w3", "model.layers.42.block_sparse_moe.experts.217.w3", "model.layers.42.block_sparse_moe.experts.218.w3", "model.layers.42.block_sparse_moe.experts.219.w3", "model.layers.42.block_sparse_moe.experts.220.w3", "model.layers.42.block_sparse_moe.experts.221.w3", "model.layers.42.block_sparse_moe.experts.222.w3", "model.layers.42.block_sparse_moe.experts.223.w3", "model.layers.42.block_sparse_moe.experts.224.w3", "model.layers.42.block_sparse_moe.experts.225.w3", "model.layers.42.block_sparse_moe.experts.226.w3", "model.layers.42.block_sparse_moe.experts.227.w3", "model.layers.42.block_sparse_moe.experts.228.w3", "model.layers.42.block_sparse_moe.experts.229.w3", "model.layers.42.block_sparse_moe.experts.230.w3", "model.layers.42.block_sparse_moe.experts.231.w3", "model.layers.42.block_sparse_moe.experts.232.w3", "model.layers.42.block_sparse_moe.experts.233.w3", "model.layers.42.block_sparse_moe.experts.234.w3", "model.layers.42.block_sparse_moe.experts.235.w3", "model.layers.42.block_sparse_moe.experts.236.w3", "model.layers.42.block_sparse_moe.experts.237.w3", "model.layers.42.block_sparse_moe.experts.238.w3", "model.layers.42.block_sparse_moe.experts.239.w3", "model.layers.42.block_sparse_moe.experts.240.w3", "model.layers.42.block_sparse_moe.experts.241.w3", "model.layers.42.block_sparse_moe.experts.242.w3", "model.layers.42.block_sparse_moe.experts.243.w3", "model.layers.42.block_sparse_moe.experts.244.w3", "model.layers.42.block_sparse_moe.experts.245.w3", "model.layers.42.block_sparse_moe.experts.246.w3", "model.layers.42.block_sparse_moe.experts.247.w3", "model.layers.42.block_sparse_moe.experts.248.w3", "model.layers.42.block_sparse_moe.experts.249.w3", "model.layers.42.block_sparse_moe.experts.250.w3", "model.layers.42.block_sparse_moe.experts.251.w3", "model.layers.42.block_sparse_moe.experts.252.w3", "model.layers.42.block_sparse_moe.experts.253.w3", "model.layers.42.block_sparse_moe.experts.254.w3", "model.layers.42.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 7.172711193559334e-05, "dbits": 2415919104 } ] }, { "idx": 214, "layers": [ "model.layers.42.block_sparse_moe.experts.0.w2", "model.layers.42.block_sparse_moe.experts.1.w2", "model.layers.42.block_sparse_moe.experts.2.w2", "model.layers.42.block_sparse_moe.experts.3.w2", "model.layers.42.block_sparse_moe.experts.4.w2", "model.layers.42.block_sparse_moe.experts.5.w2", "model.layers.42.block_sparse_moe.experts.6.w2", "model.layers.42.block_sparse_moe.experts.7.w2", "model.layers.42.block_sparse_moe.experts.8.w2", "model.layers.42.block_sparse_moe.experts.9.w2", "model.layers.42.block_sparse_moe.experts.10.w2", "model.layers.42.block_sparse_moe.experts.11.w2", "model.layers.42.block_sparse_moe.experts.12.w2", "model.layers.42.block_sparse_moe.experts.13.w2", "model.layers.42.block_sparse_moe.experts.14.w2", "model.layers.42.block_sparse_moe.experts.15.w2", "model.layers.42.block_sparse_moe.experts.16.w2", "model.layers.42.block_sparse_moe.experts.17.w2", "model.layers.42.block_sparse_moe.experts.18.w2", "model.layers.42.block_sparse_moe.experts.19.w2", "model.layers.42.block_sparse_moe.experts.20.w2", "model.layers.42.block_sparse_moe.experts.21.w2", "model.layers.42.block_sparse_moe.experts.22.w2", "model.layers.42.block_sparse_moe.experts.23.w2", "model.layers.42.block_sparse_moe.experts.24.w2", "model.layers.42.block_sparse_moe.experts.25.w2", "model.layers.42.block_sparse_moe.experts.26.w2", "model.layers.42.block_sparse_moe.experts.27.w2", "model.layers.42.block_sparse_moe.experts.28.w2", "model.layers.42.block_sparse_moe.experts.29.w2", "model.layers.42.block_sparse_moe.experts.30.w2", "model.layers.42.block_sparse_moe.experts.31.w2", "model.layers.42.block_sparse_moe.experts.32.w2", "model.layers.42.block_sparse_moe.experts.33.w2", "model.layers.42.block_sparse_moe.experts.34.w2", "model.layers.42.block_sparse_moe.experts.35.w2", "model.layers.42.block_sparse_moe.experts.36.w2", "model.layers.42.block_sparse_moe.experts.37.w2", "model.layers.42.block_sparse_moe.experts.38.w2", "model.layers.42.block_sparse_moe.experts.39.w2", "model.layers.42.block_sparse_moe.experts.40.w2", "model.layers.42.block_sparse_moe.experts.41.w2", "model.layers.42.block_sparse_moe.experts.42.w2", "model.layers.42.block_sparse_moe.experts.43.w2", "model.layers.42.block_sparse_moe.experts.44.w2", "model.layers.42.block_sparse_moe.experts.45.w2", "model.layers.42.block_sparse_moe.experts.46.w2", "model.layers.42.block_sparse_moe.experts.47.w2", "model.layers.42.block_sparse_moe.experts.48.w2", "model.layers.42.block_sparse_moe.experts.49.w2", "model.layers.42.block_sparse_moe.experts.50.w2", "model.layers.42.block_sparse_moe.experts.51.w2", "model.layers.42.block_sparse_moe.experts.52.w2", "model.layers.42.block_sparse_moe.experts.53.w2", "model.layers.42.block_sparse_moe.experts.54.w2", "model.layers.42.block_sparse_moe.experts.55.w2", "model.layers.42.block_sparse_moe.experts.56.w2", "model.layers.42.block_sparse_moe.experts.57.w2", "model.layers.42.block_sparse_moe.experts.58.w2", "model.layers.42.block_sparse_moe.experts.59.w2", "model.layers.42.block_sparse_moe.experts.60.w2", "model.layers.42.block_sparse_moe.experts.61.w2", "model.layers.42.block_sparse_moe.experts.62.w2", "model.layers.42.block_sparse_moe.experts.63.w2", "model.layers.42.block_sparse_moe.experts.64.w2", "model.layers.42.block_sparse_moe.experts.65.w2", "model.layers.42.block_sparse_moe.experts.66.w2", "model.layers.42.block_sparse_moe.experts.67.w2", "model.layers.42.block_sparse_moe.experts.68.w2", "model.layers.42.block_sparse_moe.experts.69.w2", "model.layers.42.block_sparse_moe.experts.70.w2", "model.layers.42.block_sparse_moe.experts.71.w2", "model.layers.42.block_sparse_moe.experts.72.w2", "model.layers.42.block_sparse_moe.experts.73.w2", "model.layers.42.block_sparse_moe.experts.74.w2", "model.layers.42.block_sparse_moe.experts.75.w2", "model.layers.42.block_sparse_moe.experts.76.w2", "model.layers.42.block_sparse_moe.experts.77.w2", "model.layers.42.block_sparse_moe.experts.78.w2", "model.layers.42.block_sparse_moe.experts.79.w2", "model.layers.42.block_sparse_moe.experts.80.w2", "model.layers.42.block_sparse_moe.experts.81.w2", "model.layers.42.block_sparse_moe.experts.82.w2", "model.layers.42.block_sparse_moe.experts.83.w2", "model.layers.42.block_sparse_moe.experts.84.w2", "model.layers.42.block_sparse_moe.experts.85.w2", "model.layers.42.block_sparse_moe.experts.86.w2", "model.layers.42.block_sparse_moe.experts.87.w2", "model.layers.42.block_sparse_moe.experts.88.w2", "model.layers.42.block_sparse_moe.experts.89.w2", "model.layers.42.block_sparse_moe.experts.90.w2", "model.layers.42.block_sparse_moe.experts.91.w2", "model.layers.42.block_sparse_moe.experts.92.w2", "model.layers.42.block_sparse_moe.experts.93.w2", "model.layers.42.block_sparse_moe.experts.94.w2", "model.layers.42.block_sparse_moe.experts.95.w2", "model.layers.42.block_sparse_moe.experts.96.w2", "model.layers.42.block_sparse_moe.experts.97.w2", "model.layers.42.block_sparse_moe.experts.98.w2", "model.layers.42.block_sparse_moe.experts.99.w2", "model.layers.42.block_sparse_moe.experts.100.w2", "model.layers.42.block_sparse_moe.experts.101.w2", "model.layers.42.block_sparse_moe.experts.102.w2", "model.layers.42.block_sparse_moe.experts.103.w2", "model.layers.42.block_sparse_moe.experts.104.w2", "model.layers.42.block_sparse_moe.experts.105.w2", "model.layers.42.block_sparse_moe.experts.106.w2", "model.layers.42.block_sparse_moe.experts.107.w2", "model.layers.42.block_sparse_moe.experts.108.w2", "model.layers.42.block_sparse_moe.experts.109.w2", "model.layers.42.block_sparse_moe.experts.110.w2", "model.layers.42.block_sparse_moe.experts.111.w2", "model.layers.42.block_sparse_moe.experts.112.w2", "model.layers.42.block_sparse_moe.experts.113.w2", "model.layers.42.block_sparse_moe.experts.114.w2", "model.layers.42.block_sparse_moe.experts.115.w2", "model.layers.42.block_sparse_moe.experts.116.w2", "model.layers.42.block_sparse_moe.experts.117.w2", "model.layers.42.block_sparse_moe.experts.118.w2", "model.layers.42.block_sparse_moe.experts.119.w2", "model.layers.42.block_sparse_moe.experts.120.w2", "model.layers.42.block_sparse_moe.experts.121.w2", "model.layers.42.block_sparse_moe.experts.122.w2", "model.layers.42.block_sparse_moe.experts.123.w2", "model.layers.42.block_sparse_moe.experts.124.w2", "model.layers.42.block_sparse_moe.experts.125.w2", "model.layers.42.block_sparse_moe.experts.126.w2", "model.layers.42.block_sparse_moe.experts.127.w2", "model.layers.42.block_sparse_moe.experts.128.w2", "model.layers.42.block_sparse_moe.experts.129.w2", "model.layers.42.block_sparse_moe.experts.130.w2", "model.layers.42.block_sparse_moe.experts.131.w2", "model.layers.42.block_sparse_moe.experts.132.w2", "model.layers.42.block_sparse_moe.experts.133.w2", "model.layers.42.block_sparse_moe.experts.134.w2", "model.layers.42.block_sparse_moe.experts.135.w2", "model.layers.42.block_sparse_moe.experts.136.w2", "model.layers.42.block_sparse_moe.experts.137.w2", "model.layers.42.block_sparse_moe.experts.138.w2", "model.layers.42.block_sparse_moe.experts.139.w2", "model.layers.42.block_sparse_moe.experts.140.w2", "model.layers.42.block_sparse_moe.experts.141.w2", "model.layers.42.block_sparse_moe.experts.142.w2", "model.layers.42.block_sparse_moe.experts.143.w2", "model.layers.42.block_sparse_moe.experts.144.w2", "model.layers.42.block_sparse_moe.experts.145.w2", "model.layers.42.block_sparse_moe.experts.146.w2", "model.layers.42.block_sparse_moe.experts.147.w2", "model.layers.42.block_sparse_moe.experts.148.w2", "model.layers.42.block_sparse_moe.experts.149.w2", "model.layers.42.block_sparse_moe.experts.150.w2", "model.layers.42.block_sparse_moe.experts.151.w2", "model.layers.42.block_sparse_moe.experts.152.w2", "model.layers.42.block_sparse_moe.experts.153.w2", "model.layers.42.block_sparse_moe.experts.154.w2", "model.layers.42.block_sparse_moe.experts.155.w2", "model.layers.42.block_sparse_moe.experts.156.w2", "model.layers.42.block_sparse_moe.experts.157.w2", "model.layers.42.block_sparse_moe.experts.158.w2", "model.layers.42.block_sparse_moe.experts.159.w2", "model.layers.42.block_sparse_moe.experts.160.w2", "model.layers.42.block_sparse_moe.experts.161.w2", "model.layers.42.block_sparse_moe.experts.162.w2", "model.layers.42.block_sparse_moe.experts.163.w2", "model.layers.42.block_sparse_moe.experts.164.w2", "model.layers.42.block_sparse_moe.experts.165.w2", "model.layers.42.block_sparse_moe.experts.166.w2", "model.layers.42.block_sparse_moe.experts.167.w2", "model.layers.42.block_sparse_moe.experts.168.w2", "model.layers.42.block_sparse_moe.experts.169.w2", "model.layers.42.block_sparse_moe.experts.170.w2", "model.layers.42.block_sparse_moe.experts.171.w2", "model.layers.42.block_sparse_moe.experts.172.w2", "model.layers.42.block_sparse_moe.experts.173.w2", "model.layers.42.block_sparse_moe.experts.174.w2", "model.layers.42.block_sparse_moe.experts.175.w2", "model.layers.42.block_sparse_moe.experts.176.w2", "model.layers.42.block_sparse_moe.experts.177.w2", "model.layers.42.block_sparse_moe.experts.178.w2", "model.layers.42.block_sparse_moe.experts.179.w2", "model.layers.42.block_sparse_moe.experts.180.w2", "model.layers.42.block_sparse_moe.experts.181.w2", "model.layers.42.block_sparse_moe.experts.182.w2", "model.layers.42.block_sparse_moe.experts.183.w2", "model.layers.42.block_sparse_moe.experts.184.w2", "model.layers.42.block_sparse_moe.experts.185.w2", "model.layers.42.block_sparse_moe.experts.186.w2", "model.layers.42.block_sparse_moe.experts.187.w2", "model.layers.42.block_sparse_moe.experts.188.w2", "model.layers.42.block_sparse_moe.experts.189.w2", "model.layers.42.block_sparse_moe.experts.190.w2", "model.layers.42.block_sparse_moe.experts.191.w2", "model.layers.42.block_sparse_moe.experts.192.w2", "model.layers.42.block_sparse_moe.experts.193.w2", "model.layers.42.block_sparse_moe.experts.194.w2", "model.layers.42.block_sparse_moe.experts.195.w2", "model.layers.42.block_sparse_moe.experts.196.w2", "model.layers.42.block_sparse_moe.experts.197.w2", "model.layers.42.block_sparse_moe.experts.198.w2", "model.layers.42.block_sparse_moe.experts.199.w2", "model.layers.42.block_sparse_moe.experts.200.w2", "model.layers.42.block_sparse_moe.experts.201.w2", "model.layers.42.block_sparse_moe.experts.202.w2", "model.layers.42.block_sparse_moe.experts.203.w2", "model.layers.42.block_sparse_moe.experts.204.w2", "model.layers.42.block_sparse_moe.experts.205.w2", "model.layers.42.block_sparse_moe.experts.206.w2", "model.layers.42.block_sparse_moe.experts.207.w2", "model.layers.42.block_sparse_moe.experts.208.w2", "model.layers.42.block_sparse_moe.experts.209.w2", "model.layers.42.block_sparse_moe.experts.210.w2", "model.layers.42.block_sparse_moe.experts.211.w2", "model.layers.42.block_sparse_moe.experts.212.w2", "model.layers.42.block_sparse_moe.experts.213.w2", "model.layers.42.block_sparse_moe.experts.214.w2", "model.layers.42.block_sparse_moe.experts.215.w2", "model.layers.42.block_sparse_moe.experts.216.w2", "model.layers.42.block_sparse_moe.experts.217.w2", "model.layers.42.block_sparse_moe.experts.218.w2", "model.layers.42.block_sparse_moe.experts.219.w2", "model.layers.42.block_sparse_moe.experts.220.w2", "model.layers.42.block_sparse_moe.experts.221.w2", "model.layers.42.block_sparse_moe.experts.222.w2", "model.layers.42.block_sparse_moe.experts.223.w2", "model.layers.42.block_sparse_moe.experts.224.w2", "model.layers.42.block_sparse_moe.experts.225.w2", "model.layers.42.block_sparse_moe.experts.226.w2", "model.layers.42.block_sparse_moe.experts.227.w2", "model.layers.42.block_sparse_moe.experts.228.w2", "model.layers.42.block_sparse_moe.experts.229.w2", "model.layers.42.block_sparse_moe.experts.230.w2", "model.layers.42.block_sparse_moe.experts.231.w2", "model.layers.42.block_sparse_moe.experts.232.w2", "model.layers.42.block_sparse_moe.experts.233.w2", "model.layers.42.block_sparse_moe.experts.234.w2", "model.layers.42.block_sparse_moe.experts.235.w2", "model.layers.42.block_sparse_moe.experts.236.w2", "model.layers.42.block_sparse_moe.experts.237.w2", "model.layers.42.block_sparse_moe.experts.238.w2", "model.layers.42.block_sparse_moe.experts.239.w2", "model.layers.42.block_sparse_moe.experts.240.w2", "model.layers.42.block_sparse_moe.experts.241.w2", "model.layers.42.block_sparse_moe.experts.242.w2", "model.layers.42.block_sparse_moe.experts.243.w2", "model.layers.42.block_sparse_moe.experts.244.w2", "model.layers.42.block_sparse_moe.experts.245.w2", "model.layers.42.block_sparse_moe.experts.246.w2", "model.layers.42.block_sparse_moe.experts.247.w2", "model.layers.42.block_sparse_moe.experts.248.w2", "model.layers.42.block_sparse_moe.experts.249.w2", "model.layers.42.block_sparse_moe.experts.250.w2", "model.layers.42.block_sparse_moe.experts.251.w2", "model.layers.42.block_sparse_moe.experts.252.w2", "model.layers.42.block_sparse_moe.experts.253.w2", "model.layers.42.block_sparse_moe.experts.254.w2", "model.layers.42.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0003477025777101628, "dbits": 1207959552 } ] }, { "idx": 215, "layers": [ "model.layers.43.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0006179295480251423, "dbits": 18874368 } ] }, { "idx": 216, "layers": [ "model.layers.43.self_attn.k_proj", "model.layers.43.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0038718204945326073, "dbits": 6291456 } ] }, { "idx": 217, "layers": [ "model.layers.43.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0003141634166241025, "dbits": 18874368 } ] }, { "idx": 218, "layers": [ "model.layers.43.block_sparse_moe.experts.0.w1", "model.layers.43.block_sparse_moe.experts.1.w1", "model.layers.43.block_sparse_moe.experts.2.w1", "model.layers.43.block_sparse_moe.experts.3.w1", "model.layers.43.block_sparse_moe.experts.4.w1", "model.layers.43.block_sparse_moe.experts.5.w1", "model.layers.43.block_sparse_moe.experts.6.w1", "model.layers.43.block_sparse_moe.experts.7.w1", "model.layers.43.block_sparse_moe.experts.8.w1", "model.layers.43.block_sparse_moe.experts.9.w1", "model.layers.43.block_sparse_moe.experts.10.w1", "model.layers.43.block_sparse_moe.experts.11.w1", "model.layers.43.block_sparse_moe.experts.12.w1", "model.layers.43.block_sparse_moe.experts.13.w1", "model.layers.43.block_sparse_moe.experts.14.w1", "model.layers.43.block_sparse_moe.experts.15.w1", "model.layers.43.block_sparse_moe.experts.16.w1", "model.layers.43.block_sparse_moe.experts.17.w1", "model.layers.43.block_sparse_moe.experts.18.w1", "model.layers.43.block_sparse_moe.experts.19.w1", "model.layers.43.block_sparse_moe.experts.20.w1", "model.layers.43.block_sparse_moe.experts.21.w1", "model.layers.43.block_sparse_moe.experts.22.w1", "model.layers.43.block_sparse_moe.experts.23.w1", "model.layers.43.block_sparse_moe.experts.24.w1", "model.layers.43.block_sparse_moe.experts.25.w1", "model.layers.43.block_sparse_moe.experts.26.w1", "model.layers.43.block_sparse_moe.experts.27.w1", "model.layers.43.block_sparse_moe.experts.28.w1", "model.layers.43.block_sparse_moe.experts.29.w1", "model.layers.43.block_sparse_moe.experts.30.w1", "model.layers.43.block_sparse_moe.experts.31.w1", "model.layers.43.block_sparse_moe.experts.32.w1", "model.layers.43.block_sparse_moe.experts.33.w1", "model.layers.43.block_sparse_moe.experts.34.w1", "model.layers.43.block_sparse_moe.experts.35.w1", "model.layers.43.block_sparse_moe.experts.36.w1", "model.layers.43.block_sparse_moe.experts.37.w1", "model.layers.43.block_sparse_moe.experts.38.w1", "model.layers.43.block_sparse_moe.experts.39.w1", "model.layers.43.block_sparse_moe.experts.40.w1", "model.layers.43.block_sparse_moe.experts.41.w1", "model.layers.43.block_sparse_moe.experts.42.w1", "model.layers.43.block_sparse_moe.experts.43.w1", "model.layers.43.block_sparse_moe.experts.44.w1", "model.layers.43.block_sparse_moe.experts.45.w1", "model.layers.43.block_sparse_moe.experts.46.w1", "model.layers.43.block_sparse_moe.experts.47.w1", "model.layers.43.block_sparse_moe.experts.48.w1", "model.layers.43.block_sparse_moe.experts.49.w1", "model.layers.43.block_sparse_moe.experts.50.w1", "model.layers.43.block_sparse_moe.experts.51.w1", "model.layers.43.block_sparse_moe.experts.52.w1", "model.layers.43.block_sparse_moe.experts.53.w1", "model.layers.43.block_sparse_moe.experts.54.w1", "model.layers.43.block_sparse_moe.experts.55.w1", "model.layers.43.block_sparse_moe.experts.56.w1", "model.layers.43.block_sparse_moe.experts.57.w1", "model.layers.43.block_sparse_moe.experts.58.w1", "model.layers.43.block_sparse_moe.experts.59.w1", "model.layers.43.block_sparse_moe.experts.60.w1", "model.layers.43.block_sparse_moe.experts.61.w1", "model.layers.43.block_sparse_moe.experts.62.w1", "model.layers.43.block_sparse_moe.experts.63.w1", "model.layers.43.block_sparse_moe.experts.64.w1", "model.layers.43.block_sparse_moe.experts.65.w1", "model.layers.43.block_sparse_moe.experts.66.w1", "model.layers.43.block_sparse_moe.experts.67.w1", "model.layers.43.block_sparse_moe.experts.68.w1", "model.layers.43.block_sparse_moe.experts.69.w1", "model.layers.43.block_sparse_moe.experts.70.w1", "model.layers.43.block_sparse_moe.experts.71.w1", "model.layers.43.block_sparse_moe.experts.72.w1", "model.layers.43.block_sparse_moe.experts.73.w1", "model.layers.43.block_sparse_moe.experts.74.w1", "model.layers.43.block_sparse_moe.experts.75.w1", "model.layers.43.block_sparse_moe.experts.76.w1", "model.layers.43.block_sparse_moe.experts.77.w1", "model.layers.43.block_sparse_moe.experts.78.w1", "model.layers.43.block_sparse_moe.experts.79.w1", "model.layers.43.block_sparse_moe.experts.80.w1", "model.layers.43.block_sparse_moe.experts.81.w1", "model.layers.43.block_sparse_moe.experts.82.w1", "model.layers.43.block_sparse_moe.experts.83.w1", "model.layers.43.block_sparse_moe.experts.84.w1", "model.layers.43.block_sparse_moe.experts.85.w1", "model.layers.43.block_sparse_moe.experts.86.w1", "model.layers.43.block_sparse_moe.experts.87.w1", "model.layers.43.block_sparse_moe.experts.88.w1", "model.layers.43.block_sparse_moe.experts.89.w1", "model.layers.43.block_sparse_moe.experts.90.w1", "model.layers.43.block_sparse_moe.experts.91.w1", "model.layers.43.block_sparse_moe.experts.92.w1", "model.layers.43.block_sparse_moe.experts.93.w1", "model.layers.43.block_sparse_moe.experts.94.w1", "model.layers.43.block_sparse_moe.experts.95.w1", "model.layers.43.block_sparse_moe.experts.96.w1", "model.layers.43.block_sparse_moe.experts.97.w1", "model.layers.43.block_sparse_moe.experts.98.w1", "model.layers.43.block_sparse_moe.experts.99.w1", "model.layers.43.block_sparse_moe.experts.100.w1", "model.layers.43.block_sparse_moe.experts.101.w1", "model.layers.43.block_sparse_moe.experts.102.w1", "model.layers.43.block_sparse_moe.experts.103.w1", "model.layers.43.block_sparse_moe.experts.104.w1", "model.layers.43.block_sparse_moe.experts.105.w1", "model.layers.43.block_sparse_moe.experts.106.w1", "model.layers.43.block_sparse_moe.experts.107.w1", "model.layers.43.block_sparse_moe.experts.108.w1", "model.layers.43.block_sparse_moe.experts.109.w1", "model.layers.43.block_sparse_moe.experts.110.w1", "model.layers.43.block_sparse_moe.experts.111.w1", "model.layers.43.block_sparse_moe.experts.112.w1", "model.layers.43.block_sparse_moe.experts.113.w1", "model.layers.43.block_sparse_moe.experts.114.w1", "model.layers.43.block_sparse_moe.experts.115.w1", "model.layers.43.block_sparse_moe.experts.116.w1", "model.layers.43.block_sparse_moe.experts.117.w1", "model.layers.43.block_sparse_moe.experts.118.w1", "model.layers.43.block_sparse_moe.experts.119.w1", "model.layers.43.block_sparse_moe.experts.120.w1", "model.layers.43.block_sparse_moe.experts.121.w1", "model.layers.43.block_sparse_moe.experts.122.w1", "model.layers.43.block_sparse_moe.experts.123.w1", "model.layers.43.block_sparse_moe.experts.124.w1", "model.layers.43.block_sparse_moe.experts.125.w1", "model.layers.43.block_sparse_moe.experts.126.w1", "model.layers.43.block_sparse_moe.experts.127.w1", "model.layers.43.block_sparse_moe.experts.128.w1", "model.layers.43.block_sparse_moe.experts.129.w1", "model.layers.43.block_sparse_moe.experts.130.w1", "model.layers.43.block_sparse_moe.experts.131.w1", "model.layers.43.block_sparse_moe.experts.132.w1", "model.layers.43.block_sparse_moe.experts.133.w1", "model.layers.43.block_sparse_moe.experts.134.w1", "model.layers.43.block_sparse_moe.experts.135.w1", "model.layers.43.block_sparse_moe.experts.136.w1", "model.layers.43.block_sparse_moe.experts.137.w1", "model.layers.43.block_sparse_moe.experts.138.w1", "model.layers.43.block_sparse_moe.experts.139.w1", "model.layers.43.block_sparse_moe.experts.140.w1", "model.layers.43.block_sparse_moe.experts.141.w1", "model.layers.43.block_sparse_moe.experts.142.w1", "model.layers.43.block_sparse_moe.experts.143.w1", "model.layers.43.block_sparse_moe.experts.144.w1", "model.layers.43.block_sparse_moe.experts.145.w1", "model.layers.43.block_sparse_moe.experts.146.w1", "model.layers.43.block_sparse_moe.experts.147.w1", "model.layers.43.block_sparse_moe.experts.148.w1", "model.layers.43.block_sparse_moe.experts.149.w1", "model.layers.43.block_sparse_moe.experts.150.w1", "model.layers.43.block_sparse_moe.experts.151.w1", "model.layers.43.block_sparse_moe.experts.152.w1", "model.layers.43.block_sparse_moe.experts.153.w1", "model.layers.43.block_sparse_moe.experts.154.w1", "model.layers.43.block_sparse_moe.experts.155.w1", "model.layers.43.block_sparse_moe.experts.156.w1", "model.layers.43.block_sparse_moe.experts.157.w1", "model.layers.43.block_sparse_moe.experts.158.w1", "model.layers.43.block_sparse_moe.experts.159.w1", "model.layers.43.block_sparse_moe.experts.160.w1", "model.layers.43.block_sparse_moe.experts.161.w1", "model.layers.43.block_sparse_moe.experts.162.w1", "model.layers.43.block_sparse_moe.experts.163.w1", "model.layers.43.block_sparse_moe.experts.164.w1", "model.layers.43.block_sparse_moe.experts.165.w1", "model.layers.43.block_sparse_moe.experts.166.w1", "model.layers.43.block_sparse_moe.experts.167.w1", "model.layers.43.block_sparse_moe.experts.168.w1", "model.layers.43.block_sparse_moe.experts.169.w1", "model.layers.43.block_sparse_moe.experts.170.w1", "model.layers.43.block_sparse_moe.experts.171.w1", "model.layers.43.block_sparse_moe.experts.172.w1", "model.layers.43.block_sparse_moe.experts.173.w1", "model.layers.43.block_sparse_moe.experts.174.w1", "model.layers.43.block_sparse_moe.experts.175.w1", "model.layers.43.block_sparse_moe.experts.176.w1", "model.layers.43.block_sparse_moe.experts.177.w1", "model.layers.43.block_sparse_moe.experts.178.w1", "model.layers.43.block_sparse_moe.experts.179.w1", "model.layers.43.block_sparse_moe.experts.180.w1", "model.layers.43.block_sparse_moe.experts.181.w1", "model.layers.43.block_sparse_moe.experts.182.w1", "model.layers.43.block_sparse_moe.experts.183.w1", "model.layers.43.block_sparse_moe.experts.184.w1", "model.layers.43.block_sparse_moe.experts.185.w1", "model.layers.43.block_sparse_moe.experts.186.w1", "model.layers.43.block_sparse_moe.experts.187.w1", "model.layers.43.block_sparse_moe.experts.188.w1", "model.layers.43.block_sparse_moe.experts.189.w1", "model.layers.43.block_sparse_moe.experts.190.w1", "model.layers.43.block_sparse_moe.experts.191.w1", "model.layers.43.block_sparse_moe.experts.192.w1", "model.layers.43.block_sparse_moe.experts.193.w1", "model.layers.43.block_sparse_moe.experts.194.w1", "model.layers.43.block_sparse_moe.experts.195.w1", "model.layers.43.block_sparse_moe.experts.196.w1", "model.layers.43.block_sparse_moe.experts.197.w1", "model.layers.43.block_sparse_moe.experts.198.w1", "model.layers.43.block_sparse_moe.experts.199.w1", "model.layers.43.block_sparse_moe.experts.200.w1", "model.layers.43.block_sparse_moe.experts.201.w1", "model.layers.43.block_sparse_moe.experts.202.w1", "model.layers.43.block_sparse_moe.experts.203.w1", "model.layers.43.block_sparse_moe.experts.204.w1", "model.layers.43.block_sparse_moe.experts.205.w1", "model.layers.43.block_sparse_moe.experts.206.w1", "model.layers.43.block_sparse_moe.experts.207.w1", "model.layers.43.block_sparse_moe.experts.208.w1", "model.layers.43.block_sparse_moe.experts.209.w1", "model.layers.43.block_sparse_moe.experts.210.w1", "model.layers.43.block_sparse_moe.experts.211.w1", "model.layers.43.block_sparse_moe.experts.212.w1", "model.layers.43.block_sparse_moe.experts.213.w1", "model.layers.43.block_sparse_moe.experts.214.w1", "model.layers.43.block_sparse_moe.experts.215.w1", "model.layers.43.block_sparse_moe.experts.216.w1", "model.layers.43.block_sparse_moe.experts.217.w1", "model.layers.43.block_sparse_moe.experts.218.w1", "model.layers.43.block_sparse_moe.experts.219.w1", "model.layers.43.block_sparse_moe.experts.220.w1", "model.layers.43.block_sparse_moe.experts.221.w1", "model.layers.43.block_sparse_moe.experts.222.w1", "model.layers.43.block_sparse_moe.experts.223.w1", "model.layers.43.block_sparse_moe.experts.224.w1", "model.layers.43.block_sparse_moe.experts.225.w1", "model.layers.43.block_sparse_moe.experts.226.w1", "model.layers.43.block_sparse_moe.experts.227.w1", "model.layers.43.block_sparse_moe.experts.228.w1", "model.layers.43.block_sparse_moe.experts.229.w1", "model.layers.43.block_sparse_moe.experts.230.w1", "model.layers.43.block_sparse_moe.experts.231.w1", "model.layers.43.block_sparse_moe.experts.232.w1", "model.layers.43.block_sparse_moe.experts.233.w1", "model.layers.43.block_sparse_moe.experts.234.w1", "model.layers.43.block_sparse_moe.experts.235.w1", "model.layers.43.block_sparse_moe.experts.236.w1", "model.layers.43.block_sparse_moe.experts.237.w1", "model.layers.43.block_sparse_moe.experts.238.w1", "model.layers.43.block_sparse_moe.experts.239.w1", "model.layers.43.block_sparse_moe.experts.240.w1", "model.layers.43.block_sparse_moe.experts.241.w1", "model.layers.43.block_sparse_moe.experts.242.w1", "model.layers.43.block_sparse_moe.experts.243.w1", "model.layers.43.block_sparse_moe.experts.244.w1", "model.layers.43.block_sparse_moe.experts.245.w1", "model.layers.43.block_sparse_moe.experts.246.w1", "model.layers.43.block_sparse_moe.experts.247.w1", "model.layers.43.block_sparse_moe.experts.248.w1", "model.layers.43.block_sparse_moe.experts.249.w1", "model.layers.43.block_sparse_moe.experts.250.w1", "model.layers.43.block_sparse_moe.experts.251.w1", "model.layers.43.block_sparse_moe.experts.252.w1", "model.layers.43.block_sparse_moe.experts.253.w1", "model.layers.43.block_sparse_moe.experts.254.w1", "model.layers.43.block_sparse_moe.experts.255.w1", "model.layers.43.block_sparse_moe.experts.0.w3", "model.layers.43.block_sparse_moe.experts.1.w3", "model.layers.43.block_sparse_moe.experts.2.w3", "model.layers.43.block_sparse_moe.experts.3.w3", "model.layers.43.block_sparse_moe.experts.4.w3", "model.layers.43.block_sparse_moe.experts.5.w3", "model.layers.43.block_sparse_moe.experts.6.w3", "model.layers.43.block_sparse_moe.experts.7.w3", "model.layers.43.block_sparse_moe.experts.8.w3", "model.layers.43.block_sparse_moe.experts.9.w3", "model.layers.43.block_sparse_moe.experts.10.w3", "model.layers.43.block_sparse_moe.experts.11.w3", "model.layers.43.block_sparse_moe.experts.12.w3", "model.layers.43.block_sparse_moe.experts.13.w3", "model.layers.43.block_sparse_moe.experts.14.w3", "model.layers.43.block_sparse_moe.experts.15.w3", "model.layers.43.block_sparse_moe.experts.16.w3", "model.layers.43.block_sparse_moe.experts.17.w3", "model.layers.43.block_sparse_moe.experts.18.w3", "model.layers.43.block_sparse_moe.experts.19.w3", "model.layers.43.block_sparse_moe.experts.20.w3", "model.layers.43.block_sparse_moe.experts.21.w3", "model.layers.43.block_sparse_moe.experts.22.w3", "model.layers.43.block_sparse_moe.experts.23.w3", "model.layers.43.block_sparse_moe.experts.24.w3", "model.layers.43.block_sparse_moe.experts.25.w3", "model.layers.43.block_sparse_moe.experts.26.w3", "model.layers.43.block_sparse_moe.experts.27.w3", "model.layers.43.block_sparse_moe.experts.28.w3", "model.layers.43.block_sparse_moe.experts.29.w3", "model.layers.43.block_sparse_moe.experts.30.w3", "model.layers.43.block_sparse_moe.experts.31.w3", "model.layers.43.block_sparse_moe.experts.32.w3", "model.layers.43.block_sparse_moe.experts.33.w3", "model.layers.43.block_sparse_moe.experts.34.w3", "model.layers.43.block_sparse_moe.experts.35.w3", "model.layers.43.block_sparse_moe.experts.36.w3", "model.layers.43.block_sparse_moe.experts.37.w3", "model.layers.43.block_sparse_moe.experts.38.w3", "model.layers.43.block_sparse_moe.experts.39.w3", "model.layers.43.block_sparse_moe.experts.40.w3", "model.layers.43.block_sparse_moe.experts.41.w3", "model.layers.43.block_sparse_moe.experts.42.w3", "model.layers.43.block_sparse_moe.experts.43.w3", "model.layers.43.block_sparse_moe.experts.44.w3", "model.layers.43.block_sparse_moe.experts.45.w3", "model.layers.43.block_sparse_moe.experts.46.w3", "model.layers.43.block_sparse_moe.experts.47.w3", "model.layers.43.block_sparse_moe.experts.48.w3", "model.layers.43.block_sparse_moe.experts.49.w3", "model.layers.43.block_sparse_moe.experts.50.w3", "model.layers.43.block_sparse_moe.experts.51.w3", "model.layers.43.block_sparse_moe.experts.52.w3", "model.layers.43.block_sparse_moe.experts.53.w3", "model.layers.43.block_sparse_moe.experts.54.w3", "model.layers.43.block_sparse_moe.experts.55.w3", "model.layers.43.block_sparse_moe.experts.56.w3", "model.layers.43.block_sparse_moe.experts.57.w3", "model.layers.43.block_sparse_moe.experts.58.w3", "model.layers.43.block_sparse_moe.experts.59.w3", "model.layers.43.block_sparse_moe.experts.60.w3", "model.layers.43.block_sparse_moe.experts.61.w3", "model.layers.43.block_sparse_moe.experts.62.w3", "model.layers.43.block_sparse_moe.experts.63.w3", "model.layers.43.block_sparse_moe.experts.64.w3", "model.layers.43.block_sparse_moe.experts.65.w3", "model.layers.43.block_sparse_moe.experts.66.w3", "model.layers.43.block_sparse_moe.experts.67.w3", "model.layers.43.block_sparse_moe.experts.68.w3", "model.layers.43.block_sparse_moe.experts.69.w3", "model.layers.43.block_sparse_moe.experts.70.w3", "model.layers.43.block_sparse_moe.experts.71.w3", "model.layers.43.block_sparse_moe.experts.72.w3", "model.layers.43.block_sparse_moe.experts.73.w3", "model.layers.43.block_sparse_moe.experts.74.w3", "model.layers.43.block_sparse_moe.experts.75.w3", "model.layers.43.block_sparse_moe.experts.76.w3", "model.layers.43.block_sparse_moe.experts.77.w3", "model.layers.43.block_sparse_moe.experts.78.w3", "model.layers.43.block_sparse_moe.experts.79.w3", "model.layers.43.block_sparse_moe.experts.80.w3", "model.layers.43.block_sparse_moe.experts.81.w3", "model.layers.43.block_sparse_moe.experts.82.w3", "model.layers.43.block_sparse_moe.experts.83.w3", "model.layers.43.block_sparse_moe.experts.84.w3", "model.layers.43.block_sparse_moe.experts.85.w3", "model.layers.43.block_sparse_moe.experts.86.w3", "model.layers.43.block_sparse_moe.experts.87.w3", "model.layers.43.block_sparse_moe.experts.88.w3", "model.layers.43.block_sparse_moe.experts.89.w3", "model.layers.43.block_sparse_moe.experts.90.w3", "model.layers.43.block_sparse_moe.experts.91.w3", "model.layers.43.block_sparse_moe.experts.92.w3", "model.layers.43.block_sparse_moe.experts.93.w3", "model.layers.43.block_sparse_moe.experts.94.w3", "model.layers.43.block_sparse_moe.experts.95.w3", "model.layers.43.block_sparse_moe.experts.96.w3", "model.layers.43.block_sparse_moe.experts.97.w3", "model.layers.43.block_sparse_moe.experts.98.w3", "model.layers.43.block_sparse_moe.experts.99.w3", "model.layers.43.block_sparse_moe.experts.100.w3", "model.layers.43.block_sparse_moe.experts.101.w3", "model.layers.43.block_sparse_moe.experts.102.w3", "model.layers.43.block_sparse_moe.experts.103.w3", "model.layers.43.block_sparse_moe.experts.104.w3", "model.layers.43.block_sparse_moe.experts.105.w3", "model.layers.43.block_sparse_moe.experts.106.w3", "model.layers.43.block_sparse_moe.experts.107.w3", "model.layers.43.block_sparse_moe.experts.108.w3", "model.layers.43.block_sparse_moe.experts.109.w3", "model.layers.43.block_sparse_moe.experts.110.w3", "model.layers.43.block_sparse_moe.experts.111.w3", "model.layers.43.block_sparse_moe.experts.112.w3", "model.layers.43.block_sparse_moe.experts.113.w3", "model.layers.43.block_sparse_moe.experts.114.w3", "model.layers.43.block_sparse_moe.experts.115.w3", "model.layers.43.block_sparse_moe.experts.116.w3", "model.layers.43.block_sparse_moe.experts.117.w3", "model.layers.43.block_sparse_moe.experts.118.w3", "model.layers.43.block_sparse_moe.experts.119.w3", "model.layers.43.block_sparse_moe.experts.120.w3", "model.layers.43.block_sparse_moe.experts.121.w3", "model.layers.43.block_sparse_moe.experts.122.w3", "model.layers.43.block_sparse_moe.experts.123.w3", "model.layers.43.block_sparse_moe.experts.124.w3", "model.layers.43.block_sparse_moe.experts.125.w3", "model.layers.43.block_sparse_moe.experts.126.w3", "model.layers.43.block_sparse_moe.experts.127.w3", "model.layers.43.block_sparse_moe.experts.128.w3", "model.layers.43.block_sparse_moe.experts.129.w3", "model.layers.43.block_sparse_moe.experts.130.w3", "model.layers.43.block_sparse_moe.experts.131.w3", "model.layers.43.block_sparse_moe.experts.132.w3", "model.layers.43.block_sparse_moe.experts.133.w3", "model.layers.43.block_sparse_moe.experts.134.w3", "model.layers.43.block_sparse_moe.experts.135.w3", "model.layers.43.block_sparse_moe.experts.136.w3", "model.layers.43.block_sparse_moe.experts.137.w3", "model.layers.43.block_sparse_moe.experts.138.w3", "model.layers.43.block_sparse_moe.experts.139.w3", "model.layers.43.block_sparse_moe.experts.140.w3", "model.layers.43.block_sparse_moe.experts.141.w3", "model.layers.43.block_sparse_moe.experts.142.w3", "model.layers.43.block_sparse_moe.experts.143.w3", "model.layers.43.block_sparse_moe.experts.144.w3", "model.layers.43.block_sparse_moe.experts.145.w3", "model.layers.43.block_sparse_moe.experts.146.w3", "model.layers.43.block_sparse_moe.experts.147.w3", "model.layers.43.block_sparse_moe.experts.148.w3", "model.layers.43.block_sparse_moe.experts.149.w3", "model.layers.43.block_sparse_moe.experts.150.w3", "model.layers.43.block_sparse_moe.experts.151.w3", "model.layers.43.block_sparse_moe.experts.152.w3", "model.layers.43.block_sparse_moe.experts.153.w3", "model.layers.43.block_sparse_moe.experts.154.w3", "model.layers.43.block_sparse_moe.experts.155.w3", "model.layers.43.block_sparse_moe.experts.156.w3", "model.layers.43.block_sparse_moe.experts.157.w3", "model.layers.43.block_sparse_moe.experts.158.w3", "model.layers.43.block_sparse_moe.experts.159.w3", "model.layers.43.block_sparse_moe.experts.160.w3", "model.layers.43.block_sparse_moe.experts.161.w3", "model.layers.43.block_sparse_moe.experts.162.w3", "model.layers.43.block_sparse_moe.experts.163.w3", "model.layers.43.block_sparse_moe.experts.164.w3", "model.layers.43.block_sparse_moe.experts.165.w3", "model.layers.43.block_sparse_moe.experts.166.w3", "model.layers.43.block_sparse_moe.experts.167.w3", "model.layers.43.block_sparse_moe.experts.168.w3", "model.layers.43.block_sparse_moe.experts.169.w3", "model.layers.43.block_sparse_moe.experts.170.w3", "model.layers.43.block_sparse_moe.experts.171.w3", "model.layers.43.block_sparse_moe.experts.172.w3", "model.layers.43.block_sparse_moe.experts.173.w3", "model.layers.43.block_sparse_moe.experts.174.w3", "model.layers.43.block_sparse_moe.experts.175.w3", "model.layers.43.block_sparse_moe.experts.176.w3", "model.layers.43.block_sparse_moe.experts.177.w3", "model.layers.43.block_sparse_moe.experts.178.w3", "model.layers.43.block_sparse_moe.experts.179.w3", "model.layers.43.block_sparse_moe.experts.180.w3", "model.layers.43.block_sparse_moe.experts.181.w3", "model.layers.43.block_sparse_moe.experts.182.w3", "model.layers.43.block_sparse_moe.experts.183.w3", "model.layers.43.block_sparse_moe.experts.184.w3", "model.layers.43.block_sparse_moe.experts.185.w3", "model.layers.43.block_sparse_moe.experts.186.w3", "model.layers.43.block_sparse_moe.experts.187.w3", "model.layers.43.block_sparse_moe.experts.188.w3", "model.layers.43.block_sparse_moe.experts.189.w3", "model.layers.43.block_sparse_moe.experts.190.w3", "model.layers.43.block_sparse_moe.experts.191.w3", "model.layers.43.block_sparse_moe.experts.192.w3", "model.layers.43.block_sparse_moe.experts.193.w3", "model.layers.43.block_sparse_moe.experts.194.w3", "model.layers.43.block_sparse_moe.experts.195.w3", "model.layers.43.block_sparse_moe.experts.196.w3", "model.layers.43.block_sparse_moe.experts.197.w3", "model.layers.43.block_sparse_moe.experts.198.w3", "model.layers.43.block_sparse_moe.experts.199.w3", "model.layers.43.block_sparse_moe.experts.200.w3", "model.layers.43.block_sparse_moe.experts.201.w3", "model.layers.43.block_sparse_moe.experts.202.w3", "model.layers.43.block_sparse_moe.experts.203.w3", "model.layers.43.block_sparse_moe.experts.204.w3", "model.layers.43.block_sparse_moe.experts.205.w3", "model.layers.43.block_sparse_moe.experts.206.w3", "model.layers.43.block_sparse_moe.experts.207.w3", "model.layers.43.block_sparse_moe.experts.208.w3", "model.layers.43.block_sparse_moe.experts.209.w3", "model.layers.43.block_sparse_moe.experts.210.w3", "model.layers.43.block_sparse_moe.experts.211.w3", "model.layers.43.block_sparse_moe.experts.212.w3", "model.layers.43.block_sparse_moe.experts.213.w3", "model.layers.43.block_sparse_moe.experts.214.w3", "model.layers.43.block_sparse_moe.experts.215.w3", "model.layers.43.block_sparse_moe.experts.216.w3", "model.layers.43.block_sparse_moe.experts.217.w3", "model.layers.43.block_sparse_moe.experts.218.w3", "model.layers.43.block_sparse_moe.experts.219.w3", "model.layers.43.block_sparse_moe.experts.220.w3", "model.layers.43.block_sparse_moe.experts.221.w3", "model.layers.43.block_sparse_moe.experts.222.w3", "model.layers.43.block_sparse_moe.experts.223.w3", "model.layers.43.block_sparse_moe.experts.224.w3", "model.layers.43.block_sparse_moe.experts.225.w3", "model.layers.43.block_sparse_moe.experts.226.w3", "model.layers.43.block_sparse_moe.experts.227.w3", "model.layers.43.block_sparse_moe.experts.228.w3", "model.layers.43.block_sparse_moe.experts.229.w3", "model.layers.43.block_sparse_moe.experts.230.w3", "model.layers.43.block_sparse_moe.experts.231.w3", "model.layers.43.block_sparse_moe.experts.232.w3", "model.layers.43.block_sparse_moe.experts.233.w3", "model.layers.43.block_sparse_moe.experts.234.w3", "model.layers.43.block_sparse_moe.experts.235.w3", "model.layers.43.block_sparse_moe.experts.236.w3", "model.layers.43.block_sparse_moe.experts.237.w3", "model.layers.43.block_sparse_moe.experts.238.w3", "model.layers.43.block_sparse_moe.experts.239.w3", "model.layers.43.block_sparse_moe.experts.240.w3", "model.layers.43.block_sparse_moe.experts.241.w3", "model.layers.43.block_sparse_moe.experts.242.w3", "model.layers.43.block_sparse_moe.experts.243.w3", "model.layers.43.block_sparse_moe.experts.244.w3", "model.layers.43.block_sparse_moe.experts.245.w3", "model.layers.43.block_sparse_moe.experts.246.w3", "model.layers.43.block_sparse_moe.experts.247.w3", "model.layers.43.block_sparse_moe.experts.248.w3", "model.layers.43.block_sparse_moe.experts.249.w3", "model.layers.43.block_sparse_moe.experts.250.w3", "model.layers.43.block_sparse_moe.experts.251.w3", "model.layers.43.block_sparse_moe.experts.252.w3", "model.layers.43.block_sparse_moe.experts.253.w3", "model.layers.43.block_sparse_moe.experts.254.w3", "model.layers.43.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.00017916262149814122, "dbits": 2415919104 } ] }, { "idx": 219, "layers": [ "model.layers.43.block_sparse_moe.experts.0.w2", "model.layers.43.block_sparse_moe.experts.1.w2", "model.layers.43.block_sparse_moe.experts.2.w2", "model.layers.43.block_sparse_moe.experts.3.w2", "model.layers.43.block_sparse_moe.experts.4.w2", "model.layers.43.block_sparse_moe.experts.5.w2", "model.layers.43.block_sparse_moe.experts.6.w2", "model.layers.43.block_sparse_moe.experts.7.w2", "model.layers.43.block_sparse_moe.experts.8.w2", "model.layers.43.block_sparse_moe.experts.9.w2", "model.layers.43.block_sparse_moe.experts.10.w2", "model.layers.43.block_sparse_moe.experts.11.w2", "model.layers.43.block_sparse_moe.experts.12.w2", "model.layers.43.block_sparse_moe.experts.13.w2", "model.layers.43.block_sparse_moe.experts.14.w2", "model.layers.43.block_sparse_moe.experts.15.w2", "model.layers.43.block_sparse_moe.experts.16.w2", "model.layers.43.block_sparse_moe.experts.17.w2", "model.layers.43.block_sparse_moe.experts.18.w2", "model.layers.43.block_sparse_moe.experts.19.w2", "model.layers.43.block_sparse_moe.experts.20.w2", "model.layers.43.block_sparse_moe.experts.21.w2", "model.layers.43.block_sparse_moe.experts.22.w2", "model.layers.43.block_sparse_moe.experts.23.w2", "model.layers.43.block_sparse_moe.experts.24.w2", "model.layers.43.block_sparse_moe.experts.25.w2", "model.layers.43.block_sparse_moe.experts.26.w2", "model.layers.43.block_sparse_moe.experts.27.w2", "model.layers.43.block_sparse_moe.experts.28.w2", "model.layers.43.block_sparse_moe.experts.29.w2", "model.layers.43.block_sparse_moe.experts.30.w2", "model.layers.43.block_sparse_moe.experts.31.w2", "model.layers.43.block_sparse_moe.experts.32.w2", "model.layers.43.block_sparse_moe.experts.33.w2", "model.layers.43.block_sparse_moe.experts.34.w2", "model.layers.43.block_sparse_moe.experts.35.w2", "model.layers.43.block_sparse_moe.experts.36.w2", "model.layers.43.block_sparse_moe.experts.37.w2", "model.layers.43.block_sparse_moe.experts.38.w2", "model.layers.43.block_sparse_moe.experts.39.w2", "model.layers.43.block_sparse_moe.experts.40.w2", "model.layers.43.block_sparse_moe.experts.41.w2", "model.layers.43.block_sparse_moe.experts.42.w2", "model.layers.43.block_sparse_moe.experts.43.w2", "model.layers.43.block_sparse_moe.experts.44.w2", "model.layers.43.block_sparse_moe.experts.45.w2", "model.layers.43.block_sparse_moe.experts.46.w2", "model.layers.43.block_sparse_moe.experts.47.w2", "model.layers.43.block_sparse_moe.experts.48.w2", "model.layers.43.block_sparse_moe.experts.49.w2", "model.layers.43.block_sparse_moe.experts.50.w2", "model.layers.43.block_sparse_moe.experts.51.w2", "model.layers.43.block_sparse_moe.experts.52.w2", "model.layers.43.block_sparse_moe.experts.53.w2", "model.layers.43.block_sparse_moe.experts.54.w2", "model.layers.43.block_sparse_moe.experts.55.w2", "model.layers.43.block_sparse_moe.experts.56.w2", "model.layers.43.block_sparse_moe.experts.57.w2", "model.layers.43.block_sparse_moe.experts.58.w2", "model.layers.43.block_sparse_moe.experts.59.w2", "model.layers.43.block_sparse_moe.experts.60.w2", "model.layers.43.block_sparse_moe.experts.61.w2", "model.layers.43.block_sparse_moe.experts.62.w2", "model.layers.43.block_sparse_moe.experts.63.w2", "model.layers.43.block_sparse_moe.experts.64.w2", "model.layers.43.block_sparse_moe.experts.65.w2", "model.layers.43.block_sparse_moe.experts.66.w2", "model.layers.43.block_sparse_moe.experts.67.w2", "model.layers.43.block_sparse_moe.experts.68.w2", "model.layers.43.block_sparse_moe.experts.69.w2", "model.layers.43.block_sparse_moe.experts.70.w2", "model.layers.43.block_sparse_moe.experts.71.w2", "model.layers.43.block_sparse_moe.experts.72.w2", "model.layers.43.block_sparse_moe.experts.73.w2", "model.layers.43.block_sparse_moe.experts.74.w2", "model.layers.43.block_sparse_moe.experts.75.w2", "model.layers.43.block_sparse_moe.experts.76.w2", "model.layers.43.block_sparse_moe.experts.77.w2", "model.layers.43.block_sparse_moe.experts.78.w2", "model.layers.43.block_sparse_moe.experts.79.w2", "model.layers.43.block_sparse_moe.experts.80.w2", "model.layers.43.block_sparse_moe.experts.81.w2", "model.layers.43.block_sparse_moe.experts.82.w2", "model.layers.43.block_sparse_moe.experts.83.w2", "model.layers.43.block_sparse_moe.experts.84.w2", "model.layers.43.block_sparse_moe.experts.85.w2", "model.layers.43.block_sparse_moe.experts.86.w2", "model.layers.43.block_sparse_moe.experts.87.w2", "model.layers.43.block_sparse_moe.experts.88.w2", "model.layers.43.block_sparse_moe.experts.89.w2", "model.layers.43.block_sparse_moe.experts.90.w2", "model.layers.43.block_sparse_moe.experts.91.w2", "model.layers.43.block_sparse_moe.experts.92.w2", "model.layers.43.block_sparse_moe.experts.93.w2", "model.layers.43.block_sparse_moe.experts.94.w2", "model.layers.43.block_sparse_moe.experts.95.w2", "model.layers.43.block_sparse_moe.experts.96.w2", "model.layers.43.block_sparse_moe.experts.97.w2", "model.layers.43.block_sparse_moe.experts.98.w2", "model.layers.43.block_sparse_moe.experts.99.w2", "model.layers.43.block_sparse_moe.experts.100.w2", "model.layers.43.block_sparse_moe.experts.101.w2", "model.layers.43.block_sparse_moe.experts.102.w2", "model.layers.43.block_sparse_moe.experts.103.w2", "model.layers.43.block_sparse_moe.experts.104.w2", "model.layers.43.block_sparse_moe.experts.105.w2", "model.layers.43.block_sparse_moe.experts.106.w2", "model.layers.43.block_sparse_moe.experts.107.w2", "model.layers.43.block_sparse_moe.experts.108.w2", "model.layers.43.block_sparse_moe.experts.109.w2", "model.layers.43.block_sparse_moe.experts.110.w2", "model.layers.43.block_sparse_moe.experts.111.w2", "model.layers.43.block_sparse_moe.experts.112.w2", "model.layers.43.block_sparse_moe.experts.113.w2", "model.layers.43.block_sparse_moe.experts.114.w2", "model.layers.43.block_sparse_moe.experts.115.w2", "model.layers.43.block_sparse_moe.experts.116.w2", "model.layers.43.block_sparse_moe.experts.117.w2", "model.layers.43.block_sparse_moe.experts.118.w2", "model.layers.43.block_sparse_moe.experts.119.w2", "model.layers.43.block_sparse_moe.experts.120.w2", "model.layers.43.block_sparse_moe.experts.121.w2", "model.layers.43.block_sparse_moe.experts.122.w2", "model.layers.43.block_sparse_moe.experts.123.w2", "model.layers.43.block_sparse_moe.experts.124.w2", "model.layers.43.block_sparse_moe.experts.125.w2", "model.layers.43.block_sparse_moe.experts.126.w2", "model.layers.43.block_sparse_moe.experts.127.w2", "model.layers.43.block_sparse_moe.experts.128.w2", "model.layers.43.block_sparse_moe.experts.129.w2", "model.layers.43.block_sparse_moe.experts.130.w2", "model.layers.43.block_sparse_moe.experts.131.w2", "model.layers.43.block_sparse_moe.experts.132.w2", "model.layers.43.block_sparse_moe.experts.133.w2", "model.layers.43.block_sparse_moe.experts.134.w2", "model.layers.43.block_sparse_moe.experts.135.w2", "model.layers.43.block_sparse_moe.experts.136.w2", "model.layers.43.block_sparse_moe.experts.137.w2", "model.layers.43.block_sparse_moe.experts.138.w2", "model.layers.43.block_sparse_moe.experts.139.w2", "model.layers.43.block_sparse_moe.experts.140.w2", "model.layers.43.block_sparse_moe.experts.141.w2", "model.layers.43.block_sparse_moe.experts.142.w2", "model.layers.43.block_sparse_moe.experts.143.w2", "model.layers.43.block_sparse_moe.experts.144.w2", "model.layers.43.block_sparse_moe.experts.145.w2", "model.layers.43.block_sparse_moe.experts.146.w2", "model.layers.43.block_sparse_moe.experts.147.w2", "model.layers.43.block_sparse_moe.experts.148.w2", "model.layers.43.block_sparse_moe.experts.149.w2", "model.layers.43.block_sparse_moe.experts.150.w2", "model.layers.43.block_sparse_moe.experts.151.w2", "model.layers.43.block_sparse_moe.experts.152.w2", "model.layers.43.block_sparse_moe.experts.153.w2", "model.layers.43.block_sparse_moe.experts.154.w2", "model.layers.43.block_sparse_moe.experts.155.w2", "model.layers.43.block_sparse_moe.experts.156.w2", "model.layers.43.block_sparse_moe.experts.157.w2", "model.layers.43.block_sparse_moe.experts.158.w2", "model.layers.43.block_sparse_moe.experts.159.w2", "model.layers.43.block_sparse_moe.experts.160.w2", "model.layers.43.block_sparse_moe.experts.161.w2", "model.layers.43.block_sparse_moe.experts.162.w2", "model.layers.43.block_sparse_moe.experts.163.w2", "model.layers.43.block_sparse_moe.experts.164.w2", "model.layers.43.block_sparse_moe.experts.165.w2", "model.layers.43.block_sparse_moe.experts.166.w2", "model.layers.43.block_sparse_moe.experts.167.w2", "model.layers.43.block_sparse_moe.experts.168.w2", "model.layers.43.block_sparse_moe.experts.169.w2", "model.layers.43.block_sparse_moe.experts.170.w2", "model.layers.43.block_sparse_moe.experts.171.w2", "model.layers.43.block_sparse_moe.experts.172.w2", "model.layers.43.block_sparse_moe.experts.173.w2", "model.layers.43.block_sparse_moe.experts.174.w2", "model.layers.43.block_sparse_moe.experts.175.w2", "model.layers.43.block_sparse_moe.experts.176.w2", "model.layers.43.block_sparse_moe.experts.177.w2", "model.layers.43.block_sparse_moe.experts.178.w2", "model.layers.43.block_sparse_moe.experts.179.w2", "model.layers.43.block_sparse_moe.experts.180.w2", "model.layers.43.block_sparse_moe.experts.181.w2", "model.layers.43.block_sparse_moe.experts.182.w2", "model.layers.43.block_sparse_moe.experts.183.w2", "model.layers.43.block_sparse_moe.experts.184.w2", "model.layers.43.block_sparse_moe.experts.185.w2", "model.layers.43.block_sparse_moe.experts.186.w2", "model.layers.43.block_sparse_moe.experts.187.w2", "model.layers.43.block_sparse_moe.experts.188.w2", "model.layers.43.block_sparse_moe.experts.189.w2", "model.layers.43.block_sparse_moe.experts.190.w2", "model.layers.43.block_sparse_moe.experts.191.w2", "model.layers.43.block_sparse_moe.experts.192.w2", "model.layers.43.block_sparse_moe.experts.193.w2", "model.layers.43.block_sparse_moe.experts.194.w2", "model.layers.43.block_sparse_moe.experts.195.w2", "model.layers.43.block_sparse_moe.experts.196.w2", "model.layers.43.block_sparse_moe.experts.197.w2", "model.layers.43.block_sparse_moe.experts.198.w2", "model.layers.43.block_sparse_moe.experts.199.w2", "model.layers.43.block_sparse_moe.experts.200.w2", "model.layers.43.block_sparse_moe.experts.201.w2", "model.layers.43.block_sparse_moe.experts.202.w2", "model.layers.43.block_sparse_moe.experts.203.w2", "model.layers.43.block_sparse_moe.experts.204.w2", "model.layers.43.block_sparse_moe.experts.205.w2", "model.layers.43.block_sparse_moe.experts.206.w2", "model.layers.43.block_sparse_moe.experts.207.w2", "model.layers.43.block_sparse_moe.experts.208.w2", "model.layers.43.block_sparse_moe.experts.209.w2", "model.layers.43.block_sparse_moe.experts.210.w2", "model.layers.43.block_sparse_moe.experts.211.w2", "model.layers.43.block_sparse_moe.experts.212.w2", "model.layers.43.block_sparse_moe.experts.213.w2", "model.layers.43.block_sparse_moe.experts.214.w2", "model.layers.43.block_sparse_moe.experts.215.w2", "model.layers.43.block_sparse_moe.experts.216.w2", "model.layers.43.block_sparse_moe.experts.217.w2", "model.layers.43.block_sparse_moe.experts.218.w2", "model.layers.43.block_sparse_moe.experts.219.w2", "model.layers.43.block_sparse_moe.experts.220.w2", "model.layers.43.block_sparse_moe.experts.221.w2", "model.layers.43.block_sparse_moe.experts.222.w2", "model.layers.43.block_sparse_moe.experts.223.w2", "model.layers.43.block_sparse_moe.experts.224.w2", "model.layers.43.block_sparse_moe.experts.225.w2", "model.layers.43.block_sparse_moe.experts.226.w2", "model.layers.43.block_sparse_moe.experts.227.w2", "model.layers.43.block_sparse_moe.experts.228.w2", "model.layers.43.block_sparse_moe.experts.229.w2", "model.layers.43.block_sparse_moe.experts.230.w2", "model.layers.43.block_sparse_moe.experts.231.w2", "model.layers.43.block_sparse_moe.experts.232.w2", "model.layers.43.block_sparse_moe.experts.233.w2", "model.layers.43.block_sparse_moe.experts.234.w2", "model.layers.43.block_sparse_moe.experts.235.w2", "model.layers.43.block_sparse_moe.experts.236.w2", "model.layers.43.block_sparse_moe.experts.237.w2", "model.layers.43.block_sparse_moe.experts.238.w2", "model.layers.43.block_sparse_moe.experts.239.w2", "model.layers.43.block_sparse_moe.experts.240.w2", "model.layers.43.block_sparse_moe.experts.241.w2", "model.layers.43.block_sparse_moe.experts.242.w2", "model.layers.43.block_sparse_moe.experts.243.w2", "model.layers.43.block_sparse_moe.experts.244.w2", "model.layers.43.block_sparse_moe.experts.245.w2", "model.layers.43.block_sparse_moe.experts.246.w2", "model.layers.43.block_sparse_moe.experts.247.w2", "model.layers.43.block_sparse_moe.experts.248.w2", "model.layers.43.block_sparse_moe.experts.249.w2", "model.layers.43.block_sparse_moe.experts.250.w2", "model.layers.43.block_sparse_moe.experts.251.w2", "model.layers.43.block_sparse_moe.experts.252.w2", "model.layers.43.block_sparse_moe.experts.253.w2", "model.layers.43.block_sparse_moe.experts.254.w2", "model.layers.43.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.0001803092658519967, "dbits": 1207959552 } ] }, { "idx": 220, "layers": [ "model.layers.44.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0005760233849287366, "dbits": 18874368 } ] }, { "idx": 221, "layers": [ "model.layers.44.self_attn.k_proj", "model.layers.44.self_attn.v_proj" ], "candidates": [ { "dkld": 0.0009072210639715084, "dbits": 6291456 } ] }, { "idx": 222, "layers": [ "model.layers.44.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0021023143082857243, "dbits": 18874368 } ] }, { "idx": 223, "layers": [ "model.layers.44.block_sparse_moe.experts.0.w1", "model.layers.44.block_sparse_moe.experts.1.w1", "model.layers.44.block_sparse_moe.experts.2.w1", "model.layers.44.block_sparse_moe.experts.3.w1", "model.layers.44.block_sparse_moe.experts.4.w1", "model.layers.44.block_sparse_moe.experts.5.w1", "model.layers.44.block_sparse_moe.experts.6.w1", "model.layers.44.block_sparse_moe.experts.7.w1", "model.layers.44.block_sparse_moe.experts.8.w1", "model.layers.44.block_sparse_moe.experts.9.w1", "model.layers.44.block_sparse_moe.experts.10.w1", "model.layers.44.block_sparse_moe.experts.11.w1", "model.layers.44.block_sparse_moe.experts.12.w1", "model.layers.44.block_sparse_moe.experts.13.w1", "model.layers.44.block_sparse_moe.experts.14.w1", "model.layers.44.block_sparse_moe.experts.15.w1", "model.layers.44.block_sparse_moe.experts.16.w1", "model.layers.44.block_sparse_moe.experts.17.w1", "model.layers.44.block_sparse_moe.experts.18.w1", "model.layers.44.block_sparse_moe.experts.19.w1", "model.layers.44.block_sparse_moe.experts.20.w1", "model.layers.44.block_sparse_moe.experts.21.w1", "model.layers.44.block_sparse_moe.experts.22.w1", "model.layers.44.block_sparse_moe.experts.23.w1", "model.layers.44.block_sparse_moe.experts.24.w1", "model.layers.44.block_sparse_moe.experts.25.w1", "model.layers.44.block_sparse_moe.experts.26.w1", "model.layers.44.block_sparse_moe.experts.27.w1", "model.layers.44.block_sparse_moe.experts.28.w1", "model.layers.44.block_sparse_moe.experts.29.w1", "model.layers.44.block_sparse_moe.experts.30.w1", "model.layers.44.block_sparse_moe.experts.31.w1", "model.layers.44.block_sparse_moe.experts.32.w1", "model.layers.44.block_sparse_moe.experts.33.w1", "model.layers.44.block_sparse_moe.experts.34.w1", "model.layers.44.block_sparse_moe.experts.35.w1", "model.layers.44.block_sparse_moe.experts.36.w1", "model.layers.44.block_sparse_moe.experts.37.w1", "model.layers.44.block_sparse_moe.experts.38.w1", "model.layers.44.block_sparse_moe.experts.39.w1", "model.layers.44.block_sparse_moe.experts.40.w1", "model.layers.44.block_sparse_moe.experts.41.w1", "model.layers.44.block_sparse_moe.experts.42.w1", "model.layers.44.block_sparse_moe.experts.43.w1", "model.layers.44.block_sparse_moe.experts.44.w1", "model.layers.44.block_sparse_moe.experts.45.w1", "model.layers.44.block_sparse_moe.experts.46.w1", "model.layers.44.block_sparse_moe.experts.47.w1", "model.layers.44.block_sparse_moe.experts.48.w1", "model.layers.44.block_sparse_moe.experts.49.w1", "model.layers.44.block_sparse_moe.experts.50.w1", "model.layers.44.block_sparse_moe.experts.51.w1", "model.layers.44.block_sparse_moe.experts.52.w1", "model.layers.44.block_sparse_moe.experts.53.w1", "model.layers.44.block_sparse_moe.experts.54.w1", "model.layers.44.block_sparse_moe.experts.55.w1", "model.layers.44.block_sparse_moe.experts.56.w1", "model.layers.44.block_sparse_moe.experts.57.w1", "model.layers.44.block_sparse_moe.experts.58.w1", "model.layers.44.block_sparse_moe.experts.59.w1", "model.layers.44.block_sparse_moe.experts.60.w1", "model.layers.44.block_sparse_moe.experts.61.w1", "model.layers.44.block_sparse_moe.experts.62.w1", "model.layers.44.block_sparse_moe.experts.63.w1", "model.layers.44.block_sparse_moe.experts.64.w1", "model.layers.44.block_sparse_moe.experts.65.w1", "model.layers.44.block_sparse_moe.experts.66.w1", "model.layers.44.block_sparse_moe.experts.67.w1", "model.layers.44.block_sparse_moe.experts.68.w1", "model.layers.44.block_sparse_moe.experts.69.w1", "model.layers.44.block_sparse_moe.experts.70.w1", "model.layers.44.block_sparse_moe.experts.71.w1", "model.layers.44.block_sparse_moe.experts.72.w1", "model.layers.44.block_sparse_moe.experts.73.w1", "model.layers.44.block_sparse_moe.experts.74.w1", "model.layers.44.block_sparse_moe.experts.75.w1", "model.layers.44.block_sparse_moe.experts.76.w1", "model.layers.44.block_sparse_moe.experts.77.w1", "model.layers.44.block_sparse_moe.experts.78.w1", "model.layers.44.block_sparse_moe.experts.79.w1", "model.layers.44.block_sparse_moe.experts.80.w1", "model.layers.44.block_sparse_moe.experts.81.w1", "model.layers.44.block_sparse_moe.experts.82.w1", "model.layers.44.block_sparse_moe.experts.83.w1", "model.layers.44.block_sparse_moe.experts.84.w1", "model.layers.44.block_sparse_moe.experts.85.w1", "model.layers.44.block_sparse_moe.experts.86.w1", "model.layers.44.block_sparse_moe.experts.87.w1", "model.layers.44.block_sparse_moe.experts.88.w1", "model.layers.44.block_sparse_moe.experts.89.w1", "model.layers.44.block_sparse_moe.experts.90.w1", "model.layers.44.block_sparse_moe.experts.91.w1", "model.layers.44.block_sparse_moe.experts.92.w1", "model.layers.44.block_sparse_moe.experts.93.w1", "model.layers.44.block_sparse_moe.experts.94.w1", "model.layers.44.block_sparse_moe.experts.95.w1", "model.layers.44.block_sparse_moe.experts.96.w1", "model.layers.44.block_sparse_moe.experts.97.w1", "model.layers.44.block_sparse_moe.experts.98.w1", "model.layers.44.block_sparse_moe.experts.99.w1", "model.layers.44.block_sparse_moe.experts.100.w1", "model.layers.44.block_sparse_moe.experts.101.w1", "model.layers.44.block_sparse_moe.experts.102.w1", "model.layers.44.block_sparse_moe.experts.103.w1", "model.layers.44.block_sparse_moe.experts.104.w1", "model.layers.44.block_sparse_moe.experts.105.w1", "model.layers.44.block_sparse_moe.experts.106.w1", "model.layers.44.block_sparse_moe.experts.107.w1", "model.layers.44.block_sparse_moe.experts.108.w1", "model.layers.44.block_sparse_moe.experts.109.w1", "model.layers.44.block_sparse_moe.experts.110.w1", "model.layers.44.block_sparse_moe.experts.111.w1", "model.layers.44.block_sparse_moe.experts.112.w1", "model.layers.44.block_sparse_moe.experts.113.w1", "model.layers.44.block_sparse_moe.experts.114.w1", "model.layers.44.block_sparse_moe.experts.115.w1", "model.layers.44.block_sparse_moe.experts.116.w1", "model.layers.44.block_sparse_moe.experts.117.w1", "model.layers.44.block_sparse_moe.experts.118.w1", "model.layers.44.block_sparse_moe.experts.119.w1", "model.layers.44.block_sparse_moe.experts.120.w1", "model.layers.44.block_sparse_moe.experts.121.w1", "model.layers.44.block_sparse_moe.experts.122.w1", "model.layers.44.block_sparse_moe.experts.123.w1", "model.layers.44.block_sparse_moe.experts.124.w1", "model.layers.44.block_sparse_moe.experts.125.w1", "model.layers.44.block_sparse_moe.experts.126.w1", "model.layers.44.block_sparse_moe.experts.127.w1", "model.layers.44.block_sparse_moe.experts.128.w1", "model.layers.44.block_sparse_moe.experts.129.w1", "model.layers.44.block_sparse_moe.experts.130.w1", "model.layers.44.block_sparse_moe.experts.131.w1", "model.layers.44.block_sparse_moe.experts.132.w1", "model.layers.44.block_sparse_moe.experts.133.w1", "model.layers.44.block_sparse_moe.experts.134.w1", "model.layers.44.block_sparse_moe.experts.135.w1", "model.layers.44.block_sparse_moe.experts.136.w1", "model.layers.44.block_sparse_moe.experts.137.w1", "model.layers.44.block_sparse_moe.experts.138.w1", "model.layers.44.block_sparse_moe.experts.139.w1", "model.layers.44.block_sparse_moe.experts.140.w1", "model.layers.44.block_sparse_moe.experts.141.w1", "model.layers.44.block_sparse_moe.experts.142.w1", "model.layers.44.block_sparse_moe.experts.143.w1", "model.layers.44.block_sparse_moe.experts.144.w1", "model.layers.44.block_sparse_moe.experts.145.w1", "model.layers.44.block_sparse_moe.experts.146.w1", "model.layers.44.block_sparse_moe.experts.147.w1", "model.layers.44.block_sparse_moe.experts.148.w1", "model.layers.44.block_sparse_moe.experts.149.w1", "model.layers.44.block_sparse_moe.experts.150.w1", "model.layers.44.block_sparse_moe.experts.151.w1", "model.layers.44.block_sparse_moe.experts.152.w1", "model.layers.44.block_sparse_moe.experts.153.w1", "model.layers.44.block_sparse_moe.experts.154.w1", "model.layers.44.block_sparse_moe.experts.155.w1", "model.layers.44.block_sparse_moe.experts.156.w1", "model.layers.44.block_sparse_moe.experts.157.w1", "model.layers.44.block_sparse_moe.experts.158.w1", "model.layers.44.block_sparse_moe.experts.159.w1", "model.layers.44.block_sparse_moe.experts.160.w1", "model.layers.44.block_sparse_moe.experts.161.w1", "model.layers.44.block_sparse_moe.experts.162.w1", "model.layers.44.block_sparse_moe.experts.163.w1", "model.layers.44.block_sparse_moe.experts.164.w1", "model.layers.44.block_sparse_moe.experts.165.w1", "model.layers.44.block_sparse_moe.experts.166.w1", "model.layers.44.block_sparse_moe.experts.167.w1", "model.layers.44.block_sparse_moe.experts.168.w1", "model.layers.44.block_sparse_moe.experts.169.w1", "model.layers.44.block_sparse_moe.experts.170.w1", "model.layers.44.block_sparse_moe.experts.171.w1", "model.layers.44.block_sparse_moe.experts.172.w1", "model.layers.44.block_sparse_moe.experts.173.w1", "model.layers.44.block_sparse_moe.experts.174.w1", "model.layers.44.block_sparse_moe.experts.175.w1", "model.layers.44.block_sparse_moe.experts.176.w1", "model.layers.44.block_sparse_moe.experts.177.w1", "model.layers.44.block_sparse_moe.experts.178.w1", "model.layers.44.block_sparse_moe.experts.179.w1", "model.layers.44.block_sparse_moe.experts.180.w1", "model.layers.44.block_sparse_moe.experts.181.w1", "model.layers.44.block_sparse_moe.experts.182.w1", "model.layers.44.block_sparse_moe.experts.183.w1", "model.layers.44.block_sparse_moe.experts.184.w1", "model.layers.44.block_sparse_moe.experts.185.w1", "model.layers.44.block_sparse_moe.experts.186.w1", "model.layers.44.block_sparse_moe.experts.187.w1", "model.layers.44.block_sparse_moe.experts.188.w1", "model.layers.44.block_sparse_moe.experts.189.w1", "model.layers.44.block_sparse_moe.experts.190.w1", "model.layers.44.block_sparse_moe.experts.191.w1", "model.layers.44.block_sparse_moe.experts.192.w1", "model.layers.44.block_sparse_moe.experts.193.w1", "model.layers.44.block_sparse_moe.experts.194.w1", "model.layers.44.block_sparse_moe.experts.195.w1", "model.layers.44.block_sparse_moe.experts.196.w1", "model.layers.44.block_sparse_moe.experts.197.w1", "model.layers.44.block_sparse_moe.experts.198.w1", "model.layers.44.block_sparse_moe.experts.199.w1", "model.layers.44.block_sparse_moe.experts.200.w1", "model.layers.44.block_sparse_moe.experts.201.w1", "model.layers.44.block_sparse_moe.experts.202.w1", "model.layers.44.block_sparse_moe.experts.203.w1", "model.layers.44.block_sparse_moe.experts.204.w1", "model.layers.44.block_sparse_moe.experts.205.w1", "model.layers.44.block_sparse_moe.experts.206.w1", "model.layers.44.block_sparse_moe.experts.207.w1", "model.layers.44.block_sparse_moe.experts.208.w1", "model.layers.44.block_sparse_moe.experts.209.w1", "model.layers.44.block_sparse_moe.experts.210.w1", "model.layers.44.block_sparse_moe.experts.211.w1", "model.layers.44.block_sparse_moe.experts.212.w1", "model.layers.44.block_sparse_moe.experts.213.w1", "model.layers.44.block_sparse_moe.experts.214.w1", "model.layers.44.block_sparse_moe.experts.215.w1", "model.layers.44.block_sparse_moe.experts.216.w1", "model.layers.44.block_sparse_moe.experts.217.w1", "model.layers.44.block_sparse_moe.experts.218.w1", "model.layers.44.block_sparse_moe.experts.219.w1", "model.layers.44.block_sparse_moe.experts.220.w1", "model.layers.44.block_sparse_moe.experts.221.w1", "model.layers.44.block_sparse_moe.experts.222.w1", "model.layers.44.block_sparse_moe.experts.223.w1", "model.layers.44.block_sparse_moe.experts.224.w1", "model.layers.44.block_sparse_moe.experts.225.w1", "model.layers.44.block_sparse_moe.experts.226.w1", "model.layers.44.block_sparse_moe.experts.227.w1", "model.layers.44.block_sparse_moe.experts.228.w1", "model.layers.44.block_sparse_moe.experts.229.w1", "model.layers.44.block_sparse_moe.experts.230.w1", "model.layers.44.block_sparse_moe.experts.231.w1", "model.layers.44.block_sparse_moe.experts.232.w1", "model.layers.44.block_sparse_moe.experts.233.w1", "model.layers.44.block_sparse_moe.experts.234.w1", "model.layers.44.block_sparse_moe.experts.235.w1", "model.layers.44.block_sparse_moe.experts.236.w1", "model.layers.44.block_sparse_moe.experts.237.w1", "model.layers.44.block_sparse_moe.experts.238.w1", "model.layers.44.block_sparse_moe.experts.239.w1", "model.layers.44.block_sparse_moe.experts.240.w1", "model.layers.44.block_sparse_moe.experts.241.w1", "model.layers.44.block_sparse_moe.experts.242.w1", "model.layers.44.block_sparse_moe.experts.243.w1", "model.layers.44.block_sparse_moe.experts.244.w1", "model.layers.44.block_sparse_moe.experts.245.w1", "model.layers.44.block_sparse_moe.experts.246.w1", "model.layers.44.block_sparse_moe.experts.247.w1", "model.layers.44.block_sparse_moe.experts.248.w1", "model.layers.44.block_sparse_moe.experts.249.w1", "model.layers.44.block_sparse_moe.experts.250.w1", "model.layers.44.block_sparse_moe.experts.251.w1", "model.layers.44.block_sparse_moe.experts.252.w1", "model.layers.44.block_sparse_moe.experts.253.w1", "model.layers.44.block_sparse_moe.experts.254.w1", "model.layers.44.block_sparse_moe.experts.255.w1", "model.layers.44.block_sparse_moe.experts.0.w3", "model.layers.44.block_sparse_moe.experts.1.w3", "model.layers.44.block_sparse_moe.experts.2.w3", "model.layers.44.block_sparse_moe.experts.3.w3", "model.layers.44.block_sparse_moe.experts.4.w3", "model.layers.44.block_sparse_moe.experts.5.w3", "model.layers.44.block_sparse_moe.experts.6.w3", "model.layers.44.block_sparse_moe.experts.7.w3", "model.layers.44.block_sparse_moe.experts.8.w3", "model.layers.44.block_sparse_moe.experts.9.w3", "model.layers.44.block_sparse_moe.experts.10.w3", "model.layers.44.block_sparse_moe.experts.11.w3", "model.layers.44.block_sparse_moe.experts.12.w3", "model.layers.44.block_sparse_moe.experts.13.w3", "model.layers.44.block_sparse_moe.experts.14.w3", "model.layers.44.block_sparse_moe.experts.15.w3", "model.layers.44.block_sparse_moe.experts.16.w3", "model.layers.44.block_sparse_moe.experts.17.w3", "model.layers.44.block_sparse_moe.experts.18.w3", "model.layers.44.block_sparse_moe.experts.19.w3", "model.layers.44.block_sparse_moe.experts.20.w3", "model.layers.44.block_sparse_moe.experts.21.w3", "model.layers.44.block_sparse_moe.experts.22.w3", "model.layers.44.block_sparse_moe.experts.23.w3", "model.layers.44.block_sparse_moe.experts.24.w3", "model.layers.44.block_sparse_moe.experts.25.w3", "model.layers.44.block_sparse_moe.experts.26.w3", "model.layers.44.block_sparse_moe.experts.27.w3", "model.layers.44.block_sparse_moe.experts.28.w3", "model.layers.44.block_sparse_moe.experts.29.w3", "model.layers.44.block_sparse_moe.experts.30.w3", "model.layers.44.block_sparse_moe.experts.31.w3", "model.layers.44.block_sparse_moe.experts.32.w3", "model.layers.44.block_sparse_moe.experts.33.w3", "model.layers.44.block_sparse_moe.experts.34.w3", "model.layers.44.block_sparse_moe.experts.35.w3", "model.layers.44.block_sparse_moe.experts.36.w3", "model.layers.44.block_sparse_moe.experts.37.w3", "model.layers.44.block_sparse_moe.experts.38.w3", "model.layers.44.block_sparse_moe.experts.39.w3", "model.layers.44.block_sparse_moe.experts.40.w3", "model.layers.44.block_sparse_moe.experts.41.w3", "model.layers.44.block_sparse_moe.experts.42.w3", "model.layers.44.block_sparse_moe.experts.43.w3", "model.layers.44.block_sparse_moe.experts.44.w3", "model.layers.44.block_sparse_moe.experts.45.w3", "model.layers.44.block_sparse_moe.experts.46.w3", "model.layers.44.block_sparse_moe.experts.47.w3", "model.layers.44.block_sparse_moe.experts.48.w3", "model.layers.44.block_sparse_moe.experts.49.w3", "model.layers.44.block_sparse_moe.experts.50.w3", "model.layers.44.block_sparse_moe.experts.51.w3", "model.layers.44.block_sparse_moe.experts.52.w3", "model.layers.44.block_sparse_moe.experts.53.w3", "model.layers.44.block_sparse_moe.experts.54.w3", "model.layers.44.block_sparse_moe.experts.55.w3", "model.layers.44.block_sparse_moe.experts.56.w3", "model.layers.44.block_sparse_moe.experts.57.w3", "model.layers.44.block_sparse_moe.experts.58.w3", "model.layers.44.block_sparse_moe.experts.59.w3", "model.layers.44.block_sparse_moe.experts.60.w3", "model.layers.44.block_sparse_moe.experts.61.w3", "model.layers.44.block_sparse_moe.experts.62.w3", "model.layers.44.block_sparse_moe.experts.63.w3", "model.layers.44.block_sparse_moe.experts.64.w3", "model.layers.44.block_sparse_moe.experts.65.w3", "model.layers.44.block_sparse_moe.experts.66.w3", "model.layers.44.block_sparse_moe.experts.67.w3", "model.layers.44.block_sparse_moe.experts.68.w3", "model.layers.44.block_sparse_moe.experts.69.w3", "model.layers.44.block_sparse_moe.experts.70.w3", "model.layers.44.block_sparse_moe.experts.71.w3", "model.layers.44.block_sparse_moe.experts.72.w3", "model.layers.44.block_sparse_moe.experts.73.w3", "model.layers.44.block_sparse_moe.experts.74.w3", "model.layers.44.block_sparse_moe.experts.75.w3", "model.layers.44.block_sparse_moe.experts.76.w3", "model.layers.44.block_sparse_moe.experts.77.w3", "model.layers.44.block_sparse_moe.experts.78.w3", "model.layers.44.block_sparse_moe.experts.79.w3", "model.layers.44.block_sparse_moe.experts.80.w3", "model.layers.44.block_sparse_moe.experts.81.w3", "model.layers.44.block_sparse_moe.experts.82.w3", "model.layers.44.block_sparse_moe.experts.83.w3", "model.layers.44.block_sparse_moe.experts.84.w3", "model.layers.44.block_sparse_moe.experts.85.w3", "model.layers.44.block_sparse_moe.experts.86.w3", "model.layers.44.block_sparse_moe.experts.87.w3", "model.layers.44.block_sparse_moe.experts.88.w3", "model.layers.44.block_sparse_moe.experts.89.w3", "model.layers.44.block_sparse_moe.experts.90.w3", "model.layers.44.block_sparse_moe.experts.91.w3", "model.layers.44.block_sparse_moe.experts.92.w3", "model.layers.44.block_sparse_moe.experts.93.w3", "model.layers.44.block_sparse_moe.experts.94.w3", "model.layers.44.block_sparse_moe.experts.95.w3", "model.layers.44.block_sparse_moe.experts.96.w3", "model.layers.44.block_sparse_moe.experts.97.w3", "model.layers.44.block_sparse_moe.experts.98.w3", "model.layers.44.block_sparse_moe.experts.99.w3", "model.layers.44.block_sparse_moe.experts.100.w3", "model.layers.44.block_sparse_moe.experts.101.w3", "model.layers.44.block_sparse_moe.experts.102.w3", "model.layers.44.block_sparse_moe.experts.103.w3", "model.layers.44.block_sparse_moe.experts.104.w3", "model.layers.44.block_sparse_moe.experts.105.w3", "model.layers.44.block_sparse_moe.experts.106.w3", "model.layers.44.block_sparse_moe.experts.107.w3", "model.layers.44.block_sparse_moe.experts.108.w3", "model.layers.44.block_sparse_moe.experts.109.w3", "model.layers.44.block_sparse_moe.experts.110.w3", "model.layers.44.block_sparse_moe.experts.111.w3", "model.layers.44.block_sparse_moe.experts.112.w3", "model.layers.44.block_sparse_moe.experts.113.w3", "model.layers.44.block_sparse_moe.experts.114.w3", "model.layers.44.block_sparse_moe.experts.115.w3", "model.layers.44.block_sparse_moe.experts.116.w3", "model.layers.44.block_sparse_moe.experts.117.w3", "model.layers.44.block_sparse_moe.experts.118.w3", "model.layers.44.block_sparse_moe.experts.119.w3", "model.layers.44.block_sparse_moe.experts.120.w3", "model.layers.44.block_sparse_moe.experts.121.w3", "model.layers.44.block_sparse_moe.experts.122.w3", "model.layers.44.block_sparse_moe.experts.123.w3", "model.layers.44.block_sparse_moe.experts.124.w3", "model.layers.44.block_sparse_moe.experts.125.w3", "model.layers.44.block_sparse_moe.experts.126.w3", "model.layers.44.block_sparse_moe.experts.127.w3", "model.layers.44.block_sparse_moe.experts.128.w3", "model.layers.44.block_sparse_moe.experts.129.w3", "model.layers.44.block_sparse_moe.experts.130.w3", "model.layers.44.block_sparse_moe.experts.131.w3", "model.layers.44.block_sparse_moe.experts.132.w3", "model.layers.44.block_sparse_moe.experts.133.w3", "model.layers.44.block_sparse_moe.experts.134.w3", "model.layers.44.block_sparse_moe.experts.135.w3", "model.layers.44.block_sparse_moe.experts.136.w3", "model.layers.44.block_sparse_moe.experts.137.w3", "model.layers.44.block_sparse_moe.experts.138.w3", "model.layers.44.block_sparse_moe.experts.139.w3", "model.layers.44.block_sparse_moe.experts.140.w3", "model.layers.44.block_sparse_moe.experts.141.w3", "model.layers.44.block_sparse_moe.experts.142.w3", "model.layers.44.block_sparse_moe.experts.143.w3", "model.layers.44.block_sparse_moe.experts.144.w3", "model.layers.44.block_sparse_moe.experts.145.w3", "model.layers.44.block_sparse_moe.experts.146.w3", "model.layers.44.block_sparse_moe.experts.147.w3", "model.layers.44.block_sparse_moe.experts.148.w3", "model.layers.44.block_sparse_moe.experts.149.w3", "model.layers.44.block_sparse_moe.experts.150.w3", "model.layers.44.block_sparse_moe.experts.151.w3", "model.layers.44.block_sparse_moe.experts.152.w3", "model.layers.44.block_sparse_moe.experts.153.w3", "model.layers.44.block_sparse_moe.experts.154.w3", "model.layers.44.block_sparse_moe.experts.155.w3", "model.layers.44.block_sparse_moe.experts.156.w3", "model.layers.44.block_sparse_moe.experts.157.w3", "model.layers.44.block_sparse_moe.experts.158.w3", "model.layers.44.block_sparse_moe.experts.159.w3", "model.layers.44.block_sparse_moe.experts.160.w3", "model.layers.44.block_sparse_moe.experts.161.w3", "model.layers.44.block_sparse_moe.experts.162.w3", "model.layers.44.block_sparse_moe.experts.163.w3", "model.layers.44.block_sparse_moe.experts.164.w3", "model.layers.44.block_sparse_moe.experts.165.w3", "model.layers.44.block_sparse_moe.experts.166.w3", "model.layers.44.block_sparse_moe.experts.167.w3", "model.layers.44.block_sparse_moe.experts.168.w3", "model.layers.44.block_sparse_moe.experts.169.w3", "model.layers.44.block_sparse_moe.experts.170.w3", "model.layers.44.block_sparse_moe.experts.171.w3", "model.layers.44.block_sparse_moe.experts.172.w3", "model.layers.44.block_sparse_moe.experts.173.w3", "model.layers.44.block_sparse_moe.experts.174.w3", "model.layers.44.block_sparse_moe.experts.175.w3", "model.layers.44.block_sparse_moe.experts.176.w3", "model.layers.44.block_sparse_moe.experts.177.w3", "model.layers.44.block_sparse_moe.experts.178.w3", "model.layers.44.block_sparse_moe.experts.179.w3", "model.layers.44.block_sparse_moe.experts.180.w3", "model.layers.44.block_sparse_moe.experts.181.w3", "model.layers.44.block_sparse_moe.experts.182.w3", "model.layers.44.block_sparse_moe.experts.183.w3", "model.layers.44.block_sparse_moe.experts.184.w3", "model.layers.44.block_sparse_moe.experts.185.w3", "model.layers.44.block_sparse_moe.experts.186.w3", "model.layers.44.block_sparse_moe.experts.187.w3", "model.layers.44.block_sparse_moe.experts.188.w3", "model.layers.44.block_sparse_moe.experts.189.w3", "model.layers.44.block_sparse_moe.experts.190.w3", "model.layers.44.block_sparse_moe.experts.191.w3", "model.layers.44.block_sparse_moe.experts.192.w3", "model.layers.44.block_sparse_moe.experts.193.w3", "model.layers.44.block_sparse_moe.experts.194.w3", "model.layers.44.block_sparse_moe.experts.195.w3", "model.layers.44.block_sparse_moe.experts.196.w3", "model.layers.44.block_sparse_moe.experts.197.w3", "model.layers.44.block_sparse_moe.experts.198.w3", "model.layers.44.block_sparse_moe.experts.199.w3", "model.layers.44.block_sparse_moe.experts.200.w3", "model.layers.44.block_sparse_moe.experts.201.w3", "model.layers.44.block_sparse_moe.experts.202.w3", "model.layers.44.block_sparse_moe.experts.203.w3", "model.layers.44.block_sparse_moe.experts.204.w3", "model.layers.44.block_sparse_moe.experts.205.w3", "model.layers.44.block_sparse_moe.experts.206.w3", "model.layers.44.block_sparse_moe.experts.207.w3", "model.layers.44.block_sparse_moe.experts.208.w3", "model.layers.44.block_sparse_moe.experts.209.w3", "model.layers.44.block_sparse_moe.experts.210.w3", "model.layers.44.block_sparse_moe.experts.211.w3", "model.layers.44.block_sparse_moe.experts.212.w3", "model.layers.44.block_sparse_moe.experts.213.w3", "model.layers.44.block_sparse_moe.experts.214.w3", "model.layers.44.block_sparse_moe.experts.215.w3", "model.layers.44.block_sparse_moe.experts.216.w3", "model.layers.44.block_sparse_moe.experts.217.w3", "model.layers.44.block_sparse_moe.experts.218.w3", "model.layers.44.block_sparse_moe.experts.219.w3", "model.layers.44.block_sparse_moe.experts.220.w3", "model.layers.44.block_sparse_moe.experts.221.w3", "model.layers.44.block_sparse_moe.experts.222.w3", "model.layers.44.block_sparse_moe.experts.223.w3", "model.layers.44.block_sparse_moe.experts.224.w3", "model.layers.44.block_sparse_moe.experts.225.w3", "model.layers.44.block_sparse_moe.experts.226.w3", "model.layers.44.block_sparse_moe.experts.227.w3", "model.layers.44.block_sparse_moe.experts.228.w3", "model.layers.44.block_sparse_moe.experts.229.w3", "model.layers.44.block_sparse_moe.experts.230.w3", "model.layers.44.block_sparse_moe.experts.231.w3", "model.layers.44.block_sparse_moe.experts.232.w3", "model.layers.44.block_sparse_moe.experts.233.w3", "model.layers.44.block_sparse_moe.experts.234.w3", "model.layers.44.block_sparse_moe.experts.235.w3", "model.layers.44.block_sparse_moe.experts.236.w3", "model.layers.44.block_sparse_moe.experts.237.w3", "model.layers.44.block_sparse_moe.experts.238.w3", "model.layers.44.block_sparse_moe.experts.239.w3", "model.layers.44.block_sparse_moe.experts.240.w3", "model.layers.44.block_sparse_moe.experts.241.w3", "model.layers.44.block_sparse_moe.experts.242.w3", "model.layers.44.block_sparse_moe.experts.243.w3", "model.layers.44.block_sparse_moe.experts.244.w3", "model.layers.44.block_sparse_moe.experts.245.w3", "model.layers.44.block_sparse_moe.experts.246.w3", "model.layers.44.block_sparse_moe.experts.247.w3", "model.layers.44.block_sparse_moe.experts.248.w3", "model.layers.44.block_sparse_moe.experts.249.w3", "model.layers.44.block_sparse_moe.experts.250.w3", "model.layers.44.block_sparse_moe.experts.251.w3", "model.layers.44.block_sparse_moe.experts.252.w3", "model.layers.44.block_sparse_moe.experts.253.w3", "model.layers.44.block_sparse_moe.experts.254.w3", "model.layers.44.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0002046007663011884, "dbits": 2415919104 } ] }, { "idx": 224, "layers": [ "model.layers.44.block_sparse_moe.experts.0.w2", "model.layers.44.block_sparse_moe.experts.1.w2", "model.layers.44.block_sparse_moe.experts.2.w2", "model.layers.44.block_sparse_moe.experts.3.w2", "model.layers.44.block_sparse_moe.experts.4.w2", "model.layers.44.block_sparse_moe.experts.5.w2", "model.layers.44.block_sparse_moe.experts.6.w2", "model.layers.44.block_sparse_moe.experts.7.w2", "model.layers.44.block_sparse_moe.experts.8.w2", "model.layers.44.block_sparse_moe.experts.9.w2", "model.layers.44.block_sparse_moe.experts.10.w2", "model.layers.44.block_sparse_moe.experts.11.w2", "model.layers.44.block_sparse_moe.experts.12.w2", "model.layers.44.block_sparse_moe.experts.13.w2", "model.layers.44.block_sparse_moe.experts.14.w2", "model.layers.44.block_sparse_moe.experts.15.w2", "model.layers.44.block_sparse_moe.experts.16.w2", "model.layers.44.block_sparse_moe.experts.17.w2", "model.layers.44.block_sparse_moe.experts.18.w2", "model.layers.44.block_sparse_moe.experts.19.w2", "model.layers.44.block_sparse_moe.experts.20.w2", "model.layers.44.block_sparse_moe.experts.21.w2", "model.layers.44.block_sparse_moe.experts.22.w2", "model.layers.44.block_sparse_moe.experts.23.w2", "model.layers.44.block_sparse_moe.experts.24.w2", "model.layers.44.block_sparse_moe.experts.25.w2", "model.layers.44.block_sparse_moe.experts.26.w2", "model.layers.44.block_sparse_moe.experts.27.w2", "model.layers.44.block_sparse_moe.experts.28.w2", "model.layers.44.block_sparse_moe.experts.29.w2", "model.layers.44.block_sparse_moe.experts.30.w2", "model.layers.44.block_sparse_moe.experts.31.w2", "model.layers.44.block_sparse_moe.experts.32.w2", "model.layers.44.block_sparse_moe.experts.33.w2", "model.layers.44.block_sparse_moe.experts.34.w2", "model.layers.44.block_sparse_moe.experts.35.w2", "model.layers.44.block_sparse_moe.experts.36.w2", "model.layers.44.block_sparse_moe.experts.37.w2", "model.layers.44.block_sparse_moe.experts.38.w2", "model.layers.44.block_sparse_moe.experts.39.w2", "model.layers.44.block_sparse_moe.experts.40.w2", "model.layers.44.block_sparse_moe.experts.41.w2", "model.layers.44.block_sparse_moe.experts.42.w2", "model.layers.44.block_sparse_moe.experts.43.w2", "model.layers.44.block_sparse_moe.experts.44.w2", "model.layers.44.block_sparse_moe.experts.45.w2", "model.layers.44.block_sparse_moe.experts.46.w2", "model.layers.44.block_sparse_moe.experts.47.w2", "model.layers.44.block_sparse_moe.experts.48.w2", "model.layers.44.block_sparse_moe.experts.49.w2", "model.layers.44.block_sparse_moe.experts.50.w2", "model.layers.44.block_sparse_moe.experts.51.w2", "model.layers.44.block_sparse_moe.experts.52.w2", "model.layers.44.block_sparse_moe.experts.53.w2", "model.layers.44.block_sparse_moe.experts.54.w2", "model.layers.44.block_sparse_moe.experts.55.w2", "model.layers.44.block_sparse_moe.experts.56.w2", "model.layers.44.block_sparse_moe.experts.57.w2", "model.layers.44.block_sparse_moe.experts.58.w2", "model.layers.44.block_sparse_moe.experts.59.w2", "model.layers.44.block_sparse_moe.experts.60.w2", "model.layers.44.block_sparse_moe.experts.61.w2", "model.layers.44.block_sparse_moe.experts.62.w2", "model.layers.44.block_sparse_moe.experts.63.w2", "model.layers.44.block_sparse_moe.experts.64.w2", "model.layers.44.block_sparse_moe.experts.65.w2", "model.layers.44.block_sparse_moe.experts.66.w2", "model.layers.44.block_sparse_moe.experts.67.w2", "model.layers.44.block_sparse_moe.experts.68.w2", "model.layers.44.block_sparse_moe.experts.69.w2", "model.layers.44.block_sparse_moe.experts.70.w2", "model.layers.44.block_sparse_moe.experts.71.w2", "model.layers.44.block_sparse_moe.experts.72.w2", "model.layers.44.block_sparse_moe.experts.73.w2", "model.layers.44.block_sparse_moe.experts.74.w2", "model.layers.44.block_sparse_moe.experts.75.w2", "model.layers.44.block_sparse_moe.experts.76.w2", "model.layers.44.block_sparse_moe.experts.77.w2", "model.layers.44.block_sparse_moe.experts.78.w2", "model.layers.44.block_sparse_moe.experts.79.w2", "model.layers.44.block_sparse_moe.experts.80.w2", "model.layers.44.block_sparse_moe.experts.81.w2", "model.layers.44.block_sparse_moe.experts.82.w2", "model.layers.44.block_sparse_moe.experts.83.w2", "model.layers.44.block_sparse_moe.experts.84.w2", "model.layers.44.block_sparse_moe.experts.85.w2", "model.layers.44.block_sparse_moe.experts.86.w2", "model.layers.44.block_sparse_moe.experts.87.w2", "model.layers.44.block_sparse_moe.experts.88.w2", "model.layers.44.block_sparse_moe.experts.89.w2", "model.layers.44.block_sparse_moe.experts.90.w2", "model.layers.44.block_sparse_moe.experts.91.w2", "model.layers.44.block_sparse_moe.experts.92.w2", "model.layers.44.block_sparse_moe.experts.93.w2", "model.layers.44.block_sparse_moe.experts.94.w2", "model.layers.44.block_sparse_moe.experts.95.w2", "model.layers.44.block_sparse_moe.experts.96.w2", "model.layers.44.block_sparse_moe.experts.97.w2", "model.layers.44.block_sparse_moe.experts.98.w2", "model.layers.44.block_sparse_moe.experts.99.w2", "model.layers.44.block_sparse_moe.experts.100.w2", "model.layers.44.block_sparse_moe.experts.101.w2", "model.layers.44.block_sparse_moe.experts.102.w2", "model.layers.44.block_sparse_moe.experts.103.w2", "model.layers.44.block_sparse_moe.experts.104.w2", "model.layers.44.block_sparse_moe.experts.105.w2", "model.layers.44.block_sparse_moe.experts.106.w2", "model.layers.44.block_sparse_moe.experts.107.w2", "model.layers.44.block_sparse_moe.experts.108.w2", "model.layers.44.block_sparse_moe.experts.109.w2", "model.layers.44.block_sparse_moe.experts.110.w2", "model.layers.44.block_sparse_moe.experts.111.w2", "model.layers.44.block_sparse_moe.experts.112.w2", "model.layers.44.block_sparse_moe.experts.113.w2", "model.layers.44.block_sparse_moe.experts.114.w2", "model.layers.44.block_sparse_moe.experts.115.w2", "model.layers.44.block_sparse_moe.experts.116.w2", "model.layers.44.block_sparse_moe.experts.117.w2", "model.layers.44.block_sparse_moe.experts.118.w2", "model.layers.44.block_sparse_moe.experts.119.w2", "model.layers.44.block_sparse_moe.experts.120.w2", "model.layers.44.block_sparse_moe.experts.121.w2", "model.layers.44.block_sparse_moe.experts.122.w2", "model.layers.44.block_sparse_moe.experts.123.w2", "model.layers.44.block_sparse_moe.experts.124.w2", "model.layers.44.block_sparse_moe.experts.125.w2", "model.layers.44.block_sparse_moe.experts.126.w2", "model.layers.44.block_sparse_moe.experts.127.w2", "model.layers.44.block_sparse_moe.experts.128.w2", "model.layers.44.block_sparse_moe.experts.129.w2", "model.layers.44.block_sparse_moe.experts.130.w2", "model.layers.44.block_sparse_moe.experts.131.w2", "model.layers.44.block_sparse_moe.experts.132.w2", "model.layers.44.block_sparse_moe.experts.133.w2", "model.layers.44.block_sparse_moe.experts.134.w2", "model.layers.44.block_sparse_moe.experts.135.w2", "model.layers.44.block_sparse_moe.experts.136.w2", "model.layers.44.block_sparse_moe.experts.137.w2", "model.layers.44.block_sparse_moe.experts.138.w2", "model.layers.44.block_sparse_moe.experts.139.w2", "model.layers.44.block_sparse_moe.experts.140.w2", "model.layers.44.block_sparse_moe.experts.141.w2", "model.layers.44.block_sparse_moe.experts.142.w2", "model.layers.44.block_sparse_moe.experts.143.w2", "model.layers.44.block_sparse_moe.experts.144.w2", "model.layers.44.block_sparse_moe.experts.145.w2", "model.layers.44.block_sparse_moe.experts.146.w2", "model.layers.44.block_sparse_moe.experts.147.w2", "model.layers.44.block_sparse_moe.experts.148.w2", "model.layers.44.block_sparse_moe.experts.149.w2", "model.layers.44.block_sparse_moe.experts.150.w2", "model.layers.44.block_sparse_moe.experts.151.w2", "model.layers.44.block_sparse_moe.experts.152.w2", "model.layers.44.block_sparse_moe.experts.153.w2", "model.layers.44.block_sparse_moe.experts.154.w2", "model.layers.44.block_sparse_moe.experts.155.w2", "model.layers.44.block_sparse_moe.experts.156.w2", "model.layers.44.block_sparse_moe.experts.157.w2", "model.layers.44.block_sparse_moe.experts.158.w2", "model.layers.44.block_sparse_moe.experts.159.w2", "model.layers.44.block_sparse_moe.experts.160.w2", "model.layers.44.block_sparse_moe.experts.161.w2", "model.layers.44.block_sparse_moe.experts.162.w2", "model.layers.44.block_sparse_moe.experts.163.w2", "model.layers.44.block_sparse_moe.experts.164.w2", "model.layers.44.block_sparse_moe.experts.165.w2", "model.layers.44.block_sparse_moe.experts.166.w2", "model.layers.44.block_sparse_moe.experts.167.w2", "model.layers.44.block_sparse_moe.experts.168.w2", "model.layers.44.block_sparse_moe.experts.169.w2", "model.layers.44.block_sparse_moe.experts.170.w2", "model.layers.44.block_sparse_moe.experts.171.w2", "model.layers.44.block_sparse_moe.experts.172.w2", "model.layers.44.block_sparse_moe.experts.173.w2", "model.layers.44.block_sparse_moe.experts.174.w2", "model.layers.44.block_sparse_moe.experts.175.w2", "model.layers.44.block_sparse_moe.experts.176.w2", "model.layers.44.block_sparse_moe.experts.177.w2", "model.layers.44.block_sparse_moe.experts.178.w2", "model.layers.44.block_sparse_moe.experts.179.w2", "model.layers.44.block_sparse_moe.experts.180.w2", "model.layers.44.block_sparse_moe.experts.181.w2", "model.layers.44.block_sparse_moe.experts.182.w2", "model.layers.44.block_sparse_moe.experts.183.w2", "model.layers.44.block_sparse_moe.experts.184.w2", "model.layers.44.block_sparse_moe.experts.185.w2", "model.layers.44.block_sparse_moe.experts.186.w2", "model.layers.44.block_sparse_moe.experts.187.w2", "model.layers.44.block_sparse_moe.experts.188.w2", "model.layers.44.block_sparse_moe.experts.189.w2", "model.layers.44.block_sparse_moe.experts.190.w2", "model.layers.44.block_sparse_moe.experts.191.w2", "model.layers.44.block_sparse_moe.experts.192.w2", "model.layers.44.block_sparse_moe.experts.193.w2", "model.layers.44.block_sparse_moe.experts.194.w2", "model.layers.44.block_sparse_moe.experts.195.w2", "model.layers.44.block_sparse_moe.experts.196.w2", "model.layers.44.block_sparse_moe.experts.197.w2", "model.layers.44.block_sparse_moe.experts.198.w2", "model.layers.44.block_sparse_moe.experts.199.w2", "model.layers.44.block_sparse_moe.experts.200.w2", "model.layers.44.block_sparse_moe.experts.201.w2", "model.layers.44.block_sparse_moe.experts.202.w2", "model.layers.44.block_sparse_moe.experts.203.w2", "model.layers.44.block_sparse_moe.experts.204.w2", "model.layers.44.block_sparse_moe.experts.205.w2", "model.layers.44.block_sparse_moe.experts.206.w2", "model.layers.44.block_sparse_moe.experts.207.w2", "model.layers.44.block_sparse_moe.experts.208.w2", "model.layers.44.block_sparse_moe.experts.209.w2", "model.layers.44.block_sparse_moe.experts.210.w2", "model.layers.44.block_sparse_moe.experts.211.w2", "model.layers.44.block_sparse_moe.experts.212.w2", "model.layers.44.block_sparse_moe.experts.213.w2", "model.layers.44.block_sparse_moe.experts.214.w2", "model.layers.44.block_sparse_moe.experts.215.w2", "model.layers.44.block_sparse_moe.experts.216.w2", "model.layers.44.block_sparse_moe.experts.217.w2", "model.layers.44.block_sparse_moe.experts.218.w2", "model.layers.44.block_sparse_moe.experts.219.w2", "model.layers.44.block_sparse_moe.experts.220.w2", "model.layers.44.block_sparse_moe.experts.221.w2", "model.layers.44.block_sparse_moe.experts.222.w2", "model.layers.44.block_sparse_moe.experts.223.w2", "model.layers.44.block_sparse_moe.experts.224.w2", "model.layers.44.block_sparse_moe.experts.225.w2", "model.layers.44.block_sparse_moe.experts.226.w2", "model.layers.44.block_sparse_moe.experts.227.w2", "model.layers.44.block_sparse_moe.experts.228.w2", "model.layers.44.block_sparse_moe.experts.229.w2", "model.layers.44.block_sparse_moe.experts.230.w2", "model.layers.44.block_sparse_moe.experts.231.w2", "model.layers.44.block_sparse_moe.experts.232.w2", "model.layers.44.block_sparse_moe.experts.233.w2", "model.layers.44.block_sparse_moe.experts.234.w2", "model.layers.44.block_sparse_moe.experts.235.w2", "model.layers.44.block_sparse_moe.experts.236.w2", "model.layers.44.block_sparse_moe.experts.237.w2", "model.layers.44.block_sparse_moe.experts.238.w2", "model.layers.44.block_sparse_moe.experts.239.w2", "model.layers.44.block_sparse_moe.experts.240.w2", "model.layers.44.block_sparse_moe.experts.241.w2", "model.layers.44.block_sparse_moe.experts.242.w2", "model.layers.44.block_sparse_moe.experts.243.w2", "model.layers.44.block_sparse_moe.experts.244.w2", "model.layers.44.block_sparse_moe.experts.245.w2", "model.layers.44.block_sparse_moe.experts.246.w2", "model.layers.44.block_sparse_moe.experts.247.w2", "model.layers.44.block_sparse_moe.experts.248.w2", "model.layers.44.block_sparse_moe.experts.249.w2", "model.layers.44.block_sparse_moe.experts.250.w2", "model.layers.44.block_sparse_moe.experts.251.w2", "model.layers.44.block_sparse_moe.experts.252.w2", "model.layers.44.block_sparse_moe.experts.253.w2", "model.layers.44.block_sparse_moe.experts.254.w2", "model.layers.44.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -9.273067116738476e-05, "dbits": 1207959552 } ] }, { "idx": 225, "layers": [ "model.layers.45.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0004917282611131557, "dbits": 18874368 } ] }, { "idx": 226, "layers": [ "model.layers.45.self_attn.k_proj", "model.layers.45.self_attn.v_proj" ], "candidates": [ { "dkld": 0.003277195617556572, "dbits": 6291456 } ] }, { "idx": 227, "layers": [ "model.layers.45.self_attn.o_proj" ], "candidates": [ { "dkld": -0.002494661882519733, "dbits": 18874368 } ] }, { "idx": 228, "layers": [ "model.layers.45.block_sparse_moe.experts.0.w1", "model.layers.45.block_sparse_moe.experts.1.w1", "model.layers.45.block_sparse_moe.experts.2.w1", "model.layers.45.block_sparse_moe.experts.3.w1", "model.layers.45.block_sparse_moe.experts.4.w1", "model.layers.45.block_sparse_moe.experts.5.w1", "model.layers.45.block_sparse_moe.experts.6.w1", "model.layers.45.block_sparse_moe.experts.7.w1", "model.layers.45.block_sparse_moe.experts.8.w1", "model.layers.45.block_sparse_moe.experts.9.w1", "model.layers.45.block_sparse_moe.experts.10.w1", "model.layers.45.block_sparse_moe.experts.11.w1", "model.layers.45.block_sparse_moe.experts.12.w1", "model.layers.45.block_sparse_moe.experts.13.w1", "model.layers.45.block_sparse_moe.experts.14.w1", "model.layers.45.block_sparse_moe.experts.15.w1", "model.layers.45.block_sparse_moe.experts.16.w1", "model.layers.45.block_sparse_moe.experts.17.w1", "model.layers.45.block_sparse_moe.experts.18.w1", "model.layers.45.block_sparse_moe.experts.19.w1", "model.layers.45.block_sparse_moe.experts.20.w1", "model.layers.45.block_sparse_moe.experts.21.w1", "model.layers.45.block_sparse_moe.experts.22.w1", "model.layers.45.block_sparse_moe.experts.23.w1", "model.layers.45.block_sparse_moe.experts.24.w1", "model.layers.45.block_sparse_moe.experts.25.w1", "model.layers.45.block_sparse_moe.experts.26.w1", "model.layers.45.block_sparse_moe.experts.27.w1", "model.layers.45.block_sparse_moe.experts.28.w1", "model.layers.45.block_sparse_moe.experts.29.w1", "model.layers.45.block_sparse_moe.experts.30.w1", "model.layers.45.block_sparse_moe.experts.31.w1", "model.layers.45.block_sparse_moe.experts.32.w1", "model.layers.45.block_sparse_moe.experts.33.w1", "model.layers.45.block_sparse_moe.experts.34.w1", "model.layers.45.block_sparse_moe.experts.35.w1", "model.layers.45.block_sparse_moe.experts.36.w1", "model.layers.45.block_sparse_moe.experts.37.w1", "model.layers.45.block_sparse_moe.experts.38.w1", "model.layers.45.block_sparse_moe.experts.39.w1", "model.layers.45.block_sparse_moe.experts.40.w1", "model.layers.45.block_sparse_moe.experts.41.w1", "model.layers.45.block_sparse_moe.experts.42.w1", "model.layers.45.block_sparse_moe.experts.43.w1", "model.layers.45.block_sparse_moe.experts.44.w1", "model.layers.45.block_sparse_moe.experts.45.w1", "model.layers.45.block_sparse_moe.experts.46.w1", "model.layers.45.block_sparse_moe.experts.47.w1", "model.layers.45.block_sparse_moe.experts.48.w1", "model.layers.45.block_sparse_moe.experts.49.w1", "model.layers.45.block_sparse_moe.experts.50.w1", "model.layers.45.block_sparse_moe.experts.51.w1", "model.layers.45.block_sparse_moe.experts.52.w1", "model.layers.45.block_sparse_moe.experts.53.w1", "model.layers.45.block_sparse_moe.experts.54.w1", "model.layers.45.block_sparse_moe.experts.55.w1", "model.layers.45.block_sparse_moe.experts.56.w1", "model.layers.45.block_sparse_moe.experts.57.w1", "model.layers.45.block_sparse_moe.experts.58.w1", "model.layers.45.block_sparse_moe.experts.59.w1", "model.layers.45.block_sparse_moe.experts.60.w1", "model.layers.45.block_sparse_moe.experts.61.w1", "model.layers.45.block_sparse_moe.experts.62.w1", "model.layers.45.block_sparse_moe.experts.63.w1", "model.layers.45.block_sparse_moe.experts.64.w1", "model.layers.45.block_sparse_moe.experts.65.w1", "model.layers.45.block_sparse_moe.experts.66.w1", "model.layers.45.block_sparse_moe.experts.67.w1", "model.layers.45.block_sparse_moe.experts.68.w1", "model.layers.45.block_sparse_moe.experts.69.w1", "model.layers.45.block_sparse_moe.experts.70.w1", "model.layers.45.block_sparse_moe.experts.71.w1", "model.layers.45.block_sparse_moe.experts.72.w1", "model.layers.45.block_sparse_moe.experts.73.w1", "model.layers.45.block_sparse_moe.experts.74.w1", "model.layers.45.block_sparse_moe.experts.75.w1", "model.layers.45.block_sparse_moe.experts.76.w1", "model.layers.45.block_sparse_moe.experts.77.w1", "model.layers.45.block_sparse_moe.experts.78.w1", "model.layers.45.block_sparse_moe.experts.79.w1", "model.layers.45.block_sparse_moe.experts.80.w1", "model.layers.45.block_sparse_moe.experts.81.w1", "model.layers.45.block_sparse_moe.experts.82.w1", "model.layers.45.block_sparse_moe.experts.83.w1", "model.layers.45.block_sparse_moe.experts.84.w1", "model.layers.45.block_sparse_moe.experts.85.w1", "model.layers.45.block_sparse_moe.experts.86.w1", "model.layers.45.block_sparse_moe.experts.87.w1", "model.layers.45.block_sparse_moe.experts.88.w1", "model.layers.45.block_sparse_moe.experts.89.w1", "model.layers.45.block_sparse_moe.experts.90.w1", "model.layers.45.block_sparse_moe.experts.91.w1", "model.layers.45.block_sparse_moe.experts.92.w1", "model.layers.45.block_sparse_moe.experts.93.w1", "model.layers.45.block_sparse_moe.experts.94.w1", "model.layers.45.block_sparse_moe.experts.95.w1", "model.layers.45.block_sparse_moe.experts.96.w1", "model.layers.45.block_sparse_moe.experts.97.w1", "model.layers.45.block_sparse_moe.experts.98.w1", "model.layers.45.block_sparse_moe.experts.99.w1", "model.layers.45.block_sparse_moe.experts.100.w1", "model.layers.45.block_sparse_moe.experts.101.w1", "model.layers.45.block_sparse_moe.experts.102.w1", "model.layers.45.block_sparse_moe.experts.103.w1", "model.layers.45.block_sparse_moe.experts.104.w1", "model.layers.45.block_sparse_moe.experts.105.w1", "model.layers.45.block_sparse_moe.experts.106.w1", "model.layers.45.block_sparse_moe.experts.107.w1", "model.layers.45.block_sparse_moe.experts.108.w1", "model.layers.45.block_sparse_moe.experts.109.w1", "model.layers.45.block_sparse_moe.experts.110.w1", "model.layers.45.block_sparse_moe.experts.111.w1", "model.layers.45.block_sparse_moe.experts.112.w1", "model.layers.45.block_sparse_moe.experts.113.w1", "model.layers.45.block_sparse_moe.experts.114.w1", "model.layers.45.block_sparse_moe.experts.115.w1", "model.layers.45.block_sparse_moe.experts.116.w1", "model.layers.45.block_sparse_moe.experts.117.w1", "model.layers.45.block_sparse_moe.experts.118.w1", "model.layers.45.block_sparse_moe.experts.119.w1", "model.layers.45.block_sparse_moe.experts.120.w1", "model.layers.45.block_sparse_moe.experts.121.w1", "model.layers.45.block_sparse_moe.experts.122.w1", "model.layers.45.block_sparse_moe.experts.123.w1", "model.layers.45.block_sparse_moe.experts.124.w1", "model.layers.45.block_sparse_moe.experts.125.w1", "model.layers.45.block_sparse_moe.experts.126.w1", "model.layers.45.block_sparse_moe.experts.127.w1", "model.layers.45.block_sparse_moe.experts.128.w1", "model.layers.45.block_sparse_moe.experts.129.w1", "model.layers.45.block_sparse_moe.experts.130.w1", "model.layers.45.block_sparse_moe.experts.131.w1", "model.layers.45.block_sparse_moe.experts.132.w1", "model.layers.45.block_sparse_moe.experts.133.w1", "model.layers.45.block_sparse_moe.experts.134.w1", "model.layers.45.block_sparse_moe.experts.135.w1", "model.layers.45.block_sparse_moe.experts.136.w1", "model.layers.45.block_sparse_moe.experts.137.w1", "model.layers.45.block_sparse_moe.experts.138.w1", "model.layers.45.block_sparse_moe.experts.139.w1", "model.layers.45.block_sparse_moe.experts.140.w1", "model.layers.45.block_sparse_moe.experts.141.w1", "model.layers.45.block_sparse_moe.experts.142.w1", "model.layers.45.block_sparse_moe.experts.143.w1", "model.layers.45.block_sparse_moe.experts.144.w1", "model.layers.45.block_sparse_moe.experts.145.w1", "model.layers.45.block_sparse_moe.experts.146.w1", "model.layers.45.block_sparse_moe.experts.147.w1", "model.layers.45.block_sparse_moe.experts.148.w1", "model.layers.45.block_sparse_moe.experts.149.w1", "model.layers.45.block_sparse_moe.experts.150.w1", "model.layers.45.block_sparse_moe.experts.151.w1", "model.layers.45.block_sparse_moe.experts.152.w1", "model.layers.45.block_sparse_moe.experts.153.w1", "model.layers.45.block_sparse_moe.experts.154.w1", "model.layers.45.block_sparse_moe.experts.155.w1", "model.layers.45.block_sparse_moe.experts.156.w1", "model.layers.45.block_sparse_moe.experts.157.w1", "model.layers.45.block_sparse_moe.experts.158.w1", "model.layers.45.block_sparse_moe.experts.159.w1", "model.layers.45.block_sparse_moe.experts.160.w1", "model.layers.45.block_sparse_moe.experts.161.w1", "model.layers.45.block_sparse_moe.experts.162.w1", "model.layers.45.block_sparse_moe.experts.163.w1", "model.layers.45.block_sparse_moe.experts.164.w1", "model.layers.45.block_sparse_moe.experts.165.w1", "model.layers.45.block_sparse_moe.experts.166.w1", "model.layers.45.block_sparse_moe.experts.167.w1", "model.layers.45.block_sparse_moe.experts.168.w1", "model.layers.45.block_sparse_moe.experts.169.w1", "model.layers.45.block_sparse_moe.experts.170.w1", "model.layers.45.block_sparse_moe.experts.171.w1", "model.layers.45.block_sparse_moe.experts.172.w1", "model.layers.45.block_sparse_moe.experts.173.w1", "model.layers.45.block_sparse_moe.experts.174.w1", "model.layers.45.block_sparse_moe.experts.175.w1", "model.layers.45.block_sparse_moe.experts.176.w1", "model.layers.45.block_sparse_moe.experts.177.w1", "model.layers.45.block_sparse_moe.experts.178.w1", "model.layers.45.block_sparse_moe.experts.179.w1", "model.layers.45.block_sparse_moe.experts.180.w1", "model.layers.45.block_sparse_moe.experts.181.w1", "model.layers.45.block_sparse_moe.experts.182.w1", "model.layers.45.block_sparse_moe.experts.183.w1", "model.layers.45.block_sparse_moe.experts.184.w1", "model.layers.45.block_sparse_moe.experts.185.w1", "model.layers.45.block_sparse_moe.experts.186.w1", "model.layers.45.block_sparse_moe.experts.187.w1", "model.layers.45.block_sparse_moe.experts.188.w1", "model.layers.45.block_sparse_moe.experts.189.w1", "model.layers.45.block_sparse_moe.experts.190.w1", "model.layers.45.block_sparse_moe.experts.191.w1", "model.layers.45.block_sparse_moe.experts.192.w1", "model.layers.45.block_sparse_moe.experts.193.w1", "model.layers.45.block_sparse_moe.experts.194.w1", "model.layers.45.block_sparse_moe.experts.195.w1", "model.layers.45.block_sparse_moe.experts.196.w1", "model.layers.45.block_sparse_moe.experts.197.w1", "model.layers.45.block_sparse_moe.experts.198.w1", "model.layers.45.block_sparse_moe.experts.199.w1", "model.layers.45.block_sparse_moe.experts.200.w1", "model.layers.45.block_sparse_moe.experts.201.w1", "model.layers.45.block_sparse_moe.experts.202.w1", "model.layers.45.block_sparse_moe.experts.203.w1", "model.layers.45.block_sparse_moe.experts.204.w1", "model.layers.45.block_sparse_moe.experts.205.w1", "model.layers.45.block_sparse_moe.experts.206.w1", "model.layers.45.block_sparse_moe.experts.207.w1", "model.layers.45.block_sparse_moe.experts.208.w1", "model.layers.45.block_sparse_moe.experts.209.w1", "model.layers.45.block_sparse_moe.experts.210.w1", "model.layers.45.block_sparse_moe.experts.211.w1", "model.layers.45.block_sparse_moe.experts.212.w1", "model.layers.45.block_sparse_moe.experts.213.w1", "model.layers.45.block_sparse_moe.experts.214.w1", "model.layers.45.block_sparse_moe.experts.215.w1", "model.layers.45.block_sparse_moe.experts.216.w1", "model.layers.45.block_sparse_moe.experts.217.w1", "model.layers.45.block_sparse_moe.experts.218.w1", "model.layers.45.block_sparse_moe.experts.219.w1", "model.layers.45.block_sparse_moe.experts.220.w1", "model.layers.45.block_sparse_moe.experts.221.w1", "model.layers.45.block_sparse_moe.experts.222.w1", "model.layers.45.block_sparse_moe.experts.223.w1", "model.layers.45.block_sparse_moe.experts.224.w1", "model.layers.45.block_sparse_moe.experts.225.w1", "model.layers.45.block_sparse_moe.experts.226.w1", "model.layers.45.block_sparse_moe.experts.227.w1", "model.layers.45.block_sparse_moe.experts.228.w1", "model.layers.45.block_sparse_moe.experts.229.w1", "model.layers.45.block_sparse_moe.experts.230.w1", "model.layers.45.block_sparse_moe.experts.231.w1", "model.layers.45.block_sparse_moe.experts.232.w1", "model.layers.45.block_sparse_moe.experts.233.w1", "model.layers.45.block_sparse_moe.experts.234.w1", "model.layers.45.block_sparse_moe.experts.235.w1", "model.layers.45.block_sparse_moe.experts.236.w1", "model.layers.45.block_sparse_moe.experts.237.w1", "model.layers.45.block_sparse_moe.experts.238.w1", "model.layers.45.block_sparse_moe.experts.239.w1", "model.layers.45.block_sparse_moe.experts.240.w1", "model.layers.45.block_sparse_moe.experts.241.w1", "model.layers.45.block_sparse_moe.experts.242.w1", "model.layers.45.block_sparse_moe.experts.243.w1", "model.layers.45.block_sparse_moe.experts.244.w1", "model.layers.45.block_sparse_moe.experts.245.w1", "model.layers.45.block_sparse_moe.experts.246.w1", "model.layers.45.block_sparse_moe.experts.247.w1", "model.layers.45.block_sparse_moe.experts.248.w1", "model.layers.45.block_sparse_moe.experts.249.w1", "model.layers.45.block_sparse_moe.experts.250.w1", "model.layers.45.block_sparse_moe.experts.251.w1", "model.layers.45.block_sparse_moe.experts.252.w1", "model.layers.45.block_sparse_moe.experts.253.w1", "model.layers.45.block_sparse_moe.experts.254.w1", "model.layers.45.block_sparse_moe.experts.255.w1", "model.layers.45.block_sparse_moe.experts.0.w3", "model.layers.45.block_sparse_moe.experts.1.w3", "model.layers.45.block_sparse_moe.experts.2.w3", "model.layers.45.block_sparse_moe.experts.3.w3", "model.layers.45.block_sparse_moe.experts.4.w3", "model.layers.45.block_sparse_moe.experts.5.w3", "model.layers.45.block_sparse_moe.experts.6.w3", "model.layers.45.block_sparse_moe.experts.7.w3", "model.layers.45.block_sparse_moe.experts.8.w3", "model.layers.45.block_sparse_moe.experts.9.w3", "model.layers.45.block_sparse_moe.experts.10.w3", "model.layers.45.block_sparse_moe.experts.11.w3", "model.layers.45.block_sparse_moe.experts.12.w3", "model.layers.45.block_sparse_moe.experts.13.w3", "model.layers.45.block_sparse_moe.experts.14.w3", "model.layers.45.block_sparse_moe.experts.15.w3", "model.layers.45.block_sparse_moe.experts.16.w3", "model.layers.45.block_sparse_moe.experts.17.w3", "model.layers.45.block_sparse_moe.experts.18.w3", "model.layers.45.block_sparse_moe.experts.19.w3", "model.layers.45.block_sparse_moe.experts.20.w3", "model.layers.45.block_sparse_moe.experts.21.w3", "model.layers.45.block_sparse_moe.experts.22.w3", "model.layers.45.block_sparse_moe.experts.23.w3", "model.layers.45.block_sparse_moe.experts.24.w3", "model.layers.45.block_sparse_moe.experts.25.w3", "model.layers.45.block_sparse_moe.experts.26.w3", "model.layers.45.block_sparse_moe.experts.27.w3", "model.layers.45.block_sparse_moe.experts.28.w3", "model.layers.45.block_sparse_moe.experts.29.w3", "model.layers.45.block_sparse_moe.experts.30.w3", "model.layers.45.block_sparse_moe.experts.31.w3", "model.layers.45.block_sparse_moe.experts.32.w3", "model.layers.45.block_sparse_moe.experts.33.w3", "model.layers.45.block_sparse_moe.experts.34.w3", "model.layers.45.block_sparse_moe.experts.35.w3", "model.layers.45.block_sparse_moe.experts.36.w3", "model.layers.45.block_sparse_moe.experts.37.w3", "model.layers.45.block_sparse_moe.experts.38.w3", "model.layers.45.block_sparse_moe.experts.39.w3", "model.layers.45.block_sparse_moe.experts.40.w3", "model.layers.45.block_sparse_moe.experts.41.w3", "model.layers.45.block_sparse_moe.experts.42.w3", "model.layers.45.block_sparse_moe.experts.43.w3", "model.layers.45.block_sparse_moe.experts.44.w3", "model.layers.45.block_sparse_moe.experts.45.w3", "model.layers.45.block_sparse_moe.experts.46.w3", "model.layers.45.block_sparse_moe.experts.47.w3", "model.layers.45.block_sparse_moe.experts.48.w3", "model.layers.45.block_sparse_moe.experts.49.w3", "model.layers.45.block_sparse_moe.experts.50.w3", "model.layers.45.block_sparse_moe.experts.51.w3", "model.layers.45.block_sparse_moe.experts.52.w3", "model.layers.45.block_sparse_moe.experts.53.w3", "model.layers.45.block_sparse_moe.experts.54.w3", "model.layers.45.block_sparse_moe.experts.55.w3", "model.layers.45.block_sparse_moe.experts.56.w3", "model.layers.45.block_sparse_moe.experts.57.w3", "model.layers.45.block_sparse_moe.experts.58.w3", "model.layers.45.block_sparse_moe.experts.59.w3", "model.layers.45.block_sparse_moe.experts.60.w3", "model.layers.45.block_sparse_moe.experts.61.w3", "model.layers.45.block_sparse_moe.experts.62.w3", "model.layers.45.block_sparse_moe.experts.63.w3", "model.layers.45.block_sparse_moe.experts.64.w3", "model.layers.45.block_sparse_moe.experts.65.w3", "model.layers.45.block_sparse_moe.experts.66.w3", "model.layers.45.block_sparse_moe.experts.67.w3", "model.layers.45.block_sparse_moe.experts.68.w3", "model.layers.45.block_sparse_moe.experts.69.w3", "model.layers.45.block_sparse_moe.experts.70.w3", "model.layers.45.block_sparse_moe.experts.71.w3", "model.layers.45.block_sparse_moe.experts.72.w3", "model.layers.45.block_sparse_moe.experts.73.w3", "model.layers.45.block_sparse_moe.experts.74.w3", "model.layers.45.block_sparse_moe.experts.75.w3", "model.layers.45.block_sparse_moe.experts.76.w3", "model.layers.45.block_sparse_moe.experts.77.w3", "model.layers.45.block_sparse_moe.experts.78.w3", "model.layers.45.block_sparse_moe.experts.79.w3", "model.layers.45.block_sparse_moe.experts.80.w3", "model.layers.45.block_sparse_moe.experts.81.w3", "model.layers.45.block_sparse_moe.experts.82.w3", "model.layers.45.block_sparse_moe.experts.83.w3", "model.layers.45.block_sparse_moe.experts.84.w3", "model.layers.45.block_sparse_moe.experts.85.w3", "model.layers.45.block_sparse_moe.experts.86.w3", "model.layers.45.block_sparse_moe.experts.87.w3", "model.layers.45.block_sparse_moe.experts.88.w3", "model.layers.45.block_sparse_moe.experts.89.w3", "model.layers.45.block_sparse_moe.experts.90.w3", "model.layers.45.block_sparse_moe.experts.91.w3", "model.layers.45.block_sparse_moe.experts.92.w3", "model.layers.45.block_sparse_moe.experts.93.w3", "model.layers.45.block_sparse_moe.experts.94.w3", "model.layers.45.block_sparse_moe.experts.95.w3", "model.layers.45.block_sparse_moe.experts.96.w3", "model.layers.45.block_sparse_moe.experts.97.w3", "model.layers.45.block_sparse_moe.experts.98.w3", "model.layers.45.block_sparse_moe.experts.99.w3", "model.layers.45.block_sparse_moe.experts.100.w3", "model.layers.45.block_sparse_moe.experts.101.w3", "model.layers.45.block_sparse_moe.experts.102.w3", "model.layers.45.block_sparse_moe.experts.103.w3", "model.layers.45.block_sparse_moe.experts.104.w3", "model.layers.45.block_sparse_moe.experts.105.w3", "model.layers.45.block_sparse_moe.experts.106.w3", "model.layers.45.block_sparse_moe.experts.107.w3", "model.layers.45.block_sparse_moe.experts.108.w3", "model.layers.45.block_sparse_moe.experts.109.w3", "model.layers.45.block_sparse_moe.experts.110.w3", "model.layers.45.block_sparse_moe.experts.111.w3", "model.layers.45.block_sparse_moe.experts.112.w3", "model.layers.45.block_sparse_moe.experts.113.w3", "model.layers.45.block_sparse_moe.experts.114.w3", "model.layers.45.block_sparse_moe.experts.115.w3", "model.layers.45.block_sparse_moe.experts.116.w3", "model.layers.45.block_sparse_moe.experts.117.w3", "model.layers.45.block_sparse_moe.experts.118.w3", "model.layers.45.block_sparse_moe.experts.119.w3", "model.layers.45.block_sparse_moe.experts.120.w3", "model.layers.45.block_sparse_moe.experts.121.w3", "model.layers.45.block_sparse_moe.experts.122.w3", "model.layers.45.block_sparse_moe.experts.123.w3", "model.layers.45.block_sparse_moe.experts.124.w3", "model.layers.45.block_sparse_moe.experts.125.w3", "model.layers.45.block_sparse_moe.experts.126.w3", "model.layers.45.block_sparse_moe.experts.127.w3", "model.layers.45.block_sparse_moe.experts.128.w3", "model.layers.45.block_sparse_moe.experts.129.w3", "model.layers.45.block_sparse_moe.experts.130.w3", "model.layers.45.block_sparse_moe.experts.131.w3", "model.layers.45.block_sparse_moe.experts.132.w3", "model.layers.45.block_sparse_moe.experts.133.w3", "model.layers.45.block_sparse_moe.experts.134.w3", "model.layers.45.block_sparse_moe.experts.135.w3", "model.layers.45.block_sparse_moe.experts.136.w3", "model.layers.45.block_sparse_moe.experts.137.w3", "model.layers.45.block_sparse_moe.experts.138.w3", "model.layers.45.block_sparse_moe.experts.139.w3", "model.layers.45.block_sparse_moe.experts.140.w3", "model.layers.45.block_sparse_moe.experts.141.w3", "model.layers.45.block_sparse_moe.experts.142.w3", "model.layers.45.block_sparse_moe.experts.143.w3", "model.layers.45.block_sparse_moe.experts.144.w3", "model.layers.45.block_sparse_moe.experts.145.w3", "model.layers.45.block_sparse_moe.experts.146.w3", "model.layers.45.block_sparse_moe.experts.147.w3", "model.layers.45.block_sparse_moe.experts.148.w3", "model.layers.45.block_sparse_moe.experts.149.w3", "model.layers.45.block_sparse_moe.experts.150.w3", "model.layers.45.block_sparse_moe.experts.151.w3", "model.layers.45.block_sparse_moe.experts.152.w3", "model.layers.45.block_sparse_moe.experts.153.w3", "model.layers.45.block_sparse_moe.experts.154.w3", "model.layers.45.block_sparse_moe.experts.155.w3", "model.layers.45.block_sparse_moe.experts.156.w3", "model.layers.45.block_sparse_moe.experts.157.w3", "model.layers.45.block_sparse_moe.experts.158.w3", "model.layers.45.block_sparse_moe.experts.159.w3", "model.layers.45.block_sparse_moe.experts.160.w3", "model.layers.45.block_sparse_moe.experts.161.w3", "model.layers.45.block_sparse_moe.experts.162.w3", "model.layers.45.block_sparse_moe.experts.163.w3", "model.layers.45.block_sparse_moe.experts.164.w3", "model.layers.45.block_sparse_moe.experts.165.w3", "model.layers.45.block_sparse_moe.experts.166.w3", "model.layers.45.block_sparse_moe.experts.167.w3", "model.layers.45.block_sparse_moe.experts.168.w3", "model.layers.45.block_sparse_moe.experts.169.w3", "model.layers.45.block_sparse_moe.experts.170.w3", "model.layers.45.block_sparse_moe.experts.171.w3", "model.layers.45.block_sparse_moe.experts.172.w3", "model.layers.45.block_sparse_moe.experts.173.w3", "model.layers.45.block_sparse_moe.experts.174.w3", "model.layers.45.block_sparse_moe.experts.175.w3", "model.layers.45.block_sparse_moe.experts.176.w3", "model.layers.45.block_sparse_moe.experts.177.w3", "model.layers.45.block_sparse_moe.experts.178.w3", "model.layers.45.block_sparse_moe.experts.179.w3", "model.layers.45.block_sparse_moe.experts.180.w3", "model.layers.45.block_sparse_moe.experts.181.w3", "model.layers.45.block_sparse_moe.experts.182.w3", "model.layers.45.block_sparse_moe.experts.183.w3", "model.layers.45.block_sparse_moe.experts.184.w3", "model.layers.45.block_sparse_moe.experts.185.w3", "model.layers.45.block_sparse_moe.experts.186.w3", "model.layers.45.block_sparse_moe.experts.187.w3", "model.layers.45.block_sparse_moe.experts.188.w3", "model.layers.45.block_sparse_moe.experts.189.w3", "model.layers.45.block_sparse_moe.experts.190.w3", "model.layers.45.block_sparse_moe.experts.191.w3", "model.layers.45.block_sparse_moe.experts.192.w3", "model.layers.45.block_sparse_moe.experts.193.w3", "model.layers.45.block_sparse_moe.experts.194.w3", "model.layers.45.block_sparse_moe.experts.195.w3", "model.layers.45.block_sparse_moe.experts.196.w3", "model.layers.45.block_sparse_moe.experts.197.w3", "model.layers.45.block_sparse_moe.experts.198.w3", "model.layers.45.block_sparse_moe.experts.199.w3", "model.layers.45.block_sparse_moe.experts.200.w3", "model.layers.45.block_sparse_moe.experts.201.w3", "model.layers.45.block_sparse_moe.experts.202.w3", "model.layers.45.block_sparse_moe.experts.203.w3", "model.layers.45.block_sparse_moe.experts.204.w3", "model.layers.45.block_sparse_moe.experts.205.w3", "model.layers.45.block_sparse_moe.experts.206.w3", "model.layers.45.block_sparse_moe.experts.207.w3", "model.layers.45.block_sparse_moe.experts.208.w3", "model.layers.45.block_sparse_moe.experts.209.w3", "model.layers.45.block_sparse_moe.experts.210.w3", "model.layers.45.block_sparse_moe.experts.211.w3", "model.layers.45.block_sparse_moe.experts.212.w3", "model.layers.45.block_sparse_moe.experts.213.w3", "model.layers.45.block_sparse_moe.experts.214.w3", "model.layers.45.block_sparse_moe.experts.215.w3", "model.layers.45.block_sparse_moe.experts.216.w3", "model.layers.45.block_sparse_moe.experts.217.w3", "model.layers.45.block_sparse_moe.experts.218.w3", "model.layers.45.block_sparse_moe.experts.219.w3", "model.layers.45.block_sparse_moe.experts.220.w3", "model.layers.45.block_sparse_moe.experts.221.w3", "model.layers.45.block_sparse_moe.experts.222.w3", "model.layers.45.block_sparse_moe.experts.223.w3", "model.layers.45.block_sparse_moe.experts.224.w3", "model.layers.45.block_sparse_moe.experts.225.w3", "model.layers.45.block_sparse_moe.experts.226.w3", "model.layers.45.block_sparse_moe.experts.227.w3", "model.layers.45.block_sparse_moe.experts.228.w3", "model.layers.45.block_sparse_moe.experts.229.w3", "model.layers.45.block_sparse_moe.experts.230.w3", "model.layers.45.block_sparse_moe.experts.231.w3", "model.layers.45.block_sparse_moe.experts.232.w3", "model.layers.45.block_sparse_moe.experts.233.w3", "model.layers.45.block_sparse_moe.experts.234.w3", "model.layers.45.block_sparse_moe.experts.235.w3", "model.layers.45.block_sparse_moe.experts.236.w3", "model.layers.45.block_sparse_moe.experts.237.w3", "model.layers.45.block_sparse_moe.experts.238.w3", "model.layers.45.block_sparse_moe.experts.239.w3", "model.layers.45.block_sparse_moe.experts.240.w3", "model.layers.45.block_sparse_moe.experts.241.w3", "model.layers.45.block_sparse_moe.experts.242.w3", "model.layers.45.block_sparse_moe.experts.243.w3", "model.layers.45.block_sparse_moe.experts.244.w3", "model.layers.45.block_sparse_moe.experts.245.w3", "model.layers.45.block_sparse_moe.experts.246.w3", "model.layers.45.block_sparse_moe.experts.247.w3", "model.layers.45.block_sparse_moe.experts.248.w3", "model.layers.45.block_sparse_moe.experts.249.w3", "model.layers.45.block_sparse_moe.experts.250.w3", "model.layers.45.block_sparse_moe.experts.251.w3", "model.layers.45.block_sparse_moe.experts.252.w3", "model.layers.45.block_sparse_moe.experts.253.w3", "model.layers.45.block_sparse_moe.experts.254.w3", "model.layers.45.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.0004950135946273915, "dbits": 2415919104 } ] }, { "idx": 229, "layers": [ "model.layers.45.block_sparse_moe.experts.0.w2", "model.layers.45.block_sparse_moe.experts.1.w2", "model.layers.45.block_sparse_moe.experts.2.w2", "model.layers.45.block_sparse_moe.experts.3.w2", "model.layers.45.block_sparse_moe.experts.4.w2", "model.layers.45.block_sparse_moe.experts.5.w2", "model.layers.45.block_sparse_moe.experts.6.w2", "model.layers.45.block_sparse_moe.experts.7.w2", "model.layers.45.block_sparse_moe.experts.8.w2", "model.layers.45.block_sparse_moe.experts.9.w2", "model.layers.45.block_sparse_moe.experts.10.w2", "model.layers.45.block_sparse_moe.experts.11.w2", "model.layers.45.block_sparse_moe.experts.12.w2", "model.layers.45.block_sparse_moe.experts.13.w2", "model.layers.45.block_sparse_moe.experts.14.w2", "model.layers.45.block_sparse_moe.experts.15.w2", "model.layers.45.block_sparse_moe.experts.16.w2", "model.layers.45.block_sparse_moe.experts.17.w2", "model.layers.45.block_sparse_moe.experts.18.w2", "model.layers.45.block_sparse_moe.experts.19.w2", "model.layers.45.block_sparse_moe.experts.20.w2", "model.layers.45.block_sparse_moe.experts.21.w2", "model.layers.45.block_sparse_moe.experts.22.w2", "model.layers.45.block_sparse_moe.experts.23.w2", "model.layers.45.block_sparse_moe.experts.24.w2", "model.layers.45.block_sparse_moe.experts.25.w2", "model.layers.45.block_sparse_moe.experts.26.w2", "model.layers.45.block_sparse_moe.experts.27.w2", "model.layers.45.block_sparse_moe.experts.28.w2", "model.layers.45.block_sparse_moe.experts.29.w2", "model.layers.45.block_sparse_moe.experts.30.w2", "model.layers.45.block_sparse_moe.experts.31.w2", "model.layers.45.block_sparse_moe.experts.32.w2", "model.layers.45.block_sparse_moe.experts.33.w2", "model.layers.45.block_sparse_moe.experts.34.w2", "model.layers.45.block_sparse_moe.experts.35.w2", "model.layers.45.block_sparse_moe.experts.36.w2", "model.layers.45.block_sparse_moe.experts.37.w2", "model.layers.45.block_sparse_moe.experts.38.w2", "model.layers.45.block_sparse_moe.experts.39.w2", "model.layers.45.block_sparse_moe.experts.40.w2", "model.layers.45.block_sparse_moe.experts.41.w2", "model.layers.45.block_sparse_moe.experts.42.w2", "model.layers.45.block_sparse_moe.experts.43.w2", "model.layers.45.block_sparse_moe.experts.44.w2", "model.layers.45.block_sparse_moe.experts.45.w2", "model.layers.45.block_sparse_moe.experts.46.w2", "model.layers.45.block_sparse_moe.experts.47.w2", "model.layers.45.block_sparse_moe.experts.48.w2", "model.layers.45.block_sparse_moe.experts.49.w2", "model.layers.45.block_sparse_moe.experts.50.w2", "model.layers.45.block_sparse_moe.experts.51.w2", "model.layers.45.block_sparse_moe.experts.52.w2", "model.layers.45.block_sparse_moe.experts.53.w2", "model.layers.45.block_sparse_moe.experts.54.w2", "model.layers.45.block_sparse_moe.experts.55.w2", "model.layers.45.block_sparse_moe.experts.56.w2", "model.layers.45.block_sparse_moe.experts.57.w2", "model.layers.45.block_sparse_moe.experts.58.w2", "model.layers.45.block_sparse_moe.experts.59.w2", "model.layers.45.block_sparse_moe.experts.60.w2", "model.layers.45.block_sparse_moe.experts.61.w2", "model.layers.45.block_sparse_moe.experts.62.w2", "model.layers.45.block_sparse_moe.experts.63.w2", "model.layers.45.block_sparse_moe.experts.64.w2", "model.layers.45.block_sparse_moe.experts.65.w2", "model.layers.45.block_sparse_moe.experts.66.w2", "model.layers.45.block_sparse_moe.experts.67.w2", "model.layers.45.block_sparse_moe.experts.68.w2", "model.layers.45.block_sparse_moe.experts.69.w2", "model.layers.45.block_sparse_moe.experts.70.w2", "model.layers.45.block_sparse_moe.experts.71.w2", "model.layers.45.block_sparse_moe.experts.72.w2", "model.layers.45.block_sparse_moe.experts.73.w2", "model.layers.45.block_sparse_moe.experts.74.w2", "model.layers.45.block_sparse_moe.experts.75.w2", "model.layers.45.block_sparse_moe.experts.76.w2", "model.layers.45.block_sparse_moe.experts.77.w2", "model.layers.45.block_sparse_moe.experts.78.w2", "model.layers.45.block_sparse_moe.experts.79.w2", "model.layers.45.block_sparse_moe.experts.80.w2", "model.layers.45.block_sparse_moe.experts.81.w2", "model.layers.45.block_sparse_moe.experts.82.w2", "model.layers.45.block_sparse_moe.experts.83.w2", "model.layers.45.block_sparse_moe.experts.84.w2", "model.layers.45.block_sparse_moe.experts.85.w2", "model.layers.45.block_sparse_moe.experts.86.w2", "model.layers.45.block_sparse_moe.experts.87.w2", "model.layers.45.block_sparse_moe.experts.88.w2", "model.layers.45.block_sparse_moe.experts.89.w2", "model.layers.45.block_sparse_moe.experts.90.w2", "model.layers.45.block_sparse_moe.experts.91.w2", "model.layers.45.block_sparse_moe.experts.92.w2", "model.layers.45.block_sparse_moe.experts.93.w2", "model.layers.45.block_sparse_moe.experts.94.w2", "model.layers.45.block_sparse_moe.experts.95.w2", "model.layers.45.block_sparse_moe.experts.96.w2", "model.layers.45.block_sparse_moe.experts.97.w2", "model.layers.45.block_sparse_moe.experts.98.w2", "model.layers.45.block_sparse_moe.experts.99.w2", "model.layers.45.block_sparse_moe.experts.100.w2", "model.layers.45.block_sparse_moe.experts.101.w2", "model.layers.45.block_sparse_moe.experts.102.w2", "model.layers.45.block_sparse_moe.experts.103.w2", "model.layers.45.block_sparse_moe.experts.104.w2", "model.layers.45.block_sparse_moe.experts.105.w2", "model.layers.45.block_sparse_moe.experts.106.w2", "model.layers.45.block_sparse_moe.experts.107.w2", "model.layers.45.block_sparse_moe.experts.108.w2", "model.layers.45.block_sparse_moe.experts.109.w2", "model.layers.45.block_sparse_moe.experts.110.w2", "model.layers.45.block_sparse_moe.experts.111.w2", "model.layers.45.block_sparse_moe.experts.112.w2", "model.layers.45.block_sparse_moe.experts.113.w2", "model.layers.45.block_sparse_moe.experts.114.w2", "model.layers.45.block_sparse_moe.experts.115.w2", "model.layers.45.block_sparse_moe.experts.116.w2", "model.layers.45.block_sparse_moe.experts.117.w2", "model.layers.45.block_sparse_moe.experts.118.w2", "model.layers.45.block_sparse_moe.experts.119.w2", "model.layers.45.block_sparse_moe.experts.120.w2", "model.layers.45.block_sparse_moe.experts.121.w2", "model.layers.45.block_sparse_moe.experts.122.w2", "model.layers.45.block_sparse_moe.experts.123.w2", "model.layers.45.block_sparse_moe.experts.124.w2", "model.layers.45.block_sparse_moe.experts.125.w2", "model.layers.45.block_sparse_moe.experts.126.w2", "model.layers.45.block_sparse_moe.experts.127.w2", "model.layers.45.block_sparse_moe.experts.128.w2", "model.layers.45.block_sparse_moe.experts.129.w2", "model.layers.45.block_sparse_moe.experts.130.w2", "model.layers.45.block_sparse_moe.experts.131.w2", "model.layers.45.block_sparse_moe.experts.132.w2", "model.layers.45.block_sparse_moe.experts.133.w2", "model.layers.45.block_sparse_moe.experts.134.w2", "model.layers.45.block_sparse_moe.experts.135.w2", "model.layers.45.block_sparse_moe.experts.136.w2", "model.layers.45.block_sparse_moe.experts.137.w2", "model.layers.45.block_sparse_moe.experts.138.w2", "model.layers.45.block_sparse_moe.experts.139.w2", "model.layers.45.block_sparse_moe.experts.140.w2", "model.layers.45.block_sparse_moe.experts.141.w2", "model.layers.45.block_sparse_moe.experts.142.w2", "model.layers.45.block_sparse_moe.experts.143.w2", "model.layers.45.block_sparse_moe.experts.144.w2", "model.layers.45.block_sparse_moe.experts.145.w2", "model.layers.45.block_sparse_moe.experts.146.w2", "model.layers.45.block_sparse_moe.experts.147.w2", "model.layers.45.block_sparse_moe.experts.148.w2", "model.layers.45.block_sparse_moe.experts.149.w2", "model.layers.45.block_sparse_moe.experts.150.w2", "model.layers.45.block_sparse_moe.experts.151.w2", "model.layers.45.block_sparse_moe.experts.152.w2", "model.layers.45.block_sparse_moe.experts.153.w2", "model.layers.45.block_sparse_moe.experts.154.w2", "model.layers.45.block_sparse_moe.experts.155.w2", "model.layers.45.block_sparse_moe.experts.156.w2", "model.layers.45.block_sparse_moe.experts.157.w2", "model.layers.45.block_sparse_moe.experts.158.w2", "model.layers.45.block_sparse_moe.experts.159.w2", "model.layers.45.block_sparse_moe.experts.160.w2", "model.layers.45.block_sparse_moe.experts.161.w2", "model.layers.45.block_sparse_moe.experts.162.w2", "model.layers.45.block_sparse_moe.experts.163.w2", "model.layers.45.block_sparse_moe.experts.164.w2", "model.layers.45.block_sparse_moe.experts.165.w2", "model.layers.45.block_sparse_moe.experts.166.w2", "model.layers.45.block_sparse_moe.experts.167.w2", "model.layers.45.block_sparse_moe.experts.168.w2", "model.layers.45.block_sparse_moe.experts.169.w2", "model.layers.45.block_sparse_moe.experts.170.w2", "model.layers.45.block_sparse_moe.experts.171.w2", "model.layers.45.block_sparse_moe.experts.172.w2", "model.layers.45.block_sparse_moe.experts.173.w2", "model.layers.45.block_sparse_moe.experts.174.w2", "model.layers.45.block_sparse_moe.experts.175.w2", "model.layers.45.block_sparse_moe.experts.176.w2", "model.layers.45.block_sparse_moe.experts.177.w2", "model.layers.45.block_sparse_moe.experts.178.w2", "model.layers.45.block_sparse_moe.experts.179.w2", "model.layers.45.block_sparse_moe.experts.180.w2", "model.layers.45.block_sparse_moe.experts.181.w2", "model.layers.45.block_sparse_moe.experts.182.w2", "model.layers.45.block_sparse_moe.experts.183.w2", "model.layers.45.block_sparse_moe.experts.184.w2", "model.layers.45.block_sparse_moe.experts.185.w2", "model.layers.45.block_sparse_moe.experts.186.w2", "model.layers.45.block_sparse_moe.experts.187.w2", "model.layers.45.block_sparse_moe.experts.188.w2", "model.layers.45.block_sparse_moe.experts.189.w2", "model.layers.45.block_sparse_moe.experts.190.w2", "model.layers.45.block_sparse_moe.experts.191.w2", "model.layers.45.block_sparse_moe.experts.192.w2", "model.layers.45.block_sparse_moe.experts.193.w2", "model.layers.45.block_sparse_moe.experts.194.w2", "model.layers.45.block_sparse_moe.experts.195.w2", "model.layers.45.block_sparse_moe.experts.196.w2", "model.layers.45.block_sparse_moe.experts.197.w2", "model.layers.45.block_sparse_moe.experts.198.w2", "model.layers.45.block_sparse_moe.experts.199.w2", "model.layers.45.block_sparse_moe.experts.200.w2", "model.layers.45.block_sparse_moe.experts.201.w2", "model.layers.45.block_sparse_moe.experts.202.w2", "model.layers.45.block_sparse_moe.experts.203.w2", "model.layers.45.block_sparse_moe.experts.204.w2", "model.layers.45.block_sparse_moe.experts.205.w2", "model.layers.45.block_sparse_moe.experts.206.w2", "model.layers.45.block_sparse_moe.experts.207.w2", "model.layers.45.block_sparse_moe.experts.208.w2", "model.layers.45.block_sparse_moe.experts.209.w2", "model.layers.45.block_sparse_moe.experts.210.w2", "model.layers.45.block_sparse_moe.experts.211.w2", "model.layers.45.block_sparse_moe.experts.212.w2", "model.layers.45.block_sparse_moe.experts.213.w2", "model.layers.45.block_sparse_moe.experts.214.w2", "model.layers.45.block_sparse_moe.experts.215.w2", "model.layers.45.block_sparse_moe.experts.216.w2", "model.layers.45.block_sparse_moe.experts.217.w2", "model.layers.45.block_sparse_moe.experts.218.w2", "model.layers.45.block_sparse_moe.experts.219.w2", "model.layers.45.block_sparse_moe.experts.220.w2", "model.layers.45.block_sparse_moe.experts.221.w2", "model.layers.45.block_sparse_moe.experts.222.w2", "model.layers.45.block_sparse_moe.experts.223.w2", "model.layers.45.block_sparse_moe.experts.224.w2", "model.layers.45.block_sparse_moe.experts.225.w2", "model.layers.45.block_sparse_moe.experts.226.w2", "model.layers.45.block_sparse_moe.experts.227.w2", "model.layers.45.block_sparse_moe.experts.228.w2", "model.layers.45.block_sparse_moe.experts.229.w2", "model.layers.45.block_sparse_moe.experts.230.w2", "model.layers.45.block_sparse_moe.experts.231.w2", "model.layers.45.block_sparse_moe.experts.232.w2", "model.layers.45.block_sparse_moe.experts.233.w2", "model.layers.45.block_sparse_moe.experts.234.w2", "model.layers.45.block_sparse_moe.experts.235.w2", "model.layers.45.block_sparse_moe.experts.236.w2", "model.layers.45.block_sparse_moe.experts.237.w2", "model.layers.45.block_sparse_moe.experts.238.w2", "model.layers.45.block_sparse_moe.experts.239.w2", "model.layers.45.block_sparse_moe.experts.240.w2", "model.layers.45.block_sparse_moe.experts.241.w2", "model.layers.45.block_sparse_moe.experts.242.w2", "model.layers.45.block_sparse_moe.experts.243.w2", "model.layers.45.block_sparse_moe.experts.244.w2", "model.layers.45.block_sparse_moe.experts.245.w2", "model.layers.45.block_sparse_moe.experts.246.w2", "model.layers.45.block_sparse_moe.experts.247.w2", "model.layers.45.block_sparse_moe.experts.248.w2", "model.layers.45.block_sparse_moe.experts.249.w2", "model.layers.45.block_sparse_moe.experts.250.w2", "model.layers.45.block_sparse_moe.experts.251.w2", "model.layers.45.block_sparse_moe.experts.252.w2", "model.layers.45.block_sparse_moe.experts.253.w2", "model.layers.45.block_sparse_moe.experts.254.w2", "model.layers.45.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.000205787271261193, "dbits": 1207959552 } ] }, { "idx": 230, "layers": [ "model.layers.46.self_attn.q_proj" ], "candidates": [ { "dkld": 0.00012597516179085888, "dbits": 18874368 } ] }, { "idx": 231, "layers": [ "model.layers.46.self_attn.k_proj", "model.layers.46.self_attn.v_proj" ], "candidates": [ { "dkld": -0.004496976360678695, "dbits": 6291456 } ] }, { "idx": 232, "layers": [ "model.layers.46.self_attn.o_proj" ], "candidates": [ { "dkld": -9.051486849787072e-05, "dbits": 18874368 } ] }, { "idx": 233, "layers": [ "model.layers.46.block_sparse_moe.experts.0.w1", "model.layers.46.block_sparse_moe.experts.1.w1", "model.layers.46.block_sparse_moe.experts.2.w1", "model.layers.46.block_sparse_moe.experts.3.w1", "model.layers.46.block_sparse_moe.experts.4.w1", "model.layers.46.block_sparse_moe.experts.5.w1", "model.layers.46.block_sparse_moe.experts.6.w1", "model.layers.46.block_sparse_moe.experts.7.w1", "model.layers.46.block_sparse_moe.experts.8.w1", "model.layers.46.block_sparse_moe.experts.9.w1", "model.layers.46.block_sparse_moe.experts.10.w1", "model.layers.46.block_sparse_moe.experts.11.w1", "model.layers.46.block_sparse_moe.experts.12.w1", "model.layers.46.block_sparse_moe.experts.13.w1", "model.layers.46.block_sparse_moe.experts.14.w1", "model.layers.46.block_sparse_moe.experts.15.w1", "model.layers.46.block_sparse_moe.experts.16.w1", "model.layers.46.block_sparse_moe.experts.17.w1", "model.layers.46.block_sparse_moe.experts.18.w1", "model.layers.46.block_sparse_moe.experts.19.w1", "model.layers.46.block_sparse_moe.experts.20.w1", "model.layers.46.block_sparse_moe.experts.21.w1", "model.layers.46.block_sparse_moe.experts.22.w1", "model.layers.46.block_sparse_moe.experts.23.w1", "model.layers.46.block_sparse_moe.experts.24.w1", "model.layers.46.block_sparse_moe.experts.25.w1", "model.layers.46.block_sparse_moe.experts.26.w1", "model.layers.46.block_sparse_moe.experts.27.w1", "model.layers.46.block_sparse_moe.experts.28.w1", "model.layers.46.block_sparse_moe.experts.29.w1", "model.layers.46.block_sparse_moe.experts.30.w1", "model.layers.46.block_sparse_moe.experts.31.w1", "model.layers.46.block_sparse_moe.experts.32.w1", "model.layers.46.block_sparse_moe.experts.33.w1", "model.layers.46.block_sparse_moe.experts.34.w1", "model.layers.46.block_sparse_moe.experts.35.w1", "model.layers.46.block_sparse_moe.experts.36.w1", "model.layers.46.block_sparse_moe.experts.37.w1", "model.layers.46.block_sparse_moe.experts.38.w1", "model.layers.46.block_sparse_moe.experts.39.w1", "model.layers.46.block_sparse_moe.experts.40.w1", "model.layers.46.block_sparse_moe.experts.41.w1", "model.layers.46.block_sparse_moe.experts.42.w1", "model.layers.46.block_sparse_moe.experts.43.w1", "model.layers.46.block_sparse_moe.experts.44.w1", "model.layers.46.block_sparse_moe.experts.45.w1", "model.layers.46.block_sparse_moe.experts.46.w1", "model.layers.46.block_sparse_moe.experts.47.w1", "model.layers.46.block_sparse_moe.experts.48.w1", "model.layers.46.block_sparse_moe.experts.49.w1", "model.layers.46.block_sparse_moe.experts.50.w1", "model.layers.46.block_sparse_moe.experts.51.w1", "model.layers.46.block_sparse_moe.experts.52.w1", "model.layers.46.block_sparse_moe.experts.53.w1", "model.layers.46.block_sparse_moe.experts.54.w1", "model.layers.46.block_sparse_moe.experts.55.w1", "model.layers.46.block_sparse_moe.experts.56.w1", "model.layers.46.block_sparse_moe.experts.57.w1", "model.layers.46.block_sparse_moe.experts.58.w1", "model.layers.46.block_sparse_moe.experts.59.w1", "model.layers.46.block_sparse_moe.experts.60.w1", "model.layers.46.block_sparse_moe.experts.61.w1", "model.layers.46.block_sparse_moe.experts.62.w1", "model.layers.46.block_sparse_moe.experts.63.w1", "model.layers.46.block_sparse_moe.experts.64.w1", "model.layers.46.block_sparse_moe.experts.65.w1", "model.layers.46.block_sparse_moe.experts.66.w1", "model.layers.46.block_sparse_moe.experts.67.w1", "model.layers.46.block_sparse_moe.experts.68.w1", "model.layers.46.block_sparse_moe.experts.69.w1", "model.layers.46.block_sparse_moe.experts.70.w1", "model.layers.46.block_sparse_moe.experts.71.w1", "model.layers.46.block_sparse_moe.experts.72.w1", "model.layers.46.block_sparse_moe.experts.73.w1", "model.layers.46.block_sparse_moe.experts.74.w1", "model.layers.46.block_sparse_moe.experts.75.w1", "model.layers.46.block_sparse_moe.experts.76.w1", "model.layers.46.block_sparse_moe.experts.77.w1", "model.layers.46.block_sparse_moe.experts.78.w1", "model.layers.46.block_sparse_moe.experts.79.w1", "model.layers.46.block_sparse_moe.experts.80.w1", "model.layers.46.block_sparse_moe.experts.81.w1", "model.layers.46.block_sparse_moe.experts.82.w1", "model.layers.46.block_sparse_moe.experts.83.w1", "model.layers.46.block_sparse_moe.experts.84.w1", "model.layers.46.block_sparse_moe.experts.85.w1", "model.layers.46.block_sparse_moe.experts.86.w1", "model.layers.46.block_sparse_moe.experts.87.w1", "model.layers.46.block_sparse_moe.experts.88.w1", "model.layers.46.block_sparse_moe.experts.89.w1", "model.layers.46.block_sparse_moe.experts.90.w1", "model.layers.46.block_sparse_moe.experts.91.w1", "model.layers.46.block_sparse_moe.experts.92.w1", "model.layers.46.block_sparse_moe.experts.93.w1", "model.layers.46.block_sparse_moe.experts.94.w1", "model.layers.46.block_sparse_moe.experts.95.w1", "model.layers.46.block_sparse_moe.experts.96.w1", "model.layers.46.block_sparse_moe.experts.97.w1", "model.layers.46.block_sparse_moe.experts.98.w1", "model.layers.46.block_sparse_moe.experts.99.w1", "model.layers.46.block_sparse_moe.experts.100.w1", "model.layers.46.block_sparse_moe.experts.101.w1", "model.layers.46.block_sparse_moe.experts.102.w1", "model.layers.46.block_sparse_moe.experts.103.w1", "model.layers.46.block_sparse_moe.experts.104.w1", "model.layers.46.block_sparse_moe.experts.105.w1", "model.layers.46.block_sparse_moe.experts.106.w1", "model.layers.46.block_sparse_moe.experts.107.w1", "model.layers.46.block_sparse_moe.experts.108.w1", "model.layers.46.block_sparse_moe.experts.109.w1", "model.layers.46.block_sparse_moe.experts.110.w1", "model.layers.46.block_sparse_moe.experts.111.w1", "model.layers.46.block_sparse_moe.experts.112.w1", "model.layers.46.block_sparse_moe.experts.113.w1", "model.layers.46.block_sparse_moe.experts.114.w1", "model.layers.46.block_sparse_moe.experts.115.w1", "model.layers.46.block_sparse_moe.experts.116.w1", "model.layers.46.block_sparse_moe.experts.117.w1", "model.layers.46.block_sparse_moe.experts.118.w1", "model.layers.46.block_sparse_moe.experts.119.w1", "model.layers.46.block_sparse_moe.experts.120.w1", "model.layers.46.block_sparse_moe.experts.121.w1", "model.layers.46.block_sparse_moe.experts.122.w1", "model.layers.46.block_sparse_moe.experts.123.w1", "model.layers.46.block_sparse_moe.experts.124.w1", "model.layers.46.block_sparse_moe.experts.125.w1", "model.layers.46.block_sparse_moe.experts.126.w1", "model.layers.46.block_sparse_moe.experts.127.w1", "model.layers.46.block_sparse_moe.experts.128.w1", "model.layers.46.block_sparse_moe.experts.129.w1", "model.layers.46.block_sparse_moe.experts.130.w1", "model.layers.46.block_sparse_moe.experts.131.w1", "model.layers.46.block_sparse_moe.experts.132.w1", "model.layers.46.block_sparse_moe.experts.133.w1", "model.layers.46.block_sparse_moe.experts.134.w1", "model.layers.46.block_sparse_moe.experts.135.w1", "model.layers.46.block_sparse_moe.experts.136.w1", "model.layers.46.block_sparse_moe.experts.137.w1", "model.layers.46.block_sparse_moe.experts.138.w1", "model.layers.46.block_sparse_moe.experts.139.w1", "model.layers.46.block_sparse_moe.experts.140.w1", "model.layers.46.block_sparse_moe.experts.141.w1", "model.layers.46.block_sparse_moe.experts.142.w1", "model.layers.46.block_sparse_moe.experts.143.w1", "model.layers.46.block_sparse_moe.experts.144.w1", "model.layers.46.block_sparse_moe.experts.145.w1", "model.layers.46.block_sparse_moe.experts.146.w1", "model.layers.46.block_sparse_moe.experts.147.w1", "model.layers.46.block_sparse_moe.experts.148.w1", "model.layers.46.block_sparse_moe.experts.149.w1", "model.layers.46.block_sparse_moe.experts.150.w1", "model.layers.46.block_sparse_moe.experts.151.w1", "model.layers.46.block_sparse_moe.experts.152.w1", "model.layers.46.block_sparse_moe.experts.153.w1", "model.layers.46.block_sparse_moe.experts.154.w1", "model.layers.46.block_sparse_moe.experts.155.w1", "model.layers.46.block_sparse_moe.experts.156.w1", "model.layers.46.block_sparse_moe.experts.157.w1", "model.layers.46.block_sparse_moe.experts.158.w1", "model.layers.46.block_sparse_moe.experts.159.w1", "model.layers.46.block_sparse_moe.experts.160.w1", "model.layers.46.block_sparse_moe.experts.161.w1", "model.layers.46.block_sparse_moe.experts.162.w1", "model.layers.46.block_sparse_moe.experts.163.w1", "model.layers.46.block_sparse_moe.experts.164.w1", "model.layers.46.block_sparse_moe.experts.165.w1", "model.layers.46.block_sparse_moe.experts.166.w1", "model.layers.46.block_sparse_moe.experts.167.w1", "model.layers.46.block_sparse_moe.experts.168.w1", "model.layers.46.block_sparse_moe.experts.169.w1", "model.layers.46.block_sparse_moe.experts.170.w1", "model.layers.46.block_sparse_moe.experts.171.w1", "model.layers.46.block_sparse_moe.experts.172.w1", "model.layers.46.block_sparse_moe.experts.173.w1", "model.layers.46.block_sparse_moe.experts.174.w1", "model.layers.46.block_sparse_moe.experts.175.w1", "model.layers.46.block_sparse_moe.experts.176.w1", "model.layers.46.block_sparse_moe.experts.177.w1", "model.layers.46.block_sparse_moe.experts.178.w1", "model.layers.46.block_sparse_moe.experts.179.w1", "model.layers.46.block_sparse_moe.experts.180.w1", "model.layers.46.block_sparse_moe.experts.181.w1", "model.layers.46.block_sparse_moe.experts.182.w1", "model.layers.46.block_sparse_moe.experts.183.w1", "model.layers.46.block_sparse_moe.experts.184.w1", "model.layers.46.block_sparse_moe.experts.185.w1", "model.layers.46.block_sparse_moe.experts.186.w1", "model.layers.46.block_sparse_moe.experts.187.w1", "model.layers.46.block_sparse_moe.experts.188.w1", "model.layers.46.block_sparse_moe.experts.189.w1", "model.layers.46.block_sparse_moe.experts.190.w1", "model.layers.46.block_sparse_moe.experts.191.w1", "model.layers.46.block_sparse_moe.experts.192.w1", "model.layers.46.block_sparse_moe.experts.193.w1", "model.layers.46.block_sparse_moe.experts.194.w1", "model.layers.46.block_sparse_moe.experts.195.w1", "model.layers.46.block_sparse_moe.experts.196.w1", "model.layers.46.block_sparse_moe.experts.197.w1", "model.layers.46.block_sparse_moe.experts.198.w1", "model.layers.46.block_sparse_moe.experts.199.w1", "model.layers.46.block_sparse_moe.experts.200.w1", "model.layers.46.block_sparse_moe.experts.201.w1", "model.layers.46.block_sparse_moe.experts.202.w1", "model.layers.46.block_sparse_moe.experts.203.w1", "model.layers.46.block_sparse_moe.experts.204.w1", "model.layers.46.block_sparse_moe.experts.205.w1", "model.layers.46.block_sparse_moe.experts.206.w1", "model.layers.46.block_sparse_moe.experts.207.w1", "model.layers.46.block_sparse_moe.experts.208.w1", "model.layers.46.block_sparse_moe.experts.209.w1", "model.layers.46.block_sparse_moe.experts.210.w1", "model.layers.46.block_sparse_moe.experts.211.w1", "model.layers.46.block_sparse_moe.experts.212.w1", "model.layers.46.block_sparse_moe.experts.213.w1", "model.layers.46.block_sparse_moe.experts.214.w1", "model.layers.46.block_sparse_moe.experts.215.w1", "model.layers.46.block_sparse_moe.experts.216.w1", "model.layers.46.block_sparse_moe.experts.217.w1", "model.layers.46.block_sparse_moe.experts.218.w1", "model.layers.46.block_sparse_moe.experts.219.w1", "model.layers.46.block_sparse_moe.experts.220.w1", "model.layers.46.block_sparse_moe.experts.221.w1", "model.layers.46.block_sparse_moe.experts.222.w1", "model.layers.46.block_sparse_moe.experts.223.w1", "model.layers.46.block_sparse_moe.experts.224.w1", "model.layers.46.block_sparse_moe.experts.225.w1", "model.layers.46.block_sparse_moe.experts.226.w1", "model.layers.46.block_sparse_moe.experts.227.w1", "model.layers.46.block_sparse_moe.experts.228.w1", "model.layers.46.block_sparse_moe.experts.229.w1", "model.layers.46.block_sparse_moe.experts.230.w1", "model.layers.46.block_sparse_moe.experts.231.w1", "model.layers.46.block_sparse_moe.experts.232.w1", "model.layers.46.block_sparse_moe.experts.233.w1", "model.layers.46.block_sparse_moe.experts.234.w1", "model.layers.46.block_sparse_moe.experts.235.w1", "model.layers.46.block_sparse_moe.experts.236.w1", "model.layers.46.block_sparse_moe.experts.237.w1", "model.layers.46.block_sparse_moe.experts.238.w1", "model.layers.46.block_sparse_moe.experts.239.w1", "model.layers.46.block_sparse_moe.experts.240.w1", "model.layers.46.block_sparse_moe.experts.241.w1", "model.layers.46.block_sparse_moe.experts.242.w1", "model.layers.46.block_sparse_moe.experts.243.w1", "model.layers.46.block_sparse_moe.experts.244.w1", "model.layers.46.block_sparse_moe.experts.245.w1", "model.layers.46.block_sparse_moe.experts.246.w1", "model.layers.46.block_sparse_moe.experts.247.w1", "model.layers.46.block_sparse_moe.experts.248.w1", "model.layers.46.block_sparse_moe.experts.249.w1", "model.layers.46.block_sparse_moe.experts.250.w1", "model.layers.46.block_sparse_moe.experts.251.w1", "model.layers.46.block_sparse_moe.experts.252.w1", "model.layers.46.block_sparse_moe.experts.253.w1", "model.layers.46.block_sparse_moe.experts.254.w1", "model.layers.46.block_sparse_moe.experts.255.w1", "model.layers.46.block_sparse_moe.experts.0.w3", "model.layers.46.block_sparse_moe.experts.1.w3", "model.layers.46.block_sparse_moe.experts.2.w3", "model.layers.46.block_sparse_moe.experts.3.w3", "model.layers.46.block_sparse_moe.experts.4.w3", "model.layers.46.block_sparse_moe.experts.5.w3", "model.layers.46.block_sparse_moe.experts.6.w3", "model.layers.46.block_sparse_moe.experts.7.w3", "model.layers.46.block_sparse_moe.experts.8.w3", "model.layers.46.block_sparse_moe.experts.9.w3", "model.layers.46.block_sparse_moe.experts.10.w3", "model.layers.46.block_sparse_moe.experts.11.w3", "model.layers.46.block_sparse_moe.experts.12.w3", "model.layers.46.block_sparse_moe.experts.13.w3", "model.layers.46.block_sparse_moe.experts.14.w3", "model.layers.46.block_sparse_moe.experts.15.w3", "model.layers.46.block_sparse_moe.experts.16.w3", "model.layers.46.block_sparse_moe.experts.17.w3", "model.layers.46.block_sparse_moe.experts.18.w3", "model.layers.46.block_sparse_moe.experts.19.w3", "model.layers.46.block_sparse_moe.experts.20.w3", "model.layers.46.block_sparse_moe.experts.21.w3", "model.layers.46.block_sparse_moe.experts.22.w3", "model.layers.46.block_sparse_moe.experts.23.w3", "model.layers.46.block_sparse_moe.experts.24.w3", "model.layers.46.block_sparse_moe.experts.25.w3", "model.layers.46.block_sparse_moe.experts.26.w3", "model.layers.46.block_sparse_moe.experts.27.w3", "model.layers.46.block_sparse_moe.experts.28.w3", "model.layers.46.block_sparse_moe.experts.29.w3", "model.layers.46.block_sparse_moe.experts.30.w3", "model.layers.46.block_sparse_moe.experts.31.w3", "model.layers.46.block_sparse_moe.experts.32.w3", "model.layers.46.block_sparse_moe.experts.33.w3", "model.layers.46.block_sparse_moe.experts.34.w3", "model.layers.46.block_sparse_moe.experts.35.w3", "model.layers.46.block_sparse_moe.experts.36.w3", "model.layers.46.block_sparse_moe.experts.37.w3", "model.layers.46.block_sparse_moe.experts.38.w3", "model.layers.46.block_sparse_moe.experts.39.w3", "model.layers.46.block_sparse_moe.experts.40.w3", "model.layers.46.block_sparse_moe.experts.41.w3", "model.layers.46.block_sparse_moe.experts.42.w3", "model.layers.46.block_sparse_moe.experts.43.w3", "model.layers.46.block_sparse_moe.experts.44.w3", "model.layers.46.block_sparse_moe.experts.45.w3", "model.layers.46.block_sparse_moe.experts.46.w3", "model.layers.46.block_sparse_moe.experts.47.w3", "model.layers.46.block_sparse_moe.experts.48.w3", "model.layers.46.block_sparse_moe.experts.49.w3", "model.layers.46.block_sparse_moe.experts.50.w3", "model.layers.46.block_sparse_moe.experts.51.w3", "model.layers.46.block_sparse_moe.experts.52.w3", "model.layers.46.block_sparse_moe.experts.53.w3", "model.layers.46.block_sparse_moe.experts.54.w3", "model.layers.46.block_sparse_moe.experts.55.w3", "model.layers.46.block_sparse_moe.experts.56.w3", "model.layers.46.block_sparse_moe.experts.57.w3", "model.layers.46.block_sparse_moe.experts.58.w3", "model.layers.46.block_sparse_moe.experts.59.w3", "model.layers.46.block_sparse_moe.experts.60.w3", "model.layers.46.block_sparse_moe.experts.61.w3", "model.layers.46.block_sparse_moe.experts.62.w3", "model.layers.46.block_sparse_moe.experts.63.w3", "model.layers.46.block_sparse_moe.experts.64.w3", "model.layers.46.block_sparse_moe.experts.65.w3", "model.layers.46.block_sparse_moe.experts.66.w3", "model.layers.46.block_sparse_moe.experts.67.w3", "model.layers.46.block_sparse_moe.experts.68.w3", "model.layers.46.block_sparse_moe.experts.69.w3", "model.layers.46.block_sparse_moe.experts.70.w3", "model.layers.46.block_sparse_moe.experts.71.w3", "model.layers.46.block_sparse_moe.experts.72.w3", "model.layers.46.block_sparse_moe.experts.73.w3", "model.layers.46.block_sparse_moe.experts.74.w3", "model.layers.46.block_sparse_moe.experts.75.w3", "model.layers.46.block_sparse_moe.experts.76.w3", "model.layers.46.block_sparse_moe.experts.77.w3", "model.layers.46.block_sparse_moe.experts.78.w3", "model.layers.46.block_sparse_moe.experts.79.w3", "model.layers.46.block_sparse_moe.experts.80.w3", "model.layers.46.block_sparse_moe.experts.81.w3", "model.layers.46.block_sparse_moe.experts.82.w3", "model.layers.46.block_sparse_moe.experts.83.w3", "model.layers.46.block_sparse_moe.experts.84.w3", "model.layers.46.block_sparse_moe.experts.85.w3", "model.layers.46.block_sparse_moe.experts.86.w3", "model.layers.46.block_sparse_moe.experts.87.w3", "model.layers.46.block_sparse_moe.experts.88.w3", "model.layers.46.block_sparse_moe.experts.89.w3", "model.layers.46.block_sparse_moe.experts.90.w3", "model.layers.46.block_sparse_moe.experts.91.w3", "model.layers.46.block_sparse_moe.experts.92.w3", "model.layers.46.block_sparse_moe.experts.93.w3", "model.layers.46.block_sparse_moe.experts.94.w3", "model.layers.46.block_sparse_moe.experts.95.w3", "model.layers.46.block_sparse_moe.experts.96.w3", "model.layers.46.block_sparse_moe.experts.97.w3", "model.layers.46.block_sparse_moe.experts.98.w3", "model.layers.46.block_sparse_moe.experts.99.w3", "model.layers.46.block_sparse_moe.experts.100.w3", "model.layers.46.block_sparse_moe.experts.101.w3", "model.layers.46.block_sparse_moe.experts.102.w3", "model.layers.46.block_sparse_moe.experts.103.w3", "model.layers.46.block_sparse_moe.experts.104.w3", "model.layers.46.block_sparse_moe.experts.105.w3", "model.layers.46.block_sparse_moe.experts.106.w3", "model.layers.46.block_sparse_moe.experts.107.w3", "model.layers.46.block_sparse_moe.experts.108.w3", "model.layers.46.block_sparse_moe.experts.109.w3", "model.layers.46.block_sparse_moe.experts.110.w3", "model.layers.46.block_sparse_moe.experts.111.w3", "model.layers.46.block_sparse_moe.experts.112.w3", "model.layers.46.block_sparse_moe.experts.113.w3", "model.layers.46.block_sparse_moe.experts.114.w3", "model.layers.46.block_sparse_moe.experts.115.w3", "model.layers.46.block_sparse_moe.experts.116.w3", "model.layers.46.block_sparse_moe.experts.117.w3", "model.layers.46.block_sparse_moe.experts.118.w3", "model.layers.46.block_sparse_moe.experts.119.w3", "model.layers.46.block_sparse_moe.experts.120.w3", "model.layers.46.block_sparse_moe.experts.121.w3", "model.layers.46.block_sparse_moe.experts.122.w3", "model.layers.46.block_sparse_moe.experts.123.w3", "model.layers.46.block_sparse_moe.experts.124.w3", "model.layers.46.block_sparse_moe.experts.125.w3", "model.layers.46.block_sparse_moe.experts.126.w3", "model.layers.46.block_sparse_moe.experts.127.w3", "model.layers.46.block_sparse_moe.experts.128.w3", "model.layers.46.block_sparse_moe.experts.129.w3", "model.layers.46.block_sparse_moe.experts.130.w3", "model.layers.46.block_sparse_moe.experts.131.w3", "model.layers.46.block_sparse_moe.experts.132.w3", "model.layers.46.block_sparse_moe.experts.133.w3", "model.layers.46.block_sparse_moe.experts.134.w3", "model.layers.46.block_sparse_moe.experts.135.w3", "model.layers.46.block_sparse_moe.experts.136.w3", "model.layers.46.block_sparse_moe.experts.137.w3", "model.layers.46.block_sparse_moe.experts.138.w3", "model.layers.46.block_sparse_moe.experts.139.w3", "model.layers.46.block_sparse_moe.experts.140.w3", "model.layers.46.block_sparse_moe.experts.141.w3", "model.layers.46.block_sparse_moe.experts.142.w3", "model.layers.46.block_sparse_moe.experts.143.w3", "model.layers.46.block_sparse_moe.experts.144.w3", "model.layers.46.block_sparse_moe.experts.145.w3", "model.layers.46.block_sparse_moe.experts.146.w3", "model.layers.46.block_sparse_moe.experts.147.w3", "model.layers.46.block_sparse_moe.experts.148.w3", "model.layers.46.block_sparse_moe.experts.149.w3", "model.layers.46.block_sparse_moe.experts.150.w3", "model.layers.46.block_sparse_moe.experts.151.w3", "model.layers.46.block_sparse_moe.experts.152.w3", "model.layers.46.block_sparse_moe.experts.153.w3", "model.layers.46.block_sparse_moe.experts.154.w3", "model.layers.46.block_sparse_moe.experts.155.w3", "model.layers.46.block_sparse_moe.experts.156.w3", "model.layers.46.block_sparse_moe.experts.157.w3", "model.layers.46.block_sparse_moe.experts.158.w3", "model.layers.46.block_sparse_moe.experts.159.w3", "model.layers.46.block_sparse_moe.experts.160.w3", "model.layers.46.block_sparse_moe.experts.161.w3", "model.layers.46.block_sparse_moe.experts.162.w3", "model.layers.46.block_sparse_moe.experts.163.w3", "model.layers.46.block_sparse_moe.experts.164.w3", "model.layers.46.block_sparse_moe.experts.165.w3", "model.layers.46.block_sparse_moe.experts.166.w3", "model.layers.46.block_sparse_moe.experts.167.w3", "model.layers.46.block_sparse_moe.experts.168.w3", "model.layers.46.block_sparse_moe.experts.169.w3", "model.layers.46.block_sparse_moe.experts.170.w3", "model.layers.46.block_sparse_moe.experts.171.w3", "model.layers.46.block_sparse_moe.experts.172.w3", "model.layers.46.block_sparse_moe.experts.173.w3", "model.layers.46.block_sparse_moe.experts.174.w3", "model.layers.46.block_sparse_moe.experts.175.w3", "model.layers.46.block_sparse_moe.experts.176.w3", "model.layers.46.block_sparse_moe.experts.177.w3", "model.layers.46.block_sparse_moe.experts.178.w3", "model.layers.46.block_sparse_moe.experts.179.w3", "model.layers.46.block_sparse_moe.experts.180.w3", "model.layers.46.block_sparse_moe.experts.181.w3", "model.layers.46.block_sparse_moe.experts.182.w3", "model.layers.46.block_sparse_moe.experts.183.w3", "model.layers.46.block_sparse_moe.experts.184.w3", "model.layers.46.block_sparse_moe.experts.185.w3", "model.layers.46.block_sparse_moe.experts.186.w3", "model.layers.46.block_sparse_moe.experts.187.w3", "model.layers.46.block_sparse_moe.experts.188.w3", "model.layers.46.block_sparse_moe.experts.189.w3", "model.layers.46.block_sparse_moe.experts.190.w3", "model.layers.46.block_sparse_moe.experts.191.w3", "model.layers.46.block_sparse_moe.experts.192.w3", "model.layers.46.block_sparse_moe.experts.193.w3", "model.layers.46.block_sparse_moe.experts.194.w3", "model.layers.46.block_sparse_moe.experts.195.w3", "model.layers.46.block_sparse_moe.experts.196.w3", "model.layers.46.block_sparse_moe.experts.197.w3", "model.layers.46.block_sparse_moe.experts.198.w3", "model.layers.46.block_sparse_moe.experts.199.w3", "model.layers.46.block_sparse_moe.experts.200.w3", "model.layers.46.block_sparse_moe.experts.201.w3", "model.layers.46.block_sparse_moe.experts.202.w3", "model.layers.46.block_sparse_moe.experts.203.w3", "model.layers.46.block_sparse_moe.experts.204.w3", "model.layers.46.block_sparse_moe.experts.205.w3", "model.layers.46.block_sparse_moe.experts.206.w3", "model.layers.46.block_sparse_moe.experts.207.w3", "model.layers.46.block_sparse_moe.experts.208.w3", "model.layers.46.block_sparse_moe.experts.209.w3", "model.layers.46.block_sparse_moe.experts.210.w3", "model.layers.46.block_sparse_moe.experts.211.w3", "model.layers.46.block_sparse_moe.experts.212.w3", "model.layers.46.block_sparse_moe.experts.213.w3", "model.layers.46.block_sparse_moe.experts.214.w3", "model.layers.46.block_sparse_moe.experts.215.w3", "model.layers.46.block_sparse_moe.experts.216.w3", "model.layers.46.block_sparse_moe.experts.217.w3", "model.layers.46.block_sparse_moe.experts.218.w3", "model.layers.46.block_sparse_moe.experts.219.w3", "model.layers.46.block_sparse_moe.experts.220.w3", "model.layers.46.block_sparse_moe.experts.221.w3", "model.layers.46.block_sparse_moe.experts.222.w3", "model.layers.46.block_sparse_moe.experts.223.w3", "model.layers.46.block_sparse_moe.experts.224.w3", "model.layers.46.block_sparse_moe.experts.225.w3", "model.layers.46.block_sparse_moe.experts.226.w3", "model.layers.46.block_sparse_moe.experts.227.w3", "model.layers.46.block_sparse_moe.experts.228.w3", "model.layers.46.block_sparse_moe.experts.229.w3", "model.layers.46.block_sparse_moe.experts.230.w3", "model.layers.46.block_sparse_moe.experts.231.w3", "model.layers.46.block_sparse_moe.experts.232.w3", "model.layers.46.block_sparse_moe.experts.233.w3", "model.layers.46.block_sparse_moe.experts.234.w3", "model.layers.46.block_sparse_moe.experts.235.w3", "model.layers.46.block_sparse_moe.experts.236.w3", "model.layers.46.block_sparse_moe.experts.237.w3", "model.layers.46.block_sparse_moe.experts.238.w3", "model.layers.46.block_sparse_moe.experts.239.w3", "model.layers.46.block_sparse_moe.experts.240.w3", "model.layers.46.block_sparse_moe.experts.241.w3", "model.layers.46.block_sparse_moe.experts.242.w3", "model.layers.46.block_sparse_moe.experts.243.w3", "model.layers.46.block_sparse_moe.experts.244.w3", "model.layers.46.block_sparse_moe.experts.245.w3", "model.layers.46.block_sparse_moe.experts.246.w3", "model.layers.46.block_sparse_moe.experts.247.w3", "model.layers.46.block_sparse_moe.experts.248.w3", "model.layers.46.block_sparse_moe.experts.249.w3", "model.layers.46.block_sparse_moe.experts.250.w3", "model.layers.46.block_sparse_moe.experts.251.w3", "model.layers.46.block_sparse_moe.experts.252.w3", "model.layers.46.block_sparse_moe.experts.253.w3", "model.layers.46.block_sparse_moe.experts.254.w3", "model.layers.46.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0009083218872547039, "dbits": 2415919104 } ] }, { "idx": 234, "layers": [ "model.layers.46.block_sparse_moe.experts.0.w2", "model.layers.46.block_sparse_moe.experts.1.w2", "model.layers.46.block_sparse_moe.experts.2.w2", "model.layers.46.block_sparse_moe.experts.3.w2", "model.layers.46.block_sparse_moe.experts.4.w2", "model.layers.46.block_sparse_moe.experts.5.w2", "model.layers.46.block_sparse_moe.experts.6.w2", "model.layers.46.block_sparse_moe.experts.7.w2", "model.layers.46.block_sparse_moe.experts.8.w2", "model.layers.46.block_sparse_moe.experts.9.w2", "model.layers.46.block_sparse_moe.experts.10.w2", "model.layers.46.block_sparse_moe.experts.11.w2", "model.layers.46.block_sparse_moe.experts.12.w2", "model.layers.46.block_sparse_moe.experts.13.w2", "model.layers.46.block_sparse_moe.experts.14.w2", "model.layers.46.block_sparse_moe.experts.15.w2", "model.layers.46.block_sparse_moe.experts.16.w2", "model.layers.46.block_sparse_moe.experts.17.w2", "model.layers.46.block_sparse_moe.experts.18.w2", "model.layers.46.block_sparse_moe.experts.19.w2", "model.layers.46.block_sparse_moe.experts.20.w2", "model.layers.46.block_sparse_moe.experts.21.w2", "model.layers.46.block_sparse_moe.experts.22.w2", "model.layers.46.block_sparse_moe.experts.23.w2", "model.layers.46.block_sparse_moe.experts.24.w2", "model.layers.46.block_sparse_moe.experts.25.w2", "model.layers.46.block_sparse_moe.experts.26.w2", "model.layers.46.block_sparse_moe.experts.27.w2", "model.layers.46.block_sparse_moe.experts.28.w2", "model.layers.46.block_sparse_moe.experts.29.w2", "model.layers.46.block_sparse_moe.experts.30.w2", "model.layers.46.block_sparse_moe.experts.31.w2", "model.layers.46.block_sparse_moe.experts.32.w2", "model.layers.46.block_sparse_moe.experts.33.w2", "model.layers.46.block_sparse_moe.experts.34.w2", "model.layers.46.block_sparse_moe.experts.35.w2", "model.layers.46.block_sparse_moe.experts.36.w2", "model.layers.46.block_sparse_moe.experts.37.w2", "model.layers.46.block_sparse_moe.experts.38.w2", "model.layers.46.block_sparse_moe.experts.39.w2", "model.layers.46.block_sparse_moe.experts.40.w2", "model.layers.46.block_sparse_moe.experts.41.w2", "model.layers.46.block_sparse_moe.experts.42.w2", "model.layers.46.block_sparse_moe.experts.43.w2", "model.layers.46.block_sparse_moe.experts.44.w2", "model.layers.46.block_sparse_moe.experts.45.w2", "model.layers.46.block_sparse_moe.experts.46.w2", "model.layers.46.block_sparse_moe.experts.47.w2", "model.layers.46.block_sparse_moe.experts.48.w2", "model.layers.46.block_sparse_moe.experts.49.w2", "model.layers.46.block_sparse_moe.experts.50.w2", "model.layers.46.block_sparse_moe.experts.51.w2", "model.layers.46.block_sparse_moe.experts.52.w2", "model.layers.46.block_sparse_moe.experts.53.w2", "model.layers.46.block_sparse_moe.experts.54.w2", "model.layers.46.block_sparse_moe.experts.55.w2", "model.layers.46.block_sparse_moe.experts.56.w2", "model.layers.46.block_sparse_moe.experts.57.w2", "model.layers.46.block_sparse_moe.experts.58.w2", "model.layers.46.block_sparse_moe.experts.59.w2", "model.layers.46.block_sparse_moe.experts.60.w2", "model.layers.46.block_sparse_moe.experts.61.w2", "model.layers.46.block_sparse_moe.experts.62.w2", "model.layers.46.block_sparse_moe.experts.63.w2", "model.layers.46.block_sparse_moe.experts.64.w2", "model.layers.46.block_sparse_moe.experts.65.w2", "model.layers.46.block_sparse_moe.experts.66.w2", "model.layers.46.block_sparse_moe.experts.67.w2", "model.layers.46.block_sparse_moe.experts.68.w2", "model.layers.46.block_sparse_moe.experts.69.w2", "model.layers.46.block_sparse_moe.experts.70.w2", "model.layers.46.block_sparse_moe.experts.71.w2", "model.layers.46.block_sparse_moe.experts.72.w2", "model.layers.46.block_sparse_moe.experts.73.w2", "model.layers.46.block_sparse_moe.experts.74.w2", "model.layers.46.block_sparse_moe.experts.75.w2", "model.layers.46.block_sparse_moe.experts.76.w2", "model.layers.46.block_sparse_moe.experts.77.w2", "model.layers.46.block_sparse_moe.experts.78.w2", "model.layers.46.block_sparse_moe.experts.79.w2", "model.layers.46.block_sparse_moe.experts.80.w2", "model.layers.46.block_sparse_moe.experts.81.w2", "model.layers.46.block_sparse_moe.experts.82.w2", "model.layers.46.block_sparse_moe.experts.83.w2", "model.layers.46.block_sparse_moe.experts.84.w2", "model.layers.46.block_sparse_moe.experts.85.w2", "model.layers.46.block_sparse_moe.experts.86.w2", "model.layers.46.block_sparse_moe.experts.87.w2", "model.layers.46.block_sparse_moe.experts.88.w2", "model.layers.46.block_sparse_moe.experts.89.w2", "model.layers.46.block_sparse_moe.experts.90.w2", "model.layers.46.block_sparse_moe.experts.91.w2", "model.layers.46.block_sparse_moe.experts.92.w2", "model.layers.46.block_sparse_moe.experts.93.w2", "model.layers.46.block_sparse_moe.experts.94.w2", "model.layers.46.block_sparse_moe.experts.95.w2", "model.layers.46.block_sparse_moe.experts.96.w2", "model.layers.46.block_sparse_moe.experts.97.w2", "model.layers.46.block_sparse_moe.experts.98.w2", "model.layers.46.block_sparse_moe.experts.99.w2", "model.layers.46.block_sparse_moe.experts.100.w2", "model.layers.46.block_sparse_moe.experts.101.w2", "model.layers.46.block_sparse_moe.experts.102.w2", "model.layers.46.block_sparse_moe.experts.103.w2", "model.layers.46.block_sparse_moe.experts.104.w2", "model.layers.46.block_sparse_moe.experts.105.w2", "model.layers.46.block_sparse_moe.experts.106.w2", "model.layers.46.block_sparse_moe.experts.107.w2", "model.layers.46.block_sparse_moe.experts.108.w2", "model.layers.46.block_sparse_moe.experts.109.w2", "model.layers.46.block_sparse_moe.experts.110.w2", "model.layers.46.block_sparse_moe.experts.111.w2", "model.layers.46.block_sparse_moe.experts.112.w2", "model.layers.46.block_sparse_moe.experts.113.w2", "model.layers.46.block_sparse_moe.experts.114.w2", "model.layers.46.block_sparse_moe.experts.115.w2", "model.layers.46.block_sparse_moe.experts.116.w2", "model.layers.46.block_sparse_moe.experts.117.w2", "model.layers.46.block_sparse_moe.experts.118.w2", "model.layers.46.block_sparse_moe.experts.119.w2", "model.layers.46.block_sparse_moe.experts.120.w2", "model.layers.46.block_sparse_moe.experts.121.w2", "model.layers.46.block_sparse_moe.experts.122.w2", "model.layers.46.block_sparse_moe.experts.123.w2", "model.layers.46.block_sparse_moe.experts.124.w2", "model.layers.46.block_sparse_moe.experts.125.w2", "model.layers.46.block_sparse_moe.experts.126.w2", "model.layers.46.block_sparse_moe.experts.127.w2", "model.layers.46.block_sparse_moe.experts.128.w2", "model.layers.46.block_sparse_moe.experts.129.w2", "model.layers.46.block_sparse_moe.experts.130.w2", "model.layers.46.block_sparse_moe.experts.131.w2", "model.layers.46.block_sparse_moe.experts.132.w2", "model.layers.46.block_sparse_moe.experts.133.w2", "model.layers.46.block_sparse_moe.experts.134.w2", "model.layers.46.block_sparse_moe.experts.135.w2", "model.layers.46.block_sparse_moe.experts.136.w2", "model.layers.46.block_sparse_moe.experts.137.w2", "model.layers.46.block_sparse_moe.experts.138.w2", "model.layers.46.block_sparse_moe.experts.139.w2", "model.layers.46.block_sparse_moe.experts.140.w2", "model.layers.46.block_sparse_moe.experts.141.w2", "model.layers.46.block_sparse_moe.experts.142.w2", "model.layers.46.block_sparse_moe.experts.143.w2", "model.layers.46.block_sparse_moe.experts.144.w2", "model.layers.46.block_sparse_moe.experts.145.w2", "model.layers.46.block_sparse_moe.experts.146.w2", "model.layers.46.block_sparse_moe.experts.147.w2", "model.layers.46.block_sparse_moe.experts.148.w2", "model.layers.46.block_sparse_moe.experts.149.w2", "model.layers.46.block_sparse_moe.experts.150.w2", "model.layers.46.block_sparse_moe.experts.151.w2", "model.layers.46.block_sparse_moe.experts.152.w2", "model.layers.46.block_sparse_moe.experts.153.w2", "model.layers.46.block_sparse_moe.experts.154.w2", "model.layers.46.block_sparse_moe.experts.155.w2", "model.layers.46.block_sparse_moe.experts.156.w2", "model.layers.46.block_sparse_moe.experts.157.w2", "model.layers.46.block_sparse_moe.experts.158.w2", "model.layers.46.block_sparse_moe.experts.159.w2", "model.layers.46.block_sparse_moe.experts.160.w2", "model.layers.46.block_sparse_moe.experts.161.w2", "model.layers.46.block_sparse_moe.experts.162.w2", "model.layers.46.block_sparse_moe.experts.163.w2", "model.layers.46.block_sparse_moe.experts.164.w2", "model.layers.46.block_sparse_moe.experts.165.w2", "model.layers.46.block_sparse_moe.experts.166.w2", "model.layers.46.block_sparse_moe.experts.167.w2", "model.layers.46.block_sparse_moe.experts.168.w2", "model.layers.46.block_sparse_moe.experts.169.w2", "model.layers.46.block_sparse_moe.experts.170.w2", "model.layers.46.block_sparse_moe.experts.171.w2", "model.layers.46.block_sparse_moe.experts.172.w2", "model.layers.46.block_sparse_moe.experts.173.w2", "model.layers.46.block_sparse_moe.experts.174.w2", "model.layers.46.block_sparse_moe.experts.175.w2", "model.layers.46.block_sparse_moe.experts.176.w2", "model.layers.46.block_sparse_moe.experts.177.w2", "model.layers.46.block_sparse_moe.experts.178.w2", "model.layers.46.block_sparse_moe.experts.179.w2", "model.layers.46.block_sparse_moe.experts.180.w2", "model.layers.46.block_sparse_moe.experts.181.w2", "model.layers.46.block_sparse_moe.experts.182.w2", "model.layers.46.block_sparse_moe.experts.183.w2", "model.layers.46.block_sparse_moe.experts.184.w2", "model.layers.46.block_sparse_moe.experts.185.w2", "model.layers.46.block_sparse_moe.experts.186.w2", "model.layers.46.block_sparse_moe.experts.187.w2", "model.layers.46.block_sparse_moe.experts.188.w2", "model.layers.46.block_sparse_moe.experts.189.w2", "model.layers.46.block_sparse_moe.experts.190.w2", "model.layers.46.block_sparse_moe.experts.191.w2", "model.layers.46.block_sparse_moe.experts.192.w2", "model.layers.46.block_sparse_moe.experts.193.w2", "model.layers.46.block_sparse_moe.experts.194.w2", "model.layers.46.block_sparse_moe.experts.195.w2", "model.layers.46.block_sparse_moe.experts.196.w2", "model.layers.46.block_sparse_moe.experts.197.w2", "model.layers.46.block_sparse_moe.experts.198.w2", "model.layers.46.block_sparse_moe.experts.199.w2", "model.layers.46.block_sparse_moe.experts.200.w2", "model.layers.46.block_sparse_moe.experts.201.w2", "model.layers.46.block_sparse_moe.experts.202.w2", "model.layers.46.block_sparse_moe.experts.203.w2", "model.layers.46.block_sparse_moe.experts.204.w2", "model.layers.46.block_sparse_moe.experts.205.w2", "model.layers.46.block_sparse_moe.experts.206.w2", "model.layers.46.block_sparse_moe.experts.207.w2", "model.layers.46.block_sparse_moe.experts.208.w2", "model.layers.46.block_sparse_moe.experts.209.w2", "model.layers.46.block_sparse_moe.experts.210.w2", "model.layers.46.block_sparse_moe.experts.211.w2", "model.layers.46.block_sparse_moe.experts.212.w2", "model.layers.46.block_sparse_moe.experts.213.w2", "model.layers.46.block_sparse_moe.experts.214.w2", "model.layers.46.block_sparse_moe.experts.215.w2", "model.layers.46.block_sparse_moe.experts.216.w2", "model.layers.46.block_sparse_moe.experts.217.w2", "model.layers.46.block_sparse_moe.experts.218.w2", "model.layers.46.block_sparse_moe.experts.219.w2", "model.layers.46.block_sparse_moe.experts.220.w2", "model.layers.46.block_sparse_moe.experts.221.w2", "model.layers.46.block_sparse_moe.experts.222.w2", "model.layers.46.block_sparse_moe.experts.223.w2", "model.layers.46.block_sparse_moe.experts.224.w2", "model.layers.46.block_sparse_moe.experts.225.w2", "model.layers.46.block_sparse_moe.experts.226.w2", "model.layers.46.block_sparse_moe.experts.227.w2", "model.layers.46.block_sparse_moe.experts.228.w2", "model.layers.46.block_sparse_moe.experts.229.w2", "model.layers.46.block_sparse_moe.experts.230.w2", "model.layers.46.block_sparse_moe.experts.231.w2", "model.layers.46.block_sparse_moe.experts.232.w2", "model.layers.46.block_sparse_moe.experts.233.w2", "model.layers.46.block_sparse_moe.experts.234.w2", "model.layers.46.block_sparse_moe.experts.235.w2", "model.layers.46.block_sparse_moe.experts.236.w2", "model.layers.46.block_sparse_moe.experts.237.w2", "model.layers.46.block_sparse_moe.experts.238.w2", "model.layers.46.block_sparse_moe.experts.239.w2", "model.layers.46.block_sparse_moe.experts.240.w2", "model.layers.46.block_sparse_moe.experts.241.w2", "model.layers.46.block_sparse_moe.experts.242.w2", "model.layers.46.block_sparse_moe.experts.243.w2", "model.layers.46.block_sparse_moe.experts.244.w2", "model.layers.46.block_sparse_moe.experts.245.w2", "model.layers.46.block_sparse_moe.experts.246.w2", "model.layers.46.block_sparse_moe.experts.247.w2", "model.layers.46.block_sparse_moe.experts.248.w2", "model.layers.46.block_sparse_moe.experts.249.w2", "model.layers.46.block_sparse_moe.experts.250.w2", "model.layers.46.block_sparse_moe.experts.251.w2", "model.layers.46.block_sparse_moe.experts.252.w2", "model.layers.46.block_sparse_moe.experts.253.w2", "model.layers.46.block_sparse_moe.experts.254.w2", "model.layers.46.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.00011643804609778874, "dbits": 1207959552 } ] }, { "idx": 235, "layers": [ "model.layers.47.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0004075121134519688, "dbits": 18874368 } ] }, { "idx": 236, "layers": [ "model.layers.47.self_attn.k_proj", "model.layers.47.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0021294057369232178, "dbits": 6291456 } ] }, { "idx": 237, "layers": [ "model.layers.47.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0005778376013040321, "dbits": 18874368 } ] }, { "idx": 238, "layers": [ "model.layers.47.block_sparse_moe.experts.0.w1", "model.layers.47.block_sparse_moe.experts.1.w1", "model.layers.47.block_sparse_moe.experts.2.w1", "model.layers.47.block_sparse_moe.experts.3.w1", "model.layers.47.block_sparse_moe.experts.4.w1", "model.layers.47.block_sparse_moe.experts.5.w1", "model.layers.47.block_sparse_moe.experts.6.w1", "model.layers.47.block_sparse_moe.experts.7.w1", "model.layers.47.block_sparse_moe.experts.8.w1", "model.layers.47.block_sparse_moe.experts.9.w1", "model.layers.47.block_sparse_moe.experts.10.w1", "model.layers.47.block_sparse_moe.experts.11.w1", "model.layers.47.block_sparse_moe.experts.12.w1", "model.layers.47.block_sparse_moe.experts.13.w1", "model.layers.47.block_sparse_moe.experts.14.w1", "model.layers.47.block_sparse_moe.experts.15.w1", "model.layers.47.block_sparse_moe.experts.16.w1", "model.layers.47.block_sparse_moe.experts.17.w1", "model.layers.47.block_sparse_moe.experts.18.w1", "model.layers.47.block_sparse_moe.experts.19.w1", "model.layers.47.block_sparse_moe.experts.20.w1", "model.layers.47.block_sparse_moe.experts.21.w1", "model.layers.47.block_sparse_moe.experts.22.w1", "model.layers.47.block_sparse_moe.experts.23.w1", "model.layers.47.block_sparse_moe.experts.24.w1", "model.layers.47.block_sparse_moe.experts.25.w1", "model.layers.47.block_sparse_moe.experts.26.w1", "model.layers.47.block_sparse_moe.experts.27.w1", "model.layers.47.block_sparse_moe.experts.28.w1", "model.layers.47.block_sparse_moe.experts.29.w1", "model.layers.47.block_sparse_moe.experts.30.w1", "model.layers.47.block_sparse_moe.experts.31.w1", "model.layers.47.block_sparse_moe.experts.32.w1", "model.layers.47.block_sparse_moe.experts.33.w1", "model.layers.47.block_sparse_moe.experts.34.w1", "model.layers.47.block_sparse_moe.experts.35.w1", "model.layers.47.block_sparse_moe.experts.36.w1", "model.layers.47.block_sparse_moe.experts.37.w1", "model.layers.47.block_sparse_moe.experts.38.w1", "model.layers.47.block_sparse_moe.experts.39.w1", "model.layers.47.block_sparse_moe.experts.40.w1", "model.layers.47.block_sparse_moe.experts.41.w1", "model.layers.47.block_sparse_moe.experts.42.w1", "model.layers.47.block_sparse_moe.experts.43.w1", "model.layers.47.block_sparse_moe.experts.44.w1", "model.layers.47.block_sparse_moe.experts.45.w1", "model.layers.47.block_sparse_moe.experts.46.w1", "model.layers.47.block_sparse_moe.experts.47.w1", "model.layers.47.block_sparse_moe.experts.48.w1", "model.layers.47.block_sparse_moe.experts.49.w1", "model.layers.47.block_sparse_moe.experts.50.w1", "model.layers.47.block_sparse_moe.experts.51.w1", "model.layers.47.block_sparse_moe.experts.52.w1", "model.layers.47.block_sparse_moe.experts.53.w1", "model.layers.47.block_sparse_moe.experts.54.w1", "model.layers.47.block_sparse_moe.experts.55.w1", "model.layers.47.block_sparse_moe.experts.56.w1", "model.layers.47.block_sparse_moe.experts.57.w1", "model.layers.47.block_sparse_moe.experts.58.w1", "model.layers.47.block_sparse_moe.experts.59.w1", "model.layers.47.block_sparse_moe.experts.60.w1", "model.layers.47.block_sparse_moe.experts.61.w1", "model.layers.47.block_sparse_moe.experts.62.w1", "model.layers.47.block_sparse_moe.experts.63.w1", "model.layers.47.block_sparse_moe.experts.64.w1", "model.layers.47.block_sparse_moe.experts.65.w1", "model.layers.47.block_sparse_moe.experts.66.w1", "model.layers.47.block_sparse_moe.experts.67.w1", "model.layers.47.block_sparse_moe.experts.68.w1", "model.layers.47.block_sparse_moe.experts.69.w1", "model.layers.47.block_sparse_moe.experts.70.w1", "model.layers.47.block_sparse_moe.experts.71.w1", "model.layers.47.block_sparse_moe.experts.72.w1", "model.layers.47.block_sparse_moe.experts.73.w1", "model.layers.47.block_sparse_moe.experts.74.w1", "model.layers.47.block_sparse_moe.experts.75.w1", "model.layers.47.block_sparse_moe.experts.76.w1", "model.layers.47.block_sparse_moe.experts.77.w1", "model.layers.47.block_sparse_moe.experts.78.w1", "model.layers.47.block_sparse_moe.experts.79.w1", "model.layers.47.block_sparse_moe.experts.80.w1", "model.layers.47.block_sparse_moe.experts.81.w1", "model.layers.47.block_sparse_moe.experts.82.w1", "model.layers.47.block_sparse_moe.experts.83.w1", "model.layers.47.block_sparse_moe.experts.84.w1", "model.layers.47.block_sparse_moe.experts.85.w1", "model.layers.47.block_sparse_moe.experts.86.w1", "model.layers.47.block_sparse_moe.experts.87.w1", "model.layers.47.block_sparse_moe.experts.88.w1", "model.layers.47.block_sparse_moe.experts.89.w1", "model.layers.47.block_sparse_moe.experts.90.w1", "model.layers.47.block_sparse_moe.experts.91.w1", "model.layers.47.block_sparse_moe.experts.92.w1", "model.layers.47.block_sparse_moe.experts.93.w1", "model.layers.47.block_sparse_moe.experts.94.w1", "model.layers.47.block_sparse_moe.experts.95.w1", "model.layers.47.block_sparse_moe.experts.96.w1", "model.layers.47.block_sparse_moe.experts.97.w1", "model.layers.47.block_sparse_moe.experts.98.w1", "model.layers.47.block_sparse_moe.experts.99.w1", "model.layers.47.block_sparse_moe.experts.100.w1", "model.layers.47.block_sparse_moe.experts.101.w1", "model.layers.47.block_sparse_moe.experts.102.w1", "model.layers.47.block_sparse_moe.experts.103.w1", "model.layers.47.block_sparse_moe.experts.104.w1", "model.layers.47.block_sparse_moe.experts.105.w1", "model.layers.47.block_sparse_moe.experts.106.w1", "model.layers.47.block_sparse_moe.experts.107.w1", "model.layers.47.block_sparse_moe.experts.108.w1", "model.layers.47.block_sparse_moe.experts.109.w1", "model.layers.47.block_sparse_moe.experts.110.w1", "model.layers.47.block_sparse_moe.experts.111.w1", "model.layers.47.block_sparse_moe.experts.112.w1", "model.layers.47.block_sparse_moe.experts.113.w1", "model.layers.47.block_sparse_moe.experts.114.w1", "model.layers.47.block_sparse_moe.experts.115.w1", "model.layers.47.block_sparse_moe.experts.116.w1", "model.layers.47.block_sparse_moe.experts.117.w1", "model.layers.47.block_sparse_moe.experts.118.w1", "model.layers.47.block_sparse_moe.experts.119.w1", "model.layers.47.block_sparse_moe.experts.120.w1", "model.layers.47.block_sparse_moe.experts.121.w1", "model.layers.47.block_sparse_moe.experts.122.w1", "model.layers.47.block_sparse_moe.experts.123.w1", "model.layers.47.block_sparse_moe.experts.124.w1", "model.layers.47.block_sparse_moe.experts.125.w1", "model.layers.47.block_sparse_moe.experts.126.w1", "model.layers.47.block_sparse_moe.experts.127.w1", "model.layers.47.block_sparse_moe.experts.128.w1", "model.layers.47.block_sparse_moe.experts.129.w1", "model.layers.47.block_sparse_moe.experts.130.w1", "model.layers.47.block_sparse_moe.experts.131.w1", "model.layers.47.block_sparse_moe.experts.132.w1", "model.layers.47.block_sparse_moe.experts.133.w1", "model.layers.47.block_sparse_moe.experts.134.w1", "model.layers.47.block_sparse_moe.experts.135.w1", "model.layers.47.block_sparse_moe.experts.136.w1", "model.layers.47.block_sparse_moe.experts.137.w1", "model.layers.47.block_sparse_moe.experts.138.w1", "model.layers.47.block_sparse_moe.experts.139.w1", "model.layers.47.block_sparse_moe.experts.140.w1", "model.layers.47.block_sparse_moe.experts.141.w1", "model.layers.47.block_sparse_moe.experts.142.w1", "model.layers.47.block_sparse_moe.experts.143.w1", "model.layers.47.block_sparse_moe.experts.144.w1", "model.layers.47.block_sparse_moe.experts.145.w1", "model.layers.47.block_sparse_moe.experts.146.w1", "model.layers.47.block_sparse_moe.experts.147.w1", "model.layers.47.block_sparse_moe.experts.148.w1", "model.layers.47.block_sparse_moe.experts.149.w1", "model.layers.47.block_sparse_moe.experts.150.w1", "model.layers.47.block_sparse_moe.experts.151.w1", "model.layers.47.block_sparse_moe.experts.152.w1", "model.layers.47.block_sparse_moe.experts.153.w1", "model.layers.47.block_sparse_moe.experts.154.w1", "model.layers.47.block_sparse_moe.experts.155.w1", "model.layers.47.block_sparse_moe.experts.156.w1", "model.layers.47.block_sparse_moe.experts.157.w1", "model.layers.47.block_sparse_moe.experts.158.w1", "model.layers.47.block_sparse_moe.experts.159.w1", "model.layers.47.block_sparse_moe.experts.160.w1", "model.layers.47.block_sparse_moe.experts.161.w1", "model.layers.47.block_sparse_moe.experts.162.w1", "model.layers.47.block_sparse_moe.experts.163.w1", "model.layers.47.block_sparse_moe.experts.164.w1", "model.layers.47.block_sparse_moe.experts.165.w1", "model.layers.47.block_sparse_moe.experts.166.w1", "model.layers.47.block_sparse_moe.experts.167.w1", "model.layers.47.block_sparse_moe.experts.168.w1", "model.layers.47.block_sparse_moe.experts.169.w1", "model.layers.47.block_sparse_moe.experts.170.w1", "model.layers.47.block_sparse_moe.experts.171.w1", "model.layers.47.block_sparse_moe.experts.172.w1", "model.layers.47.block_sparse_moe.experts.173.w1", "model.layers.47.block_sparse_moe.experts.174.w1", "model.layers.47.block_sparse_moe.experts.175.w1", "model.layers.47.block_sparse_moe.experts.176.w1", "model.layers.47.block_sparse_moe.experts.177.w1", "model.layers.47.block_sparse_moe.experts.178.w1", "model.layers.47.block_sparse_moe.experts.179.w1", "model.layers.47.block_sparse_moe.experts.180.w1", "model.layers.47.block_sparse_moe.experts.181.w1", "model.layers.47.block_sparse_moe.experts.182.w1", "model.layers.47.block_sparse_moe.experts.183.w1", "model.layers.47.block_sparse_moe.experts.184.w1", "model.layers.47.block_sparse_moe.experts.185.w1", "model.layers.47.block_sparse_moe.experts.186.w1", "model.layers.47.block_sparse_moe.experts.187.w1", "model.layers.47.block_sparse_moe.experts.188.w1", "model.layers.47.block_sparse_moe.experts.189.w1", "model.layers.47.block_sparse_moe.experts.190.w1", "model.layers.47.block_sparse_moe.experts.191.w1", "model.layers.47.block_sparse_moe.experts.192.w1", "model.layers.47.block_sparse_moe.experts.193.w1", "model.layers.47.block_sparse_moe.experts.194.w1", "model.layers.47.block_sparse_moe.experts.195.w1", "model.layers.47.block_sparse_moe.experts.196.w1", "model.layers.47.block_sparse_moe.experts.197.w1", "model.layers.47.block_sparse_moe.experts.198.w1", "model.layers.47.block_sparse_moe.experts.199.w1", "model.layers.47.block_sparse_moe.experts.200.w1", "model.layers.47.block_sparse_moe.experts.201.w1", "model.layers.47.block_sparse_moe.experts.202.w1", "model.layers.47.block_sparse_moe.experts.203.w1", "model.layers.47.block_sparse_moe.experts.204.w1", "model.layers.47.block_sparse_moe.experts.205.w1", "model.layers.47.block_sparse_moe.experts.206.w1", "model.layers.47.block_sparse_moe.experts.207.w1", "model.layers.47.block_sparse_moe.experts.208.w1", "model.layers.47.block_sparse_moe.experts.209.w1", "model.layers.47.block_sparse_moe.experts.210.w1", "model.layers.47.block_sparse_moe.experts.211.w1", "model.layers.47.block_sparse_moe.experts.212.w1", "model.layers.47.block_sparse_moe.experts.213.w1", "model.layers.47.block_sparse_moe.experts.214.w1", "model.layers.47.block_sparse_moe.experts.215.w1", "model.layers.47.block_sparse_moe.experts.216.w1", "model.layers.47.block_sparse_moe.experts.217.w1", "model.layers.47.block_sparse_moe.experts.218.w1", "model.layers.47.block_sparse_moe.experts.219.w1", "model.layers.47.block_sparse_moe.experts.220.w1", "model.layers.47.block_sparse_moe.experts.221.w1", "model.layers.47.block_sparse_moe.experts.222.w1", "model.layers.47.block_sparse_moe.experts.223.w1", "model.layers.47.block_sparse_moe.experts.224.w1", "model.layers.47.block_sparse_moe.experts.225.w1", "model.layers.47.block_sparse_moe.experts.226.w1", "model.layers.47.block_sparse_moe.experts.227.w1", "model.layers.47.block_sparse_moe.experts.228.w1", "model.layers.47.block_sparse_moe.experts.229.w1", "model.layers.47.block_sparse_moe.experts.230.w1", "model.layers.47.block_sparse_moe.experts.231.w1", "model.layers.47.block_sparse_moe.experts.232.w1", "model.layers.47.block_sparse_moe.experts.233.w1", "model.layers.47.block_sparse_moe.experts.234.w1", "model.layers.47.block_sparse_moe.experts.235.w1", "model.layers.47.block_sparse_moe.experts.236.w1", "model.layers.47.block_sparse_moe.experts.237.w1", "model.layers.47.block_sparse_moe.experts.238.w1", "model.layers.47.block_sparse_moe.experts.239.w1", "model.layers.47.block_sparse_moe.experts.240.w1", "model.layers.47.block_sparse_moe.experts.241.w1", "model.layers.47.block_sparse_moe.experts.242.w1", "model.layers.47.block_sparse_moe.experts.243.w1", "model.layers.47.block_sparse_moe.experts.244.w1", "model.layers.47.block_sparse_moe.experts.245.w1", "model.layers.47.block_sparse_moe.experts.246.w1", "model.layers.47.block_sparse_moe.experts.247.w1", "model.layers.47.block_sparse_moe.experts.248.w1", "model.layers.47.block_sparse_moe.experts.249.w1", "model.layers.47.block_sparse_moe.experts.250.w1", "model.layers.47.block_sparse_moe.experts.251.w1", "model.layers.47.block_sparse_moe.experts.252.w1", "model.layers.47.block_sparse_moe.experts.253.w1", "model.layers.47.block_sparse_moe.experts.254.w1", "model.layers.47.block_sparse_moe.experts.255.w1", "model.layers.47.block_sparse_moe.experts.0.w3", "model.layers.47.block_sparse_moe.experts.1.w3", "model.layers.47.block_sparse_moe.experts.2.w3", "model.layers.47.block_sparse_moe.experts.3.w3", "model.layers.47.block_sparse_moe.experts.4.w3", "model.layers.47.block_sparse_moe.experts.5.w3", "model.layers.47.block_sparse_moe.experts.6.w3", "model.layers.47.block_sparse_moe.experts.7.w3", "model.layers.47.block_sparse_moe.experts.8.w3", "model.layers.47.block_sparse_moe.experts.9.w3", "model.layers.47.block_sparse_moe.experts.10.w3", "model.layers.47.block_sparse_moe.experts.11.w3", "model.layers.47.block_sparse_moe.experts.12.w3", "model.layers.47.block_sparse_moe.experts.13.w3", "model.layers.47.block_sparse_moe.experts.14.w3", "model.layers.47.block_sparse_moe.experts.15.w3", "model.layers.47.block_sparse_moe.experts.16.w3", "model.layers.47.block_sparse_moe.experts.17.w3", "model.layers.47.block_sparse_moe.experts.18.w3", "model.layers.47.block_sparse_moe.experts.19.w3", "model.layers.47.block_sparse_moe.experts.20.w3", "model.layers.47.block_sparse_moe.experts.21.w3", "model.layers.47.block_sparse_moe.experts.22.w3", "model.layers.47.block_sparse_moe.experts.23.w3", "model.layers.47.block_sparse_moe.experts.24.w3", "model.layers.47.block_sparse_moe.experts.25.w3", "model.layers.47.block_sparse_moe.experts.26.w3", "model.layers.47.block_sparse_moe.experts.27.w3", "model.layers.47.block_sparse_moe.experts.28.w3", "model.layers.47.block_sparse_moe.experts.29.w3", "model.layers.47.block_sparse_moe.experts.30.w3", "model.layers.47.block_sparse_moe.experts.31.w3", "model.layers.47.block_sparse_moe.experts.32.w3", "model.layers.47.block_sparse_moe.experts.33.w3", "model.layers.47.block_sparse_moe.experts.34.w3", "model.layers.47.block_sparse_moe.experts.35.w3", "model.layers.47.block_sparse_moe.experts.36.w3", "model.layers.47.block_sparse_moe.experts.37.w3", "model.layers.47.block_sparse_moe.experts.38.w3", "model.layers.47.block_sparse_moe.experts.39.w3", "model.layers.47.block_sparse_moe.experts.40.w3", "model.layers.47.block_sparse_moe.experts.41.w3", "model.layers.47.block_sparse_moe.experts.42.w3", "model.layers.47.block_sparse_moe.experts.43.w3", "model.layers.47.block_sparse_moe.experts.44.w3", "model.layers.47.block_sparse_moe.experts.45.w3", "model.layers.47.block_sparse_moe.experts.46.w3", "model.layers.47.block_sparse_moe.experts.47.w3", "model.layers.47.block_sparse_moe.experts.48.w3", "model.layers.47.block_sparse_moe.experts.49.w3", "model.layers.47.block_sparse_moe.experts.50.w3", "model.layers.47.block_sparse_moe.experts.51.w3", "model.layers.47.block_sparse_moe.experts.52.w3", "model.layers.47.block_sparse_moe.experts.53.w3", "model.layers.47.block_sparse_moe.experts.54.w3", "model.layers.47.block_sparse_moe.experts.55.w3", "model.layers.47.block_sparse_moe.experts.56.w3", "model.layers.47.block_sparse_moe.experts.57.w3", "model.layers.47.block_sparse_moe.experts.58.w3", "model.layers.47.block_sparse_moe.experts.59.w3", "model.layers.47.block_sparse_moe.experts.60.w3", "model.layers.47.block_sparse_moe.experts.61.w3", "model.layers.47.block_sparse_moe.experts.62.w3", "model.layers.47.block_sparse_moe.experts.63.w3", "model.layers.47.block_sparse_moe.experts.64.w3", "model.layers.47.block_sparse_moe.experts.65.w3", "model.layers.47.block_sparse_moe.experts.66.w3", "model.layers.47.block_sparse_moe.experts.67.w3", "model.layers.47.block_sparse_moe.experts.68.w3", "model.layers.47.block_sparse_moe.experts.69.w3", "model.layers.47.block_sparse_moe.experts.70.w3", "model.layers.47.block_sparse_moe.experts.71.w3", "model.layers.47.block_sparse_moe.experts.72.w3", "model.layers.47.block_sparse_moe.experts.73.w3", "model.layers.47.block_sparse_moe.experts.74.w3", "model.layers.47.block_sparse_moe.experts.75.w3", "model.layers.47.block_sparse_moe.experts.76.w3", "model.layers.47.block_sparse_moe.experts.77.w3", "model.layers.47.block_sparse_moe.experts.78.w3", "model.layers.47.block_sparse_moe.experts.79.w3", "model.layers.47.block_sparse_moe.experts.80.w3", "model.layers.47.block_sparse_moe.experts.81.w3", "model.layers.47.block_sparse_moe.experts.82.w3", "model.layers.47.block_sparse_moe.experts.83.w3", "model.layers.47.block_sparse_moe.experts.84.w3", "model.layers.47.block_sparse_moe.experts.85.w3", "model.layers.47.block_sparse_moe.experts.86.w3", "model.layers.47.block_sparse_moe.experts.87.w3", "model.layers.47.block_sparse_moe.experts.88.w3", "model.layers.47.block_sparse_moe.experts.89.w3", "model.layers.47.block_sparse_moe.experts.90.w3", "model.layers.47.block_sparse_moe.experts.91.w3", "model.layers.47.block_sparse_moe.experts.92.w3", "model.layers.47.block_sparse_moe.experts.93.w3", "model.layers.47.block_sparse_moe.experts.94.w3", "model.layers.47.block_sparse_moe.experts.95.w3", "model.layers.47.block_sparse_moe.experts.96.w3", "model.layers.47.block_sparse_moe.experts.97.w3", "model.layers.47.block_sparse_moe.experts.98.w3", "model.layers.47.block_sparse_moe.experts.99.w3", "model.layers.47.block_sparse_moe.experts.100.w3", "model.layers.47.block_sparse_moe.experts.101.w3", "model.layers.47.block_sparse_moe.experts.102.w3", "model.layers.47.block_sparse_moe.experts.103.w3", "model.layers.47.block_sparse_moe.experts.104.w3", "model.layers.47.block_sparse_moe.experts.105.w3", "model.layers.47.block_sparse_moe.experts.106.w3", "model.layers.47.block_sparse_moe.experts.107.w3", "model.layers.47.block_sparse_moe.experts.108.w3", "model.layers.47.block_sparse_moe.experts.109.w3", "model.layers.47.block_sparse_moe.experts.110.w3", "model.layers.47.block_sparse_moe.experts.111.w3", "model.layers.47.block_sparse_moe.experts.112.w3", "model.layers.47.block_sparse_moe.experts.113.w3", "model.layers.47.block_sparse_moe.experts.114.w3", "model.layers.47.block_sparse_moe.experts.115.w3", "model.layers.47.block_sparse_moe.experts.116.w3", "model.layers.47.block_sparse_moe.experts.117.w3", "model.layers.47.block_sparse_moe.experts.118.w3", "model.layers.47.block_sparse_moe.experts.119.w3", "model.layers.47.block_sparse_moe.experts.120.w3", "model.layers.47.block_sparse_moe.experts.121.w3", "model.layers.47.block_sparse_moe.experts.122.w3", "model.layers.47.block_sparse_moe.experts.123.w3", "model.layers.47.block_sparse_moe.experts.124.w3", "model.layers.47.block_sparse_moe.experts.125.w3", "model.layers.47.block_sparse_moe.experts.126.w3", "model.layers.47.block_sparse_moe.experts.127.w3", "model.layers.47.block_sparse_moe.experts.128.w3", "model.layers.47.block_sparse_moe.experts.129.w3", "model.layers.47.block_sparse_moe.experts.130.w3", "model.layers.47.block_sparse_moe.experts.131.w3", "model.layers.47.block_sparse_moe.experts.132.w3", "model.layers.47.block_sparse_moe.experts.133.w3", "model.layers.47.block_sparse_moe.experts.134.w3", "model.layers.47.block_sparse_moe.experts.135.w3", "model.layers.47.block_sparse_moe.experts.136.w3", "model.layers.47.block_sparse_moe.experts.137.w3", "model.layers.47.block_sparse_moe.experts.138.w3", "model.layers.47.block_sparse_moe.experts.139.w3", "model.layers.47.block_sparse_moe.experts.140.w3", "model.layers.47.block_sparse_moe.experts.141.w3", "model.layers.47.block_sparse_moe.experts.142.w3", "model.layers.47.block_sparse_moe.experts.143.w3", "model.layers.47.block_sparse_moe.experts.144.w3", "model.layers.47.block_sparse_moe.experts.145.w3", "model.layers.47.block_sparse_moe.experts.146.w3", "model.layers.47.block_sparse_moe.experts.147.w3", "model.layers.47.block_sparse_moe.experts.148.w3", "model.layers.47.block_sparse_moe.experts.149.w3", "model.layers.47.block_sparse_moe.experts.150.w3", "model.layers.47.block_sparse_moe.experts.151.w3", "model.layers.47.block_sparse_moe.experts.152.w3", "model.layers.47.block_sparse_moe.experts.153.w3", "model.layers.47.block_sparse_moe.experts.154.w3", "model.layers.47.block_sparse_moe.experts.155.w3", "model.layers.47.block_sparse_moe.experts.156.w3", "model.layers.47.block_sparse_moe.experts.157.w3", "model.layers.47.block_sparse_moe.experts.158.w3", "model.layers.47.block_sparse_moe.experts.159.w3", "model.layers.47.block_sparse_moe.experts.160.w3", "model.layers.47.block_sparse_moe.experts.161.w3", "model.layers.47.block_sparse_moe.experts.162.w3", "model.layers.47.block_sparse_moe.experts.163.w3", "model.layers.47.block_sparse_moe.experts.164.w3", "model.layers.47.block_sparse_moe.experts.165.w3", "model.layers.47.block_sparse_moe.experts.166.w3", "model.layers.47.block_sparse_moe.experts.167.w3", "model.layers.47.block_sparse_moe.experts.168.w3", "model.layers.47.block_sparse_moe.experts.169.w3", "model.layers.47.block_sparse_moe.experts.170.w3", "model.layers.47.block_sparse_moe.experts.171.w3", "model.layers.47.block_sparse_moe.experts.172.w3", "model.layers.47.block_sparse_moe.experts.173.w3", "model.layers.47.block_sparse_moe.experts.174.w3", "model.layers.47.block_sparse_moe.experts.175.w3", "model.layers.47.block_sparse_moe.experts.176.w3", "model.layers.47.block_sparse_moe.experts.177.w3", "model.layers.47.block_sparse_moe.experts.178.w3", "model.layers.47.block_sparse_moe.experts.179.w3", "model.layers.47.block_sparse_moe.experts.180.w3", "model.layers.47.block_sparse_moe.experts.181.w3", "model.layers.47.block_sparse_moe.experts.182.w3", "model.layers.47.block_sparse_moe.experts.183.w3", "model.layers.47.block_sparse_moe.experts.184.w3", "model.layers.47.block_sparse_moe.experts.185.w3", "model.layers.47.block_sparse_moe.experts.186.w3", "model.layers.47.block_sparse_moe.experts.187.w3", "model.layers.47.block_sparse_moe.experts.188.w3", "model.layers.47.block_sparse_moe.experts.189.w3", "model.layers.47.block_sparse_moe.experts.190.w3", "model.layers.47.block_sparse_moe.experts.191.w3", "model.layers.47.block_sparse_moe.experts.192.w3", "model.layers.47.block_sparse_moe.experts.193.w3", "model.layers.47.block_sparse_moe.experts.194.w3", "model.layers.47.block_sparse_moe.experts.195.w3", "model.layers.47.block_sparse_moe.experts.196.w3", "model.layers.47.block_sparse_moe.experts.197.w3", "model.layers.47.block_sparse_moe.experts.198.w3", "model.layers.47.block_sparse_moe.experts.199.w3", "model.layers.47.block_sparse_moe.experts.200.w3", "model.layers.47.block_sparse_moe.experts.201.w3", "model.layers.47.block_sparse_moe.experts.202.w3", "model.layers.47.block_sparse_moe.experts.203.w3", "model.layers.47.block_sparse_moe.experts.204.w3", "model.layers.47.block_sparse_moe.experts.205.w3", "model.layers.47.block_sparse_moe.experts.206.w3", "model.layers.47.block_sparse_moe.experts.207.w3", "model.layers.47.block_sparse_moe.experts.208.w3", "model.layers.47.block_sparse_moe.experts.209.w3", "model.layers.47.block_sparse_moe.experts.210.w3", "model.layers.47.block_sparse_moe.experts.211.w3", "model.layers.47.block_sparse_moe.experts.212.w3", "model.layers.47.block_sparse_moe.experts.213.w3", "model.layers.47.block_sparse_moe.experts.214.w3", "model.layers.47.block_sparse_moe.experts.215.w3", "model.layers.47.block_sparse_moe.experts.216.w3", "model.layers.47.block_sparse_moe.experts.217.w3", "model.layers.47.block_sparse_moe.experts.218.w3", "model.layers.47.block_sparse_moe.experts.219.w3", "model.layers.47.block_sparse_moe.experts.220.w3", "model.layers.47.block_sparse_moe.experts.221.w3", "model.layers.47.block_sparse_moe.experts.222.w3", "model.layers.47.block_sparse_moe.experts.223.w3", "model.layers.47.block_sparse_moe.experts.224.w3", "model.layers.47.block_sparse_moe.experts.225.w3", "model.layers.47.block_sparse_moe.experts.226.w3", "model.layers.47.block_sparse_moe.experts.227.w3", "model.layers.47.block_sparse_moe.experts.228.w3", "model.layers.47.block_sparse_moe.experts.229.w3", "model.layers.47.block_sparse_moe.experts.230.w3", "model.layers.47.block_sparse_moe.experts.231.w3", "model.layers.47.block_sparse_moe.experts.232.w3", "model.layers.47.block_sparse_moe.experts.233.w3", "model.layers.47.block_sparse_moe.experts.234.w3", "model.layers.47.block_sparse_moe.experts.235.w3", "model.layers.47.block_sparse_moe.experts.236.w3", "model.layers.47.block_sparse_moe.experts.237.w3", "model.layers.47.block_sparse_moe.experts.238.w3", "model.layers.47.block_sparse_moe.experts.239.w3", "model.layers.47.block_sparse_moe.experts.240.w3", "model.layers.47.block_sparse_moe.experts.241.w3", "model.layers.47.block_sparse_moe.experts.242.w3", "model.layers.47.block_sparse_moe.experts.243.w3", "model.layers.47.block_sparse_moe.experts.244.w3", "model.layers.47.block_sparse_moe.experts.245.w3", "model.layers.47.block_sparse_moe.experts.246.w3", "model.layers.47.block_sparse_moe.experts.247.w3", "model.layers.47.block_sparse_moe.experts.248.w3", "model.layers.47.block_sparse_moe.experts.249.w3", "model.layers.47.block_sparse_moe.experts.250.w3", "model.layers.47.block_sparse_moe.experts.251.w3", "model.layers.47.block_sparse_moe.experts.252.w3", "model.layers.47.block_sparse_moe.experts.253.w3", "model.layers.47.block_sparse_moe.experts.254.w3", "model.layers.47.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.00023653022944930546, "dbits": 2415919104 } ] }, { "idx": 239, "layers": [ "model.layers.47.block_sparse_moe.experts.0.w2", "model.layers.47.block_sparse_moe.experts.1.w2", "model.layers.47.block_sparse_moe.experts.2.w2", "model.layers.47.block_sparse_moe.experts.3.w2", "model.layers.47.block_sparse_moe.experts.4.w2", "model.layers.47.block_sparse_moe.experts.5.w2", "model.layers.47.block_sparse_moe.experts.6.w2", "model.layers.47.block_sparse_moe.experts.7.w2", "model.layers.47.block_sparse_moe.experts.8.w2", "model.layers.47.block_sparse_moe.experts.9.w2", "model.layers.47.block_sparse_moe.experts.10.w2", "model.layers.47.block_sparse_moe.experts.11.w2", "model.layers.47.block_sparse_moe.experts.12.w2", "model.layers.47.block_sparse_moe.experts.13.w2", "model.layers.47.block_sparse_moe.experts.14.w2", "model.layers.47.block_sparse_moe.experts.15.w2", "model.layers.47.block_sparse_moe.experts.16.w2", "model.layers.47.block_sparse_moe.experts.17.w2", "model.layers.47.block_sparse_moe.experts.18.w2", "model.layers.47.block_sparse_moe.experts.19.w2", "model.layers.47.block_sparse_moe.experts.20.w2", "model.layers.47.block_sparse_moe.experts.21.w2", "model.layers.47.block_sparse_moe.experts.22.w2", "model.layers.47.block_sparse_moe.experts.23.w2", "model.layers.47.block_sparse_moe.experts.24.w2", "model.layers.47.block_sparse_moe.experts.25.w2", "model.layers.47.block_sparse_moe.experts.26.w2", "model.layers.47.block_sparse_moe.experts.27.w2", "model.layers.47.block_sparse_moe.experts.28.w2", "model.layers.47.block_sparse_moe.experts.29.w2", "model.layers.47.block_sparse_moe.experts.30.w2", "model.layers.47.block_sparse_moe.experts.31.w2", "model.layers.47.block_sparse_moe.experts.32.w2", "model.layers.47.block_sparse_moe.experts.33.w2", "model.layers.47.block_sparse_moe.experts.34.w2", "model.layers.47.block_sparse_moe.experts.35.w2", "model.layers.47.block_sparse_moe.experts.36.w2", "model.layers.47.block_sparse_moe.experts.37.w2", "model.layers.47.block_sparse_moe.experts.38.w2", "model.layers.47.block_sparse_moe.experts.39.w2", "model.layers.47.block_sparse_moe.experts.40.w2", "model.layers.47.block_sparse_moe.experts.41.w2", "model.layers.47.block_sparse_moe.experts.42.w2", "model.layers.47.block_sparse_moe.experts.43.w2", "model.layers.47.block_sparse_moe.experts.44.w2", "model.layers.47.block_sparse_moe.experts.45.w2", "model.layers.47.block_sparse_moe.experts.46.w2", "model.layers.47.block_sparse_moe.experts.47.w2", "model.layers.47.block_sparse_moe.experts.48.w2", "model.layers.47.block_sparse_moe.experts.49.w2", "model.layers.47.block_sparse_moe.experts.50.w2", "model.layers.47.block_sparse_moe.experts.51.w2", "model.layers.47.block_sparse_moe.experts.52.w2", "model.layers.47.block_sparse_moe.experts.53.w2", "model.layers.47.block_sparse_moe.experts.54.w2", "model.layers.47.block_sparse_moe.experts.55.w2", "model.layers.47.block_sparse_moe.experts.56.w2", "model.layers.47.block_sparse_moe.experts.57.w2", "model.layers.47.block_sparse_moe.experts.58.w2", "model.layers.47.block_sparse_moe.experts.59.w2", "model.layers.47.block_sparse_moe.experts.60.w2", "model.layers.47.block_sparse_moe.experts.61.w2", "model.layers.47.block_sparse_moe.experts.62.w2", "model.layers.47.block_sparse_moe.experts.63.w2", "model.layers.47.block_sparse_moe.experts.64.w2", "model.layers.47.block_sparse_moe.experts.65.w2", "model.layers.47.block_sparse_moe.experts.66.w2", "model.layers.47.block_sparse_moe.experts.67.w2", "model.layers.47.block_sparse_moe.experts.68.w2", "model.layers.47.block_sparse_moe.experts.69.w2", "model.layers.47.block_sparse_moe.experts.70.w2", "model.layers.47.block_sparse_moe.experts.71.w2", "model.layers.47.block_sparse_moe.experts.72.w2", "model.layers.47.block_sparse_moe.experts.73.w2", "model.layers.47.block_sparse_moe.experts.74.w2", "model.layers.47.block_sparse_moe.experts.75.w2", "model.layers.47.block_sparse_moe.experts.76.w2", "model.layers.47.block_sparse_moe.experts.77.w2", "model.layers.47.block_sparse_moe.experts.78.w2", "model.layers.47.block_sparse_moe.experts.79.w2", "model.layers.47.block_sparse_moe.experts.80.w2", "model.layers.47.block_sparse_moe.experts.81.w2", "model.layers.47.block_sparse_moe.experts.82.w2", "model.layers.47.block_sparse_moe.experts.83.w2", "model.layers.47.block_sparse_moe.experts.84.w2", "model.layers.47.block_sparse_moe.experts.85.w2", "model.layers.47.block_sparse_moe.experts.86.w2", "model.layers.47.block_sparse_moe.experts.87.w2", "model.layers.47.block_sparse_moe.experts.88.w2", "model.layers.47.block_sparse_moe.experts.89.w2", "model.layers.47.block_sparse_moe.experts.90.w2", "model.layers.47.block_sparse_moe.experts.91.w2", "model.layers.47.block_sparse_moe.experts.92.w2", "model.layers.47.block_sparse_moe.experts.93.w2", "model.layers.47.block_sparse_moe.experts.94.w2", "model.layers.47.block_sparse_moe.experts.95.w2", "model.layers.47.block_sparse_moe.experts.96.w2", "model.layers.47.block_sparse_moe.experts.97.w2", "model.layers.47.block_sparse_moe.experts.98.w2", "model.layers.47.block_sparse_moe.experts.99.w2", "model.layers.47.block_sparse_moe.experts.100.w2", "model.layers.47.block_sparse_moe.experts.101.w2", "model.layers.47.block_sparse_moe.experts.102.w2", "model.layers.47.block_sparse_moe.experts.103.w2", "model.layers.47.block_sparse_moe.experts.104.w2", "model.layers.47.block_sparse_moe.experts.105.w2", "model.layers.47.block_sparse_moe.experts.106.w2", "model.layers.47.block_sparse_moe.experts.107.w2", "model.layers.47.block_sparse_moe.experts.108.w2", "model.layers.47.block_sparse_moe.experts.109.w2", "model.layers.47.block_sparse_moe.experts.110.w2", "model.layers.47.block_sparse_moe.experts.111.w2", "model.layers.47.block_sparse_moe.experts.112.w2", "model.layers.47.block_sparse_moe.experts.113.w2", "model.layers.47.block_sparse_moe.experts.114.w2", "model.layers.47.block_sparse_moe.experts.115.w2", "model.layers.47.block_sparse_moe.experts.116.w2", "model.layers.47.block_sparse_moe.experts.117.w2", "model.layers.47.block_sparse_moe.experts.118.w2", "model.layers.47.block_sparse_moe.experts.119.w2", "model.layers.47.block_sparse_moe.experts.120.w2", "model.layers.47.block_sparse_moe.experts.121.w2", "model.layers.47.block_sparse_moe.experts.122.w2", "model.layers.47.block_sparse_moe.experts.123.w2", "model.layers.47.block_sparse_moe.experts.124.w2", "model.layers.47.block_sparse_moe.experts.125.w2", "model.layers.47.block_sparse_moe.experts.126.w2", "model.layers.47.block_sparse_moe.experts.127.w2", "model.layers.47.block_sparse_moe.experts.128.w2", "model.layers.47.block_sparse_moe.experts.129.w2", "model.layers.47.block_sparse_moe.experts.130.w2", "model.layers.47.block_sparse_moe.experts.131.w2", "model.layers.47.block_sparse_moe.experts.132.w2", "model.layers.47.block_sparse_moe.experts.133.w2", "model.layers.47.block_sparse_moe.experts.134.w2", "model.layers.47.block_sparse_moe.experts.135.w2", "model.layers.47.block_sparse_moe.experts.136.w2", "model.layers.47.block_sparse_moe.experts.137.w2", "model.layers.47.block_sparse_moe.experts.138.w2", "model.layers.47.block_sparse_moe.experts.139.w2", "model.layers.47.block_sparse_moe.experts.140.w2", "model.layers.47.block_sparse_moe.experts.141.w2", "model.layers.47.block_sparse_moe.experts.142.w2", "model.layers.47.block_sparse_moe.experts.143.w2", "model.layers.47.block_sparse_moe.experts.144.w2", "model.layers.47.block_sparse_moe.experts.145.w2", "model.layers.47.block_sparse_moe.experts.146.w2", "model.layers.47.block_sparse_moe.experts.147.w2", "model.layers.47.block_sparse_moe.experts.148.w2", "model.layers.47.block_sparse_moe.experts.149.w2", "model.layers.47.block_sparse_moe.experts.150.w2", "model.layers.47.block_sparse_moe.experts.151.w2", "model.layers.47.block_sparse_moe.experts.152.w2", "model.layers.47.block_sparse_moe.experts.153.w2", "model.layers.47.block_sparse_moe.experts.154.w2", "model.layers.47.block_sparse_moe.experts.155.w2", "model.layers.47.block_sparse_moe.experts.156.w2", "model.layers.47.block_sparse_moe.experts.157.w2", "model.layers.47.block_sparse_moe.experts.158.w2", "model.layers.47.block_sparse_moe.experts.159.w2", "model.layers.47.block_sparse_moe.experts.160.w2", "model.layers.47.block_sparse_moe.experts.161.w2", "model.layers.47.block_sparse_moe.experts.162.w2", "model.layers.47.block_sparse_moe.experts.163.w2", "model.layers.47.block_sparse_moe.experts.164.w2", "model.layers.47.block_sparse_moe.experts.165.w2", "model.layers.47.block_sparse_moe.experts.166.w2", "model.layers.47.block_sparse_moe.experts.167.w2", "model.layers.47.block_sparse_moe.experts.168.w2", "model.layers.47.block_sparse_moe.experts.169.w2", "model.layers.47.block_sparse_moe.experts.170.w2", "model.layers.47.block_sparse_moe.experts.171.w2", "model.layers.47.block_sparse_moe.experts.172.w2", "model.layers.47.block_sparse_moe.experts.173.w2", "model.layers.47.block_sparse_moe.experts.174.w2", "model.layers.47.block_sparse_moe.experts.175.w2", "model.layers.47.block_sparse_moe.experts.176.w2", "model.layers.47.block_sparse_moe.experts.177.w2", "model.layers.47.block_sparse_moe.experts.178.w2", "model.layers.47.block_sparse_moe.experts.179.w2", "model.layers.47.block_sparse_moe.experts.180.w2", "model.layers.47.block_sparse_moe.experts.181.w2", "model.layers.47.block_sparse_moe.experts.182.w2", "model.layers.47.block_sparse_moe.experts.183.w2", "model.layers.47.block_sparse_moe.experts.184.w2", "model.layers.47.block_sparse_moe.experts.185.w2", "model.layers.47.block_sparse_moe.experts.186.w2", "model.layers.47.block_sparse_moe.experts.187.w2", "model.layers.47.block_sparse_moe.experts.188.w2", "model.layers.47.block_sparse_moe.experts.189.w2", "model.layers.47.block_sparse_moe.experts.190.w2", "model.layers.47.block_sparse_moe.experts.191.w2", "model.layers.47.block_sparse_moe.experts.192.w2", "model.layers.47.block_sparse_moe.experts.193.w2", "model.layers.47.block_sparse_moe.experts.194.w2", "model.layers.47.block_sparse_moe.experts.195.w2", "model.layers.47.block_sparse_moe.experts.196.w2", "model.layers.47.block_sparse_moe.experts.197.w2", "model.layers.47.block_sparse_moe.experts.198.w2", "model.layers.47.block_sparse_moe.experts.199.w2", "model.layers.47.block_sparse_moe.experts.200.w2", "model.layers.47.block_sparse_moe.experts.201.w2", "model.layers.47.block_sparse_moe.experts.202.w2", "model.layers.47.block_sparse_moe.experts.203.w2", "model.layers.47.block_sparse_moe.experts.204.w2", "model.layers.47.block_sparse_moe.experts.205.w2", "model.layers.47.block_sparse_moe.experts.206.w2", "model.layers.47.block_sparse_moe.experts.207.w2", "model.layers.47.block_sparse_moe.experts.208.w2", "model.layers.47.block_sparse_moe.experts.209.w2", "model.layers.47.block_sparse_moe.experts.210.w2", "model.layers.47.block_sparse_moe.experts.211.w2", "model.layers.47.block_sparse_moe.experts.212.w2", "model.layers.47.block_sparse_moe.experts.213.w2", "model.layers.47.block_sparse_moe.experts.214.w2", "model.layers.47.block_sparse_moe.experts.215.w2", "model.layers.47.block_sparse_moe.experts.216.w2", "model.layers.47.block_sparse_moe.experts.217.w2", "model.layers.47.block_sparse_moe.experts.218.w2", "model.layers.47.block_sparse_moe.experts.219.w2", "model.layers.47.block_sparse_moe.experts.220.w2", "model.layers.47.block_sparse_moe.experts.221.w2", "model.layers.47.block_sparse_moe.experts.222.w2", "model.layers.47.block_sparse_moe.experts.223.w2", "model.layers.47.block_sparse_moe.experts.224.w2", "model.layers.47.block_sparse_moe.experts.225.w2", "model.layers.47.block_sparse_moe.experts.226.w2", "model.layers.47.block_sparse_moe.experts.227.w2", "model.layers.47.block_sparse_moe.experts.228.w2", "model.layers.47.block_sparse_moe.experts.229.w2", "model.layers.47.block_sparse_moe.experts.230.w2", "model.layers.47.block_sparse_moe.experts.231.w2", "model.layers.47.block_sparse_moe.experts.232.w2", "model.layers.47.block_sparse_moe.experts.233.w2", "model.layers.47.block_sparse_moe.experts.234.w2", "model.layers.47.block_sparse_moe.experts.235.w2", "model.layers.47.block_sparse_moe.experts.236.w2", "model.layers.47.block_sparse_moe.experts.237.w2", "model.layers.47.block_sparse_moe.experts.238.w2", "model.layers.47.block_sparse_moe.experts.239.w2", "model.layers.47.block_sparse_moe.experts.240.w2", "model.layers.47.block_sparse_moe.experts.241.w2", "model.layers.47.block_sparse_moe.experts.242.w2", "model.layers.47.block_sparse_moe.experts.243.w2", "model.layers.47.block_sparse_moe.experts.244.w2", "model.layers.47.block_sparse_moe.experts.245.w2", "model.layers.47.block_sparse_moe.experts.246.w2", "model.layers.47.block_sparse_moe.experts.247.w2", "model.layers.47.block_sparse_moe.experts.248.w2", "model.layers.47.block_sparse_moe.experts.249.w2", "model.layers.47.block_sparse_moe.experts.250.w2", "model.layers.47.block_sparse_moe.experts.251.w2", "model.layers.47.block_sparse_moe.experts.252.w2", "model.layers.47.block_sparse_moe.experts.253.w2", "model.layers.47.block_sparse_moe.experts.254.w2", "model.layers.47.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.0001275964081287051, "dbits": 1207959552 } ] }, { "idx": 240, "layers": [ "model.layers.48.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0001716710627079343, "dbits": 18874368 } ] }, { "idx": 241, "layers": [ "model.layers.48.self_attn.k_proj", "model.layers.48.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0022698674350977166, "dbits": 6291456 } ] }, { "idx": 242, "layers": [ "model.layers.48.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0013517342507839425, "dbits": 18874368 } ] }, { "idx": 243, "layers": [ "model.layers.48.block_sparse_moe.experts.0.w1", "model.layers.48.block_sparse_moe.experts.1.w1", "model.layers.48.block_sparse_moe.experts.2.w1", "model.layers.48.block_sparse_moe.experts.3.w1", "model.layers.48.block_sparse_moe.experts.4.w1", "model.layers.48.block_sparse_moe.experts.5.w1", "model.layers.48.block_sparse_moe.experts.6.w1", "model.layers.48.block_sparse_moe.experts.7.w1", "model.layers.48.block_sparse_moe.experts.8.w1", "model.layers.48.block_sparse_moe.experts.9.w1", "model.layers.48.block_sparse_moe.experts.10.w1", "model.layers.48.block_sparse_moe.experts.11.w1", "model.layers.48.block_sparse_moe.experts.12.w1", "model.layers.48.block_sparse_moe.experts.13.w1", "model.layers.48.block_sparse_moe.experts.14.w1", "model.layers.48.block_sparse_moe.experts.15.w1", "model.layers.48.block_sparse_moe.experts.16.w1", "model.layers.48.block_sparse_moe.experts.17.w1", "model.layers.48.block_sparse_moe.experts.18.w1", "model.layers.48.block_sparse_moe.experts.19.w1", "model.layers.48.block_sparse_moe.experts.20.w1", "model.layers.48.block_sparse_moe.experts.21.w1", "model.layers.48.block_sparse_moe.experts.22.w1", "model.layers.48.block_sparse_moe.experts.23.w1", "model.layers.48.block_sparse_moe.experts.24.w1", "model.layers.48.block_sparse_moe.experts.25.w1", "model.layers.48.block_sparse_moe.experts.26.w1", "model.layers.48.block_sparse_moe.experts.27.w1", "model.layers.48.block_sparse_moe.experts.28.w1", "model.layers.48.block_sparse_moe.experts.29.w1", "model.layers.48.block_sparse_moe.experts.30.w1", "model.layers.48.block_sparse_moe.experts.31.w1", "model.layers.48.block_sparse_moe.experts.32.w1", "model.layers.48.block_sparse_moe.experts.33.w1", "model.layers.48.block_sparse_moe.experts.34.w1", "model.layers.48.block_sparse_moe.experts.35.w1", "model.layers.48.block_sparse_moe.experts.36.w1", "model.layers.48.block_sparse_moe.experts.37.w1", "model.layers.48.block_sparse_moe.experts.38.w1", "model.layers.48.block_sparse_moe.experts.39.w1", "model.layers.48.block_sparse_moe.experts.40.w1", "model.layers.48.block_sparse_moe.experts.41.w1", "model.layers.48.block_sparse_moe.experts.42.w1", "model.layers.48.block_sparse_moe.experts.43.w1", "model.layers.48.block_sparse_moe.experts.44.w1", "model.layers.48.block_sparse_moe.experts.45.w1", "model.layers.48.block_sparse_moe.experts.46.w1", "model.layers.48.block_sparse_moe.experts.47.w1", "model.layers.48.block_sparse_moe.experts.48.w1", "model.layers.48.block_sparse_moe.experts.49.w1", "model.layers.48.block_sparse_moe.experts.50.w1", "model.layers.48.block_sparse_moe.experts.51.w1", "model.layers.48.block_sparse_moe.experts.52.w1", "model.layers.48.block_sparse_moe.experts.53.w1", "model.layers.48.block_sparse_moe.experts.54.w1", "model.layers.48.block_sparse_moe.experts.55.w1", "model.layers.48.block_sparse_moe.experts.56.w1", "model.layers.48.block_sparse_moe.experts.57.w1", "model.layers.48.block_sparse_moe.experts.58.w1", "model.layers.48.block_sparse_moe.experts.59.w1", "model.layers.48.block_sparse_moe.experts.60.w1", "model.layers.48.block_sparse_moe.experts.61.w1", "model.layers.48.block_sparse_moe.experts.62.w1", "model.layers.48.block_sparse_moe.experts.63.w1", "model.layers.48.block_sparse_moe.experts.64.w1", "model.layers.48.block_sparse_moe.experts.65.w1", "model.layers.48.block_sparse_moe.experts.66.w1", "model.layers.48.block_sparse_moe.experts.67.w1", "model.layers.48.block_sparse_moe.experts.68.w1", "model.layers.48.block_sparse_moe.experts.69.w1", "model.layers.48.block_sparse_moe.experts.70.w1", "model.layers.48.block_sparse_moe.experts.71.w1", "model.layers.48.block_sparse_moe.experts.72.w1", "model.layers.48.block_sparse_moe.experts.73.w1", "model.layers.48.block_sparse_moe.experts.74.w1", "model.layers.48.block_sparse_moe.experts.75.w1", "model.layers.48.block_sparse_moe.experts.76.w1", "model.layers.48.block_sparse_moe.experts.77.w1", "model.layers.48.block_sparse_moe.experts.78.w1", "model.layers.48.block_sparse_moe.experts.79.w1", "model.layers.48.block_sparse_moe.experts.80.w1", "model.layers.48.block_sparse_moe.experts.81.w1", "model.layers.48.block_sparse_moe.experts.82.w1", "model.layers.48.block_sparse_moe.experts.83.w1", "model.layers.48.block_sparse_moe.experts.84.w1", "model.layers.48.block_sparse_moe.experts.85.w1", "model.layers.48.block_sparse_moe.experts.86.w1", "model.layers.48.block_sparse_moe.experts.87.w1", "model.layers.48.block_sparse_moe.experts.88.w1", "model.layers.48.block_sparse_moe.experts.89.w1", "model.layers.48.block_sparse_moe.experts.90.w1", "model.layers.48.block_sparse_moe.experts.91.w1", "model.layers.48.block_sparse_moe.experts.92.w1", "model.layers.48.block_sparse_moe.experts.93.w1", "model.layers.48.block_sparse_moe.experts.94.w1", "model.layers.48.block_sparse_moe.experts.95.w1", "model.layers.48.block_sparse_moe.experts.96.w1", "model.layers.48.block_sparse_moe.experts.97.w1", "model.layers.48.block_sparse_moe.experts.98.w1", "model.layers.48.block_sparse_moe.experts.99.w1", "model.layers.48.block_sparse_moe.experts.100.w1", "model.layers.48.block_sparse_moe.experts.101.w1", "model.layers.48.block_sparse_moe.experts.102.w1", "model.layers.48.block_sparse_moe.experts.103.w1", "model.layers.48.block_sparse_moe.experts.104.w1", "model.layers.48.block_sparse_moe.experts.105.w1", "model.layers.48.block_sparse_moe.experts.106.w1", "model.layers.48.block_sparse_moe.experts.107.w1", "model.layers.48.block_sparse_moe.experts.108.w1", "model.layers.48.block_sparse_moe.experts.109.w1", "model.layers.48.block_sparse_moe.experts.110.w1", "model.layers.48.block_sparse_moe.experts.111.w1", "model.layers.48.block_sparse_moe.experts.112.w1", "model.layers.48.block_sparse_moe.experts.113.w1", "model.layers.48.block_sparse_moe.experts.114.w1", "model.layers.48.block_sparse_moe.experts.115.w1", "model.layers.48.block_sparse_moe.experts.116.w1", "model.layers.48.block_sparse_moe.experts.117.w1", "model.layers.48.block_sparse_moe.experts.118.w1", "model.layers.48.block_sparse_moe.experts.119.w1", "model.layers.48.block_sparse_moe.experts.120.w1", "model.layers.48.block_sparse_moe.experts.121.w1", "model.layers.48.block_sparse_moe.experts.122.w1", "model.layers.48.block_sparse_moe.experts.123.w1", "model.layers.48.block_sparse_moe.experts.124.w1", "model.layers.48.block_sparse_moe.experts.125.w1", "model.layers.48.block_sparse_moe.experts.126.w1", "model.layers.48.block_sparse_moe.experts.127.w1", "model.layers.48.block_sparse_moe.experts.128.w1", "model.layers.48.block_sparse_moe.experts.129.w1", "model.layers.48.block_sparse_moe.experts.130.w1", "model.layers.48.block_sparse_moe.experts.131.w1", "model.layers.48.block_sparse_moe.experts.132.w1", "model.layers.48.block_sparse_moe.experts.133.w1", "model.layers.48.block_sparse_moe.experts.134.w1", "model.layers.48.block_sparse_moe.experts.135.w1", "model.layers.48.block_sparse_moe.experts.136.w1", "model.layers.48.block_sparse_moe.experts.137.w1", "model.layers.48.block_sparse_moe.experts.138.w1", "model.layers.48.block_sparse_moe.experts.139.w1", "model.layers.48.block_sparse_moe.experts.140.w1", "model.layers.48.block_sparse_moe.experts.141.w1", "model.layers.48.block_sparse_moe.experts.142.w1", "model.layers.48.block_sparse_moe.experts.143.w1", "model.layers.48.block_sparse_moe.experts.144.w1", "model.layers.48.block_sparse_moe.experts.145.w1", "model.layers.48.block_sparse_moe.experts.146.w1", "model.layers.48.block_sparse_moe.experts.147.w1", "model.layers.48.block_sparse_moe.experts.148.w1", "model.layers.48.block_sparse_moe.experts.149.w1", "model.layers.48.block_sparse_moe.experts.150.w1", "model.layers.48.block_sparse_moe.experts.151.w1", "model.layers.48.block_sparse_moe.experts.152.w1", "model.layers.48.block_sparse_moe.experts.153.w1", "model.layers.48.block_sparse_moe.experts.154.w1", "model.layers.48.block_sparse_moe.experts.155.w1", "model.layers.48.block_sparse_moe.experts.156.w1", "model.layers.48.block_sparse_moe.experts.157.w1", "model.layers.48.block_sparse_moe.experts.158.w1", "model.layers.48.block_sparse_moe.experts.159.w1", "model.layers.48.block_sparse_moe.experts.160.w1", "model.layers.48.block_sparse_moe.experts.161.w1", "model.layers.48.block_sparse_moe.experts.162.w1", "model.layers.48.block_sparse_moe.experts.163.w1", "model.layers.48.block_sparse_moe.experts.164.w1", "model.layers.48.block_sparse_moe.experts.165.w1", "model.layers.48.block_sparse_moe.experts.166.w1", "model.layers.48.block_sparse_moe.experts.167.w1", "model.layers.48.block_sparse_moe.experts.168.w1", "model.layers.48.block_sparse_moe.experts.169.w1", "model.layers.48.block_sparse_moe.experts.170.w1", "model.layers.48.block_sparse_moe.experts.171.w1", "model.layers.48.block_sparse_moe.experts.172.w1", "model.layers.48.block_sparse_moe.experts.173.w1", "model.layers.48.block_sparse_moe.experts.174.w1", "model.layers.48.block_sparse_moe.experts.175.w1", "model.layers.48.block_sparse_moe.experts.176.w1", "model.layers.48.block_sparse_moe.experts.177.w1", "model.layers.48.block_sparse_moe.experts.178.w1", "model.layers.48.block_sparse_moe.experts.179.w1", "model.layers.48.block_sparse_moe.experts.180.w1", "model.layers.48.block_sparse_moe.experts.181.w1", "model.layers.48.block_sparse_moe.experts.182.w1", "model.layers.48.block_sparse_moe.experts.183.w1", "model.layers.48.block_sparse_moe.experts.184.w1", "model.layers.48.block_sparse_moe.experts.185.w1", "model.layers.48.block_sparse_moe.experts.186.w1", "model.layers.48.block_sparse_moe.experts.187.w1", "model.layers.48.block_sparse_moe.experts.188.w1", "model.layers.48.block_sparse_moe.experts.189.w1", "model.layers.48.block_sparse_moe.experts.190.w1", "model.layers.48.block_sparse_moe.experts.191.w1", "model.layers.48.block_sparse_moe.experts.192.w1", "model.layers.48.block_sparse_moe.experts.193.w1", "model.layers.48.block_sparse_moe.experts.194.w1", "model.layers.48.block_sparse_moe.experts.195.w1", "model.layers.48.block_sparse_moe.experts.196.w1", "model.layers.48.block_sparse_moe.experts.197.w1", "model.layers.48.block_sparse_moe.experts.198.w1", "model.layers.48.block_sparse_moe.experts.199.w1", "model.layers.48.block_sparse_moe.experts.200.w1", "model.layers.48.block_sparse_moe.experts.201.w1", "model.layers.48.block_sparse_moe.experts.202.w1", "model.layers.48.block_sparse_moe.experts.203.w1", "model.layers.48.block_sparse_moe.experts.204.w1", "model.layers.48.block_sparse_moe.experts.205.w1", "model.layers.48.block_sparse_moe.experts.206.w1", "model.layers.48.block_sparse_moe.experts.207.w1", "model.layers.48.block_sparse_moe.experts.208.w1", "model.layers.48.block_sparse_moe.experts.209.w1", "model.layers.48.block_sparse_moe.experts.210.w1", "model.layers.48.block_sparse_moe.experts.211.w1", "model.layers.48.block_sparse_moe.experts.212.w1", "model.layers.48.block_sparse_moe.experts.213.w1", "model.layers.48.block_sparse_moe.experts.214.w1", "model.layers.48.block_sparse_moe.experts.215.w1", "model.layers.48.block_sparse_moe.experts.216.w1", "model.layers.48.block_sparse_moe.experts.217.w1", "model.layers.48.block_sparse_moe.experts.218.w1", "model.layers.48.block_sparse_moe.experts.219.w1", "model.layers.48.block_sparse_moe.experts.220.w1", "model.layers.48.block_sparse_moe.experts.221.w1", "model.layers.48.block_sparse_moe.experts.222.w1", "model.layers.48.block_sparse_moe.experts.223.w1", "model.layers.48.block_sparse_moe.experts.224.w1", "model.layers.48.block_sparse_moe.experts.225.w1", "model.layers.48.block_sparse_moe.experts.226.w1", "model.layers.48.block_sparse_moe.experts.227.w1", "model.layers.48.block_sparse_moe.experts.228.w1", "model.layers.48.block_sparse_moe.experts.229.w1", "model.layers.48.block_sparse_moe.experts.230.w1", "model.layers.48.block_sparse_moe.experts.231.w1", "model.layers.48.block_sparse_moe.experts.232.w1", "model.layers.48.block_sparse_moe.experts.233.w1", "model.layers.48.block_sparse_moe.experts.234.w1", "model.layers.48.block_sparse_moe.experts.235.w1", "model.layers.48.block_sparse_moe.experts.236.w1", "model.layers.48.block_sparse_moe.experts.237.w1", "model.layers.48.block_sparse_moe.experts.238.w1", "model.layers.48.block_sparse_moe.experts.239.w1", "model.layers.48.block_sparse_moe.experts.240.w1", "model.layers.48.block_sparse_moe.experts.241.w1", "model.layers.48.block_sparse_moe.experts.242.w1", "model.layers.48.block_sparse_moe.experts.243.w1", "model.layers.48.block_sparse_moe.experts.244.w1", "model.layers.48.block_sparse_moe.experts.245.w1", "model.layers.48.block_sparse_moe.experts.246.w1", "model.layers.48.block_sparse_moe.experts.247.w1", "model.layers.48.block_sparse_moe.experts.248.w1", "model.layers.48.block_sparse_moe.experts.249.w1", "model.layers.48.block_sparse_moe.experts.250.w1", "model.layers.48.block_sparse_moe.experts.251.w1", "model.layers.48.block_sparse_moe.experts.252.w1", "model.layers.48.block_sparse_moe.experts.253.w1", "model.layers.48.block_sparse_moe.experts.254.w1", "model.layers.48.block_sparse_moe.experts.255.w1", "model.layers.48.block_sparse_moe.experts.0.w3", "model.layers.48.block_sparse_moe.experts.1.w3", "model.layers.48.block_sparse_moe.experts.2.w3", "model.layers.48.block_sparse_moe.experts.3.w3", "model.layers.48.block_sparse_moe.experts.4.w3", "model.layers.48.block_sparse_moe.experts.5.w3", "model.layers.48.block_sparse_moe.experts.6.w3", "model.layers.48.block_sparse_moe.experts.7.w3", "model.layers.48.block_sparse_moe.experts.8.w3", "model.layers.48.block_sparse_moe.experts.9.w3", "model.layers.48.block_sparse_moe.experts.10.w3", "model.layers.48.block_sparse_moe.experts.11.w3", "model.layers.48.block_sparse_moe.experts.12.w3", "model.layers.48.block_sparse_moe.experts.13.w3", "model.layers.48.block_sparse_moe.experts.14.w3", "model.layers.48.block_sparse_moe.experts.15.w3", "model.layers.48.block_sparse_moe.experts.16.w3", "model.layers.48.block_sparse_moe.experts.17.w3", "model.layers.48.block_sparse_moe.experts.18.w3", "model.layers.48.block_sparse_moe.experts.19.w3", "model.layers.48.block_sparse_moe.experts.20.w3", "model.layers.48.block_sparse_moe.experts.21.w3", "model.layers.48.block_sparse_moe.experts.22.w3", "model.layers.48.block_sparse_moe.experts.23.w3", "model.layers.48.block_sparse_moe.experts.24.w3", "model.layers.48.block_sparse_moe.experts.25.w3", "model.layers.48.block_sparse_moe.experts.26.w3", "model.layers.48.block_sparse_moe.experts.27.w3", "model.layers.48.block_sparse_moe.experts.28.w3", "model.layers.48.block_sparse_moe.experts.29.w3", "model.layers.48.block_sparse_moe.experts.30.w3", "model.layers.48.block_sparse_moe.experts.31.w3", "model.layers.48.block_sparse_moe.experts.32.w3", "model.layers.48.block_sparse_moe.experts.33.w3", "model.layers.48.block_sparse_moe.experts.34.w3", "model.layers.48.block_sparse_moe.experts.35.w3", "model.layers.48.block_sparse_moe.experts.36.w3", "model.layers.48.block_sparse_moe.experts.37.w3", "model.layers.48.block_sparse_moe.experts.38.w3", "model.layers.48.block_sparse_moe.experts.39.w3", "model.layers.48.block_sparse_moe.experts.40.w3", "model.layers.48.block_sparse_moe.experts.41.w3", "model.layers.48.block_sparse_moe.experts.42.w3", "model.layers.48.block_sparse_moe.experts.43.w3", "model.layers.48.block_sparse_moe.experts.44.w3", "model.layers.48.block_sparse_moe.experts.45.w3", "model.layers.48.block_sparse_moe.experts.46.w3", "model.layers.48.block_sparse_moe.experts.47.w3", "model.layers.48.block_sparse_moe.experts.48.w3", "model.layers.48.block_sparse_moe.experts.49.w3", "model.layers.48.block_sparse_moe.experts.50.w3", "model.layers.48.block_sparse_moe.experts.51.w3", "model.layers.48.block_sparse_moe.experts.52.w3", "model.layers.48.block_sparse_moe.experts.53.w3", "model.layers.48.block_sparse_moe.experts.54.w3", "model.layers.48.block_sparse_moe.experts.55.w3", "model.layers.48.block_sparse_moe.experts.56.w3", "model.layers.48.block_sparse_moe.experts.57.w3", "model.layers.48.block_sparse_moe.experts.58.w3", "model.layers.48.block_sparse_moe.experts.59.w3", "model.layers.48.block_sparse_moe.experts.60.w3", "model.layers.48.block_sparse_moe.experts.61.w3", "model.layers.48.block_sparse_moe.experts.62.w3", "model.layers.48.block_sparse_moe.experts.63.w3", "model.layers.48.block_sparse_moe.experts.64.w3", "model.layers.48.block_sparse_moe.experts.65.w3", "model.layers.48.block_sparse_moe.experts.66.w3", "model.layers.48.block_sparse_moe.experts.67.w3", "model.layers.48.block_sparse_moe.experts.68.w3", "model.layers.48.block_sparse_moe.experts.69.w3", "model.layers.48.block_sparse_moe.experts.70.w3", "model.layers.48.block_sparse_moe.experts.71.w3", "model.layers.48.block_sparse_moe.experts.72.w3", "model.layers.48.block_sparse_moe.experts.73.w3", "model.layers.48.block_sparse_moe.experts.74.w3", "model.layers.48.block_sparse_moe.experts.75.w3", "model.layers.48.block_sparse_moe.experts.76.w3", "model.layers.48.block_sparse_moe.experts.77.w3", "model.layers.48.block_sparse_moe.experts.78.w3", "model.layers.48.block_sparse_moe.experts.79.w3", "model.layers.48.block_sparse_moe.experts.80.w3", "model.layers.48.block_sparse_moe.experts.81.w3", "model.layers.48.block_sparse_moe.experts.82.w3", "model.layers.48.block_sparse_moe.experts.83.w3", "model.layers.48.block_sparse_moe.experts.84.w3", "model.layers.48.block_sparse_moe.experts.85.w3", "model.layers.48.block_sparse_moe.experts.86.w3", "model.layers.48.block_sparse_moe.experts.87.w3", "model.layers.48.block_sparse_moe.experts.88.w3", "model.layers.48.block_sparse_moe.experts.89.w3", "model.layers.48.block_sparse_moe.experts.90.w3", "model.layers.48.block_sparse_moe.experts.91.w3", "model.layers.48.block_sparse_moe.experts.92.w3", "model.layers.48.block_sparse_moe.experts.93.w3", "model.layers.48.block_sparse_moe.experts.94.w3", "model.layers.48.block_sparse_moe.experts.95.w3", "model.layers.48.block_sparse_moe.experts.96.w3", "model.layers.48.block_sparse_moe.experts.97.w3", "model.layers.48.block_sparse_moe.experts.98.w3", "model.layers.48.block_sparse_moe.experts.99.w3", "model.layers.48.block_sparse_moe.experts.100.w3", "model.layers.48.block_sparse_moe.experts.101.w3", "model.layers.48.block_sparse_moe.experts.102.w3", "model.layers.48.block_sparse_moe.experts.103.w3", "model.layers.48.block_sparse_moe.experts.104.w3", "model.layers.48.block_sparse_moe.experts.105.w3", "model.layers.48.block_sparse_moe.experts.106.w3", "model.layers.48.block_sparse_moe.experts.107.w3", "model.layers.48.block_sparse_moe.experts.108.w3", "model.layers.48.block_sparse_moe.experts.109.w3", "model.layers.48.block_sparse_moe.experts.110.w3", "model.layers.48.block_sparse_moe.experts.111.w3", "model.layers.48.block_sparse_moe.experts.112.w3", "model.layers.48.block_sparse_moe.experts.113.w3", "model.layers.48.block_sparse_moe.experts.114.w3", "model.layers.48.block_sparse_moe.experts.115.w3", "model.layers.48.block_sparse_moe.experts.116.w3", "model.layers.48.block_sparse_moe.experts.117.w3", "model.layers.48.block_sparse_moe.experts.118.w3", "model.layers.48.block_sparse_moe.experts.119.w3", "model.layers.48.block_sparse_moe.experts.120.w3", "model.layers.48.block_sparse_moe.experts.121.w3", "model.layers.48.block_sparse_moe.experts.122.w3", "model.layers.48.block_sparse_moe.experts.123.w3", "model.layers.48.block_sparse_moe.experts.124.w3", "model.layers.48.block_sparse_moe.experts.125.w3", "model.layers.48.block_sparse_moe.experts.126.w3", "model.layers.48.block_sparse_moe.experts.127.w3", "model.layers.48.block_sparse_moe.experts.128.w3", "model.layers.48.block_sparse_moe.experts.129.w3", "model.layers.48.block_sparse_moe.experts.130.w3", "model.layers.48.block_sparse_moe.experts.131.w3", "model.layers.48.block_sparse_moe.experts.132.w3", "model.layers.48.block_sparse_moe.experts.133.w3", "model.layers.48.block_sparse_moe.experts.134.w3", "model.layers.48.block_sparse_moe.experts.135.w3", "model.layers.48.block_sparse_moe.experts.136.w3", "model.layers.48.block_sparse_moe.experts.137.w3", "model.layers.48.block_sparse_moe.experts.138.w3", "model.layers.48.block_sparse_moe.experts.139.w3", "model.layers.48.block_sparse_moe.experts.140.w3", "model.layers.48.block_sparse_moe.experts.141.w3", "model.layers.48.block_sparse_moe.experts.142.w3", "model.layers.48.block_sparse_moe.experts.143.w3", "model.layers.48.block_sparse_moe.experts.144.w3", "model.layers.48.block_sparse_moe.experts.145.w3", "model.layers.48.block_sparse_moe.experts.146.w3", "model.layers.48.block_sparse_moe.experts.147.w3", "model.layers.48.block_sparse_moe.experts.148.w3", "model.layers.48.block_sparse_moe.experts.149.w3", "model.layers.48.block_sparse_moe.experts.150.w3", "model.layers.48.block_sparse_moe.experts.151.w3", "model.layers.48.block_sparse_moe.experts.152.w3", "model.layers.48.block_sparse_moe.experts.153.w3", "model.layers.48.block_sparse_moe.experts.154.w3", "model.layers.48.block_sparse_moe.experts.155.w3", "model.layers.48.block_sparse_moe.experts.156.w3", "model.layers.48.block_sparse_moe.experts.157.w3", "model.layers.48.block_sparse_moe.experts.158.w3", "model.layers.48.block_sparse_moe.experts.159.w3", "model.layers.48.block_sparse_moe.experts.160.w3", "model.layers.48.block_sparse_moe.experts.161.w3", "model.layers.48.block_sparse_moe.experts.162.w3", "model.layers.48.block_sparse_moe.experts.163.w3", "model.layers.48.block_sparse_moe.experts.164.w3", "model.layers.48.block_sparse_moe.experts.165.w3", "model.layers.48.block_sparse_moe.experts.166.w3", "model.layers.48.block_sparse_moe.experts.167.w3", "model.layers.48.block_sparse_moe.experts.168.w3", "model.layers.48.block_sparse_moe.experts.169.w3", "model.layers.48.block_sparse_moe.experts.170.w3", "model.layers.48.block_sparse_moe.experts.171.w3", "model.layers.48.block_sparse_moe.experts.172.w3", "model.layers.48.block_sparse_moe.experts.173.w3", "model.layers.48.block_sparse_moe.experts.174.w3", "model.layers.48.block_sparse_moe.experts.175.w3", "model.layers.48.block_sparse_moe.experts.176.w3", "model.layers.48.block_sparse_moe.experts.177.w3", "model.layers.48.block_sparse_moe.experts.178.w3", "model.layers.48.block_sparse_moe.experts.179.w3", "model.layers.48.block_sparse_moe.experts.180.w3", "model.layers.48.block_sparse_moe.experts.181.w3", "model.layers.48.block_sparse_moe.experts.182.w3", "model.layers.48.block_sparse_moe.experts.183.w3", "model.layers.48.block_sparse_moe.experts.184.w3", "model.layers.48.block_sparse_moe.experts.185.w3", "model.layers.48.block_sparse_moe.experts.186.w3", "model.layers.48.block_sparse_moe.experts.187.w3", "model.layers.48.block_sparse_moe.experts.188.w3", "model.layers.48.block_sparse_moe.experts.189.w3", "model.layers.48.block_sparse_moe.experts.190.w3", "model.layers.48.block_sparse_moe.experts.191.w3", "model.layers.48.block_sparse_moe.experts.192.w3", "model.layers.48.block_sparse_moe.experts.193.w3", "model.layers.48.block_sparse_moe.experts.194.w3", "model.layers.48.block_sparse_moe.experts.195.w3", "model.layers.48.block_sparse_moe.experts.196.w3", "model.layers.48.block_sparse_moe.experts.197.w3", "model.layers.48.block_sparse_moe.experts.198.w3", "model.layers.48.block_sparse_moe.experts.199.w3", "model.layers.48.block_sparse_moe.experts.200.w3", "model.layers.48.block_sparse_moe.experts.201.w3", "model.layers.48.block_sparse_moe.experts.202.w3", "model.layers.48.block_sparse_moe.experts.203.w3", "model.layers.48.block_sparse_moe.experts.204.w3", "model.layers.48.block_sparse_moe.experts.205.w3", "model.layers.48.block_sparse_moe.experts.206.w3", "model.layers.48.block_sparse_moe.experts.207.w3", "model.layers.48.block_sparse_moe.experts.208.w3", "model.layers.48.block_sparse_moe.experts.209.w3", "model.layers.48.block_sparse_moe.experts.210.w3", "model.layers.48.block_sparse_moe.experts.211.w3", "model.layers.48.block_sparse_moe.experts.212.w3", "model.layers.48.block_sparse_moe.experts.213.w3", "model.layers.48.block_sparse_moe.experts.214.w3", "model.layers.48.block_sparse_moe.experts.215.w3", "model.layers.48.block_sparse_moe.experts.216.w3", "model.layers.48.block_sparse_moe.experts.217.w3", "model.layers.48.block_sparse_moe.experts.218.w3", "model.layers.48.block_sparse_moe.experts.219.w3", "model.layers.48.block_sparse_moe.experts.220.w3", "model.layers.48.block_sparse_moe.experts.221.w3", "model.layers.48.block_sparse_moe.experts.222.w3", "model.layers.48.block_sparse_moe.experts.223.w3", "model.layers.48.block_sparse_moe.experts.224.w3", "model.layers.48.block_sparse_moe.experts.225.w3", "model.layers.48.block_sparse_moe.experts.226.w3", "model.layers.48.block_sparse_moe.experts.227.w3", "model.layers.48.block_sparse_moe.experts.228.w3", "model.layers.48.block_sparse_moe.experts.229.w3", "model.layers.48.block_sparse_moe.experts.230.w3", "model.layers.48.block_sparse_moe.experts.231.w3", "model.layers.48.block_sparse_moe.experts.232.w3", "model.layers.48.block_sparse_moe.experts.233.w3", "model.layers.48.block_sparse_moe.experts.234.w3", "model.layers.48.block_sparse_moe.experts.235.w3", "model.layers.48.block_sparse_moe.experts.236.w3", "model.layers.48.block_sparse_moe.experts.237.w3", "model.layers.48.block_sparse_moe.experts.238.w3", "model.layers.48.block_sparse_moe.experts.239.w3", "model.layers.48.block_sparse_moe.experts.240.w3", "model.layers.48.block_sparse_moe.experts.241.w3", "model.layers.48.block_sparse_moe.experts.242.w3", "model.layers.48.block_sparse_moe.experts.243.w3", "model.layers.48.block_sparse_moe.experts.244.w3", "model.layers.48.block_sparse_moe.experts.245.w3", "model.layers.48.block_sparse_moe.experts.246.w3", "model.layers.48.block_sparse_moe.experts.247.w3", "model.layers.48.block_sparse_moe.experts.248.w3", "model.layers.48.block_sparse_moe.experts.249.w3", "model.layers.48.block_sparse_moe.experts.250.w3", "model.layers.48.block_sparse_moe.experts.251.w3", "model.layers.48.block_sparse_moe.experts.252.w3", "model.layers.48.block_sparse_moe.experts.253.w3", "model.layers.48.block_sparse_moe.experts.254.w3", "model.layers.48.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0002629220485687589, "dbits": 2415919104 } ] }, { "idx": 244, "layers": [ "model.layers.48.block_sparse_moe.experts.0.w2", "model.layers.48.block_sparse_moe.experts.1.w2", "model.layers.48.block_sparse_moe.experts.2.w2", "model.layers.48.block_sparse_moe.experts.3.w2", "model.layers.48.block_sparse_moe.experts.4.w2", "model.layers.48.block_sparse_moe.experts.5.w2", "model.layers.48.block_sparse_moe.experts.6.w2", "model.layers.48.block_sparse_moe.experts.7.w2", "model.layers.48.block_sparse_moe.experts.8.w2", "model.layers.48.block_sparse_moe.experts.9.w2", "model.layers.48.block_sparse_moe.experts.10.w2", "model.layers.48.block_sparse_moe.experts.11.w2", "model.layers.48.block_sparse_moe.experts.12.w2", "model.layers.48.block_sparse_moe.experts.13.w2", "model.layers.48.block_sparse_moe.experts.14.w2", "model.layers.48.block_sparse_moe.experts.15.w2", "model.layers.48.block_sparse_moe.experts.16.w2", "model.layers.48.block_sparse_moe.experts.17.w2", "model.layers.48.block_sparse_moe.experts.18.w2", "model.layers.48.block_sparse_moe.experts.19.w2", "model.layers.48.block_sparse_moe.experts.20.w2", "model.layers.48.block_sparse_moe.experts.21.w2", "model.layers.48.block_sparse_moe.experts.22.w2", "model.layers.48.block_sparse_moe.experts.23.w2", "model.layers.48.block_sparse_moe.experts.24.w2", "model.layers.48.block_sparse_moe.experts.25.w2", "model.layers.48.block_sparse_moe.experts.26.w2", "model.layers.48.block_sparse_moe.experts.27.w2", "model.layers.48.block_sparse_moe.experts.28.w2", "model.layers.48.block_sparse_moe.experts.29.w2", "model.layers.48.block_sparse_moe.experts.30.w2", "model.layers.48.block_sparse_moe.experts.31.w2", "model.layers.48.block_sparse_moe.experts.32.w2", "model.layers.48.block_sparse_moe.experts.33.w2", "model.layers.48.block_sparse_moe.experts.34.w2", "model.layers.48.block_sparse_moe.experts.35.w2", "model.layers.48.block_sparse_moe.experts.36.w2", "model.layers.48.block_sparse_moe.experts.37.w2", "model.layers.48.block_sparse_moe.experts.38.w2", "model.layers.48.block_sparse_moe.experts.39.w2", "model.layers.48.block_sparse_moe.experts.40.w2", "model.layers.48.block_sparse_moe.experts.41.w2", "model.layers.48.block_sparse_moe.experts.42.w2", "model.layers.48.block_sparse_moe.experts.43.w2", "model.layers.48.block_sparse_moe.experts.44.w2", "model.layers.48.block_sparse_moe.experts.45.w2", "model.layers.48.block_sparse_moe.experts.46.w2", "model.layers.48.block_sparse_moe.experts.47.w2", "model.layers.48.block_sparse_moe.experts.48.w2", "model.layers.48.block_sparse_moe.experts.49.w2", "model.layers.48.block_sparse_moe.experts.50.w2", "model.layers.48.block_sparse_moe.experts.51.w2", "model.layers.48.block_sparse_moe.experts.52.w2", "model.layers.48.block_sparse_moe.experts.53.w2", "model.layers.48.block_sparse_moe.experts.54.w2", "model.layers.48.block_sparse_moe.experts.55.w2", "model.layers.48.block_sparse_moe.experts.56.w2", "model.layers.48.block_sparse_moe.experts.57.w2", "model.layers.48.block_sparse_moe.experts.58.w2", "model.layers.48.block_sparse_moe.experts.59.w2", "model.layers.48.block_sparse_moe.experts.60.w2", "model.layers.48.block_sparse_moe.experts.61.w2", "model.layers.48.block_sparse_moe.experts.62.w2", "model.layers.48.block_sparse_moe.experts.63.w2", "model.layers.48.block_sparse_moe.experts.64.w2", "model.layers.48.block_sparse_moe.experts.65.w2", "model.layers.48.block_sparse_moe.experts.66.w2", "model.layers.48.block_sparse_moe.experts.67.w2", "model.layers.48.block_sparse_moe.experts.68.w2", "model.layers.48.block_sparse_moe.experts.69.w2", "model.layers.48.block_sparse_moe.experts.70.w2", "model.layers.48.block_sparse_moe.experts.71.w2", "model.layers.48.block_sparse_moe.experts.72.w2", "model.layers.48.block_sparse_moe.experts.73.w2", "model.layers.48.block_sparse_moe.experts.74.w2", "model.layers.48.block_sparse_moe.experts.75.w2", "model.layers.48.block_sparse_moe.experts.76.w2", "model.layers.48.block_sparse_moe.experts.77.w2", "model.layers.48.block_sparse_moe.experts.78.w2", "model.layers.48.block_sparse_moe.experts.79.w2", "model.layers.48.block_sparse_moe.experts.80.w2", "model.layers.48.block_sparse_moe.experts.81.w2", "model.layers.48.block_sparse_moe.experts.82.w2", "model.layers.48.block_sparse_moe.experts.83.w2", "model.layers.48.block_sparse_moe.experts.84.w2", "model.layers.48.block_sparse_moe.experts.85.w2", "model.layers.48.block_sparse_moe.experts.86.w2", "model.layers.48.block_sparse_moe.experts.87.w2", "model.layers.48.block_sparse_moe.experts.88.w2", "model.layers.48.block_sparse_moe.experts.89.w2", "model.layers.48.block_sparse_moe.experts.90.w2", "model.layers.48.block_sparse_moe.experts.91.w2", "model.layers.48.block_sparse_moe.experts.92.w2", "model.layers.48.block_sparse_moe.experts.93.w2", "model.layers.48.block_sparse_moe.experts.94.w2", "model.layers.48.block_sparse_moe.experts.95.w2", "model.layers.48.block_sparse_moe.experts.96.w2", "model.layers.48.block_sparse_moe.experts.97.w2", "model.layers.48.block_sparse_moe.experts.98.w2", "model.layers.48.block_sparse_moe.experts.99.w2", "model.layers.48.block_sparse_moe.experts.100.w2", "model.layers.48.block_sparse_moe.experts.101.w2", "model.layers.48.block_sparse_moe.experts.102.w2", "model.layers.48.block_sparse_moe.experts.103.w2", "model.layers.48.block_sparse_moe.experts.104.w2", "model.layers.48.block_sparse_moe.experts.105.w2", "model.layers.48.block_sparse_moe.experts.106.w2", "model.layers.48.block_sparse_moe.experts.107.w2", "model.layers.48.block_sparse_moe.experts.108.w2", "model.layers.48.block_sparse_moe.experts.109.w2", "model.layers.48.block_sparse_moe.experts.110.w2", "model.layers.48.block_sparse_moe.experts.111.w2", "model.layers.48.block_sparse_moe.experts.112.w2", "model.layers.48.block_sparse_moe.experts.113.w2", "model.layers.48.block_sparse_moe.experts.114.w2", "model.layers.48.block_sparse_moe.experts.115.w2", "model.layers.48.block_sparse_moe.experts.116.w2", "model.layers.48.block_sparse_moe.experts.117.w2", "model.layers.48.block_sparse_moe.experts.118.w2", "model.layers.48.block_sparse_moe.experts.119.w2", "model.layers.48.block_sparse_moe.experts.120.w2", "model.layers.48.block_sparse_moe.experts.121.w2", "model.layers.48.block_sparse_moe.experts.122.w2", "model.layers.48.block_sparse_moe.experts.123.w2", "model.layers.48.block_sparse_moe.experts.124.w2", "model.layers.48.block_sparse_moe.experts.125.w2", "model.layers.48.block_sparse_moe.experts.126.w2", "model.layers.48.block_sparse_moe.experts.127.w2", "model.layers.48.block_sparse_moe.experts.128.w2", "model.layers.48.block_sparse_moe.experts.129.w2", "model.layers.48.block_sparse_moe.experts.130.w2", "model.layers.48.block_sparse_moe.experts.131.w2", "model.layers.48.block_sparse_moe.experts.132.w2", "model.layers.48.block_sparse_moe.experts.133.w2", "model.layers.48.block_sparse_moe.experts.134.w2", "model.layers.48.block_sparse_moe.experts.135.w2", "model.layers.48.block_sparse_moe.experts.136.w2", "model.layers.48.block_sparse_moe.experts.137.w2", "model.layers.48.block_sparse_moe.experts.138.w2", "model.layers.48.block_sparse_moe.experts.139.w2", "model.layers.48.block_sparse_moe.experts.140.w2", "model.layers.48.block_sparse_moe.experts.141.w2", "model.layers.48.block_sparse_moe.experts.142.w2", "model.layers.48.block_sparse_moe.experts.143.w2", "model.layers.48.block_sparse_moe.experts.144.w2", "model.layers.48.block_sparse_moe.experts.145.w2", "model.layers.48.block_sparse_moe.experts.146.w2", "model.layers.48.block_sparse_moe.experts.147.w2", "model.layers.48.block_sparse_moe.experts.148.w2", "model.layers.48.block_sparse_moe.experts.149.w2", "model.layers.48.block_sparse_moe.experts.150.w2", "model.layers.48.block_sparse_moe.experts.151.w2", "model.layers.48.block_sparse_moe.experts.152.w2", "model.layers.48.block_sparse_moe.experts.153.w2", "model.layers.48.block_sparse_moe.experts.154.w2", "model.layers.48.block_sparse_moe.experts.155.w2", "model.layers.48.block_sparse_moe.experts.156.w2", "model.layers.48.block_sparse_moe.experts.157.w2", "model.layers.48.block_sparse_moe.experts.158.w2", "model.layers.48.block_sparse_moe.experts.159.w2", "model.layers.48.block_sparse_moe.experts.160.w2", "model.layers.48.block_sparse_moe.experts.161.w2", "model.layers.48.block_sparse_moe.experts.162.w2", "model.layers.48.block_sparse_moe.experts.163.w2", "model.layers.48.block_sparse_moe.experts.164.w2", "model.layers.48.block_sparse_moe.experts.165.w2", "model.layers.48.block_sparse_moe.experts.166.w2", "model.layers.48.block_sparse_moe.experts.167.w2", "model.layers.48.block_sparse_moe.experts.168.w2", "model.layers.48.block_sparse_moe.experts.169.w2", "model.layers.48.block_sparse_moe.experts.170.w2", "model.layers.48.block_sparse_moe.experts.171.w2", "model.layers.48.block_sparse_moe.experts.172.w2", "model.layers.48.block_sparse_moe.experts.173.w2", "model.layers.48.block_sparse_moe.experts.174.w2", "model.layers.48.block_sparse_moe.experts.175.w2", "model.layers.48.block_sparse_moe.experts.176.w2", "model.layers.48.block_sparse_moe.experts.177.w2", "model.layers.48.block_sparse_moe.experts.178.w2", "model.layers.48.block_sparse_moe.experts.179.w2", "model.layers.48.block_sparse_moe.experts.180.w2", "model.layers.48.block_sparse_moe.experts.181.w2", "model.layers.48.block_sparse_moe.experts.182.w2", "model.layers.48.block_sparse_moe.experts.183.w2", "model.layers.48.block_sparse_moe.experts.184.w2", "model.layers.48.block_sparse_moe.experts.185.w2", "model.layers.48.block_sparse_moe.experts.186.w2", "model.layers.48.block_sparse_moe.experts.187.w2", "model.layers.48.block_sparse_moe.experts.188.w2", "model.layers.48.block_sparse_moe.experts.189.w2", "model.layers.48.block_sparse_moe.experts.190.w2", "model.layers.48.block_sparse_moe.experts.191.w2", "model.layers.48.block_sparse_moe.experts.192.w2", "model.layers.48.block_sparse_moe.experts.193.w2", "model.layers.48.block_sparse_moe.experts.194.w2", "model.layers.48.block_sparse_moe.experts.195.w2", "model.layers.48.block_sparse_moe.experts.196.w2", "model.layers.48.block_sparse_moe.experts.197.w2", "model.layers.48.block_sparse_moe.experts.198.w2", "model.layers.48.block_sparse_moe.experts.199.w2", "model.layers.48.block_sparse_moe.experts.200.w2", "model.layers.48.block_sparse_moe.experts.201.w2", "model.layers.48.block_sparse_moe.experts.202.w2", "model.layers.48.block_sparse_moe.experts.203.w2", "model.layers.48.block_sparse_moe.experts.204.w2", "model.layers.48.block_sparse_moe.experts.205.w2", "model.layers.48.block_sparse_moe.experts.206.w2", "model.layers.48.block_sparse_moe.experts.207.w2", "model.layers.48.block_sparse_moe.experts.208.w2", "model.layers.48.block_sparse_moe.experts.209.w2", "model.layers.48.block_sparse_moe.experts.210.w2", "model.layers.48.block_sparse_moe.experts.211.w2", "model.layers.48.block_sparse_moe.experts.212.w2", "model.layers.48.block_sparse_moe.experts.213.w2", "model.layers.48.block_sparse_moe.experts.214.w2", "model.layers.48.block_sparse_moe.experts.215.w2", "model.layers.48.block_sparse_moe.experts.216.w2", "model.layers.48.block_sparse_moe.experts.217.w2", "model.layers.48.block_sparse_moe.experts.218.w2", "model.layers.48.block_sparse_moe.experts.219.w2", "model.layers.48.block_sparse_moe.experts.220.w2", "model.layers.48.block_sparse_moe.experts.221.w2", "model.layers.48.block_sparse_moe.experts.222.w2", "model.layers.48.block_sparse_moe.experts.223.w2", "model.layers.48.block_sparse_moe.experts.224.w2", "model.layers.48.block_sparse_moe.experts.225.w2", "model.layers.48.block_sparse_moe.experts.226.w2", "model.layers.48.block_sparse_moe.experts.227.w2", "model.layers.48.block_sparse_moe.experts.228.w2", "model.layers.48.block_sparse_moe.experts.229.w2", "model.layers.48.block_sparse_moe.experts.230.w2", "model.layers.48.block_sparse_moe.experts.231.w2", "model.layers.48.block_sparse_moe.experts.232.w2", "model.layers.48.block_sparse_moe.experts.233.w2", "model.layers.48.block_sparse_moe.experts.234.w2", "model.layers.48.block_sparse_moe.experts.235.w2", "model.layers.48.block_sparse_moe.experts.236.w2", "model.layers.48.block_sparse_moe.experts.237.w2", "model.layers.48.block_sparse_moe.experts.238.w2", "model.layers.48.block_sparse_moe.experts.239.w2", "model.layers.48.block_sparse_moe.experts.240.w2", "model.layers.48.block_sparse_moe.experts.241.w2", "model.layers.48.block_sparse_moe.experts.242.w2", "model.layers.48.block_sparse_moe.experts.243.w2", "model.layers.48.block_sparse_moe.experts.244.w2", "model.layers.48.block_sparse_moe.experts.245.w2", "model.layers.48.block_sparse_moe.experts.246.w2", "model.layers.48.block_sparse_moe.experts.247.w2", "model.layers.48.block_sparse_moe.experts.248.w2", "model.layers.48.block_sparse_moe.experts.249.w2", "model.layers.48.block_sparse_moe.experts.250.w2", "model.layers.48.block_sparse_moe.experts.251.w2", "model.layers.48.block_sparse_moe.experts.252.w2", "model.layers.48.block_sparse_moe.experts.253.w2", "model.layers.48.block_sparse_moe.experts.254.w2", "model.layers.48.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -2.578310668471806e-05, "dbits": 1207959552 } ] }, { "idx": 245, "layers": [ "model.layers.49.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0002612471580505371, "dbits": 18874368 } ] }, { "idx": 246, "layers": [ "model.layers.49.self_attn.k_proj", "model.layers.49.self_attn.v_proj" ], "candidates": [ { "dkld": -0.000947619602084171, "dbits": 6291456 } ] }, { "idx": 247, "layers": [ "model.layers.49.self_attn.o_proj" ], "candidates": [ { "dkld": 0.000841556116938591, "dbits": 18874368 } ] }, { "idx": 248, "layers": [ "model.layers.49.block_sparse_moe.experts.0.w1", "model.layers.49.block_sparse_moe.experts.1.w1", "model.layers.49.block_sparse_moe.experts.2.w1", "model.layers.49.block_sparse_moe.experts.3.w1", "model.layers.49.block_sparse_moe.experts.4.w1", "model.layers.49.block_sparse_moe.experts.5.w1", "model.layers.49.block_sparse_moe.experts.6.w1", "model.layers.49.block_sparse_moe.experts.7.w1", "model.layers.49.block_sparse_moe.experts.8.w1", "model.layers.49.block_sparse_moe.experts.9.w1", "model.layers.49.block_sparse_moe.experts.10.w1", "model.layers.49.block_sparse_moe.experts.11.w1", "model.layers.49.block_sparse_moe.experts.12.w1", "model.layers.49.block_sparse_moe.experts.13.w1", "model.layers.49.block_sparse_moe.experts.14.w1", "model.layers.49.block_sparse_moe.experts.15.w1", "model.layers.49.block_sparse_moe.experts.16.w1", "model.layers.49.block_sparse_moe.experts.17.w1", "model.layers.49.block_sparse_moe.experts.18.w1", "model.layers.49.block_sparse_moe.experts.19.w1", "model.layers.49.block_sparse_moe.experts.20.w1", "model.layers.49.block_sparse_moe.experts.21.w1", "model.layers.49.block_sparse_moe.experts.22.w1", "model.layers.49.block_sparse_moe.experts.23.w1", "model.layers.49.block_sparse_moe.experts.24.w1", "model.layers.49.block_sparse_moe.experts.25.w1", "model.layers.49.block_sparse_moe.experts.26.w1", "model.layers.49.block_sparse_moe.experts.27.w1", "model.layers.49.block_sparse_moe.experts.28.w1", "model.layers.49.block_sparse_moe.experts.29.w1", "model.layers.49.block_sparse_moe.experts.30.w1", "model.layers.49.block_sparse_moe.experts.31.w1", "model.layers.49.block_sparse_moe.experts.32.w1", "model.layers.49.block_sparse_moe.experts.33.w1", "model.layers.49.block_sparse_moe.experts.34.w1", "model.layers.49.block_sparse_moe.experts.35.w1", "model.layers.49.block_sparse_moe.experts.36.w1", "model.layers.49.block_sparse_moe.experts.37.w1", "model.layers.49.block_sparse_moe.experts.38.w1", "model.layers.49.block_sparse_moe.experts.39.w1", "model.layers.49.block_sparse_moe.experts.40.w1", "model.layers.49.block_sparse_moe.experts.41.w1", "model.layers.49.block_sparse_moe.experts.42.w1", "model.layers.49.block_sparse_moe.experts.43.w1", "model.layers.49.block_sparse_moe.experts.44.w1", "model.layers.49.block_sparse_moe.experts.45.w1", "model.layers.49.block_sparse_moe.experts.46.w1", "model.layers.49.block_sparse_moe.experts.47.w1", "model.layers.49.block_sparse_moe.experts.48.w1", "model.layers.49.block_sparse_moe.experts.49.w1", "model.layers.49.block_sparse_moe.experts.50.w1", "model.layers.49.block_sparse_moe.experts.51.w1", "model.layers.49.block_sparse_moe.experts.52.w1", "model.layers.49.block_sparse_moe.experts.53.w1", "model.layers.49.block_sparse_moe.experts.54.w1", "model.layers.49.block_sparse_moe.experts.55.w1", "model.layers.49.block_sparse_moe.experts.56.w1", "model.layers.49.block_sparse_moe.experts.57.w1", "model.layers.49.block_sparse_moe.experts.58.w1", "model.layers.49.block_sparse_moe.experts.59.w1", "model.layers.49.block_sparse_moe.experts.60.w1", "model.layers.49.block_sparse_moe.experts.61.w1", "model.layers.49.block_sparse_moe.experts.62.w1", "model.layers.49.block_sparse_moe.experts.63.w1", "model.layers.49.block_sparse_moe.experts.64.w1", "model.layers.49.block_sparse_moe.experts.65.w1", "model.layers.49.block_sparse_moe.experts.66.w1", "model.layers.49.block_sparse_moe.experts.67.w1", "model.layers.49.block_sparse_moe.experts.68.w1", "model.layers.49.block_sparse_moe.experts.69.w1", "model.layers.49.block_sparse_moe.experts.70.w1", "model.layers.49.block_sparse_moe.experts.71.w1", "model.layers.49.block_sparse_moe.experts.72.w1", "model.layers.49.block_sparse_moe.experts.73.w1", "model.layers.49.block_sparse_moe.experts.74.w1", "model.layers.49.block_sparse_moe.experts.75.w1", "model.layers.49.block_sparse_moe.experts.76.w1", "model.layers.49.block_sparse_moe.experts.77.w1", "model.layers.49.block_sparse_moe.experts.78.w1", "model.layers.49.block_sparse_moe.experts.79.w1", "model.layers.49.block_sparse_moe.experts.80.w1", "model.layers.49.block_sparse_moe.experts.81.w1", "model.layers.49.block_sparse_moe.experts.82.w1", "model.layers.49.block_sparse_moe.experts.83.w1", "model.layers.49.block_sparse_moe.experts.84.w1", "model.layers.49.block_sparse_moe.experts.85.w1", "model.layers.49.block_sparse_moe.experts.86.w1", "model.layers.49.block_sparse_moe.experts.87.w1", "model.layers.49.block_sparse_moe.experts.88.w1", "model.layers.49.block_sparse_moe.experts.89.w1", "model.layers.49.block_sparse_moe.experts.90.w1", "model.layers.49.block_sparse_moe.experts.91.w1", "model.layers.49.block_sparse_moe.experts.92.w1", "model.layers.49.block_sparse_moe.experts.93.w1", "model.layers.49.block_sparse_moe.experts.94.w1", "model.layers.49.block_sparse_moe.experts.95.w1", "model.layers.49.block_sparse_moe.experts.96.w1", "model.layers.49.block_sparse_moe.experts.97.w1", "model.layers.49.block_sparse_moe.experts.98.w1", "model.layers.49.block_sparse_moe.experts.99.w1", "model.layers.49.block_sparse_moe.experts.100.w1", "model.layers.49.block_sparse_moe.experts.101.w1", "model.layers.49.block_sparse_moe.experts.102.w1", "model.layers.49.block_sparse_moe.experts.103.w1", "model.layers.49.block_sparse_moe.experts.104.w1", "model.layers.49.block_sparse_moe.experts.105.w1", "model.layers.49.block_sparse_moe.experts.106.w1", "model.layers.49.block_sparse_moe.experts.107.w1", "model.layers.49.block_sparse_moe.experts.108.w1", "model.layers.49.block_sparse_moe.experts.109.w1", "model.layers.49.block_sparse_moe.experts.110.w1", "model.layers.49.block_sparse_moe.experts.111.w1", "model.layers.49.block_sparse_moe.experts.112.w1", "model.layers.49.block_sparse_moe.experts.113.w1", "model.layers.49.block_sparse_moe.experts.114.w1", "model.layers.49.block_sparse_moe.experts.115.w1", "model.layers.49.block_sparse_moe.experts.116.w1", "model.layers.49.block_sparse_moe.experts.117.w1", "model.layers.49.block_sparse_moe.experts.118.w1", "model.layers.49.block_sparse_moe.experts.119.w1", "model.layers.49.block_sparse_moe.experts.120.w1", "model.layers.49.block_sparse_moe.experts.121.w1", "model.layers.49.block_sparse_moe.experts.122.w1", "model.layers.49.block_sparse_moe.experts.123.w1", "model.layers.49.block_sparse_moe.experts.124.w1", "model.layers.49.block_sparse_moe.experts.125.w1", "model.layers.49.block_sparse_moe.experts.126.w1", "model.layers.49.block_sparse_moe.experts.127.w1", "model.layers.49.block_sparse_moe.experts.128.w1", "model.layers.49.block_sparse_moe.experts.129.w1", "model.layers.49.block_sparse_moe.experts.130.w1", "model.layers.49.block_sparse_moe.experts.131.w1", "model.layers.49.block_sparse_moe.experts.132.w1", "model.layers.49.block_sparse_moe.experts.133.w1", "model.layers.49.block_sparse_moe.experts.134.w1", "model.layers.49.block_sparse_moe.experts.135.w1", "model.layers.49.block_sparse_moe.experts.136.w1", "model.layers.49.block_sparse_moe.experts.137.w1", "model.layers.49.block_sparse_moe.experts.138.w1", "model.layers.49.block_sparse_moe.experts.139.w1", "model.layers.49.block_sparse_moe.experts.140.w1", "model.layers.49.block_sparse_moe.experts.141.w1", "model.layers.49.block_sparse_moe.experts.142.w1", "model.layers.49.block_sparse_moe.experts.143.w1", "model.layers.49.block_sparse_moe.experts.144.w1", "model.layers.49.block_sparse_moe.experts.145.w1", "model.layers.49.block_sparse_moe.experts.146.w1", "model.layers.49.block_sparse_moe.experts.147.w1", "model.layers.49.block_sparse_moe.experts.148.w1", "model.layers.49.block_sparse_moe.experts.149.w1", "model.layers.49.block_sparse_moe.experts.150.w1", "model.layers.49.block_sparse_moe.experts.151.w1", "model.layers.49.block_sparse_moe.experts.152.w1", "model.layers.49.block_sparse_moe.experts.153.w1", "model.layers.49.block_sparse_moe.experts.154.w1", "model.layers.49.block_sparse_moe.experts.155.w1", "model.layers.49.block_sparse_moe.experts.156.w1", "model.layers.49.block_sparse_moe.experts.157.w1", "model.layers.49.block_sparse_moe.experts.158.w1", "model.layers.49.block_sparse_moe.experts.159.w1", "model.layers.49.block_sparse_moe.experts.160.w1", "model.layers.49.block_sparse_moe.experts.161.w1", "model.layers.49.block_sparse_moe.experts.162.w1", "model.layers.49.block_sparse_moe.experts.163.w1", "model.layers.49.block_sparse_moe.experts.164.w1", "model.layers.49.block_sparse_moe.experts.165.w1", "model.layers.49.block_sparse_moe.experts.166.w1", "model.layers.49.block_sparse_moe.experts.167.w1", "model.layers.49.block_sparse_moe.experts.168.w1", "model.layers.49.block_sparse_moe.experts.169.w1", "model.layers.49.block_sparse_moe.experts.170.w1", "model.layers.49.block_sparse_moe.experts.171.w1", "model.layers.49.block_sparse_moe.experts.172.w1", "model.layers.49.block_sparse_moe.experts.173.w1", "model.layers.49.block_sparse_moe.experts.174.w1", "model.layers.49.block_sparse_moe.experts.175.w1", "model.layers.49.block_sparse_moe.experts.176.w1", "model.layers.49.block_sparse_moe.experts.177.w1", "model.layers.49.block_sparse_moe.experts.178.w1", "model.layers.49.block_sparse_moe.experts.179.w1", "model.layers.49.block_sparse_moe.experts.180.w1", "model.layers.49.block_sparse_moe.experts.181.w1", "model.layers.49.block_sparse_moe.experts.182.w1", "model.layers.49.block_sparse_moe.experts.183.w1", "model.layers.49.block_sparse_moe.experts.184.w1", "model.layers.49.block_sparse_moe.experts.185.w1", "model.layers.49.block_sparse_moe.experts.186.w1", "model.layers.49.block_sparse_moe.experts.187.w1", "model.layers.49.block_sparse_moe.experts.188.w1", "model.layers.49.block_sparse_moe.experts.189.w1", "model.layers.49.block_sparse_moe.experts.190.w1", "model.layers.49.block_sparse_moe.experts.191.w1", "model.layers.49.block_sparse_moe.experts.192.w1", "model.layers.49.block_sparse_moe.experts.193.w1", "model.layers.49.block_sparse_moe.experts.194.w1", "model.layers.49.block_sparse_moe.experts.195.w1", "model.layers.49.block_sparse_moe.experts.196.w1", "model.layers.49.block_sparse_moe.experts.197.w1", "model.layers.49.block_sparse_moe.experts.198.w1", "model.layers.49.block_sparse_moe.experts.199.w1", "model.layers.49.block_sparse_moe.experts.200.w1", "model.layers.49.block_sparse_moe.experts.201.w1", "model.layers.49.block_sparse_moe.experts.202.w1", "model.layers.49.block_sparse_moe.experts.203.w1", "model.layers.49.block_sparse_moe.experts.204.w1", "model.layers.49.block_sparse_moe.experts.205.w1", "model.layers.49.block_sparse_moe.experts.206.w1", "model.layers.49.block_sparse_moe.experts.207.w1", "model.layers.49.block_sparse_moe.experts.208.w1", "model.layers.49.block_sparse_moe.experts.209.w1", "model.layers.49.block_sparse_moe.experts.210.w1", "model.layers.49.block_sparse_moe.experts.211.w1", "model.layers.49.block_sparse_moe.experts.212.w1", "model.layers.49.block_sparse_moe.experts.213.w1", "model.layers.49.block_sparse_moe.experts.214.w1", "model.layers.49.block_sparse_moe.experts.215.w1", "model.layers.49.block_sparse_moe.experts.216.w1", "model.layers.49.block_sparse_moe.experts.217.w1", "model.layers.49.block_sparse_moe.experts.218.w1", "model.layers.49.block_sparse_moe.experts.219.w1", "model.layers.49.block_sparse_moe.experts.220.w1", "model.layers.49.block_sparse_moe.experts.221.w1", "model.layers.49.block_sparse_moe.experts.222.w1", "model.layers.49.block_sparse_moe.experts.223.w1", "model.layers.49.block_sparse_moe.experts.224.w1", "model.layers.49.block_sparse_moe.experts.225.w1", "model.layers.49.block_sparse_moe.experts.226.w1", "model.layers.49.block_sparse_moe.experts.227.w1", "model.layers.49.block_sparse_moe.experts.228.w1", "model.layers.49.block_sparse_moe.experts.229.w1", "model.layers.49.block_sparse_moe.experts.230.w1", "model.layers.49.block_sparse_moe.experts.231.w1", "model.layers.49.block_sparse_moe.experts.232.w1", "model.layers.49.block_sparse_moe.experts.233.w1", "model.layers.49.block_sparse_moe.experts.234.w1", "model.layers.49.block_sparse_moe.experts.235.w1", "model.layers.49.block_sparse_moe.experts.236.w1", "model.layers.49.block_sparse_moe.experts.237.w1", "model.layers.49.block_sparse_moe.experts.238.w1", "model.layers.49.block_sparse_moe.experts.239.w1", "model.layers.49.block_sparse_moe.experts.240.w1", "model.layers.49.block_sparse_moe.experts.241.w1", "model.layers.49.block_sparse_moe.experts.242.w1", "model.layers.49.block_sparse_moe.experts.243.w1", "model.layers.49.block_sparse_moe.experts.244.w1", "model.layers.49.block_sparse_moe.experts.245.w1", "model.layers.49.block_sparse_moe.experts.246.w1", "model.layers.49.block_sparse_moe.experts.247.w1", "model.layers.49.block_sparse_moe.experts.248.w1", "model.layers.49.block_sparse_moe.experts.249.w1", "model.layers.49.block_sparse_moe.experts.250.w1", "model.layers.49.block_sparse_moe.experts.251.w1", "model.layers.49.block_sparse_moe.experts.252.w1", "model.layers.49.block_sparse_moe.experts.253.w1", "model.layers.49.block_sparse_moe.experts.254.w1", "model.layers.49.block_sparse_moe.experts.255.w1", "model.layers.49.block_sparse_moe.experts.0.w3", "model.layers.49.block_sparse_moe.experts.1.w3", "model.layers.49.block_sparse_moe.experts.2.w3", "model.layers.49.block_sparse_moe.experts.3.w3", "model.layers.49.block_sparse_moe.experts.4.w3", "model.layers.49.block_sparse_moe.experts.5.w3", "model.layers.49.block_sparse_moe.experts.6.w3", "model.layers.49.block_sparse_moe.experts.7.w3", "model.layers.49.block_sparse_moe.experts.8.w3", "model.layers.49.block_sparse_moe.experts.9.w3", "model.layers.49.block_sparse_moe.experts.10.w3", "model.layers.49.block_sparse_moe.experts.11.w3", "model.layers.49.block_sparse_moe.experts.12.w3", "model.layers.49.block_sparse_moe.experts.13.w3", "model.layers.49.block_sparse_moe.experts.14.w3", "model.layers.49.block_sparse_moe.experts.15.w3", "model.layers.49.block_sparse_moe.experts.16.w3", "model.layers.49.block_sparse_moe.experts.17.w3", "model.layers.49.block_sparse_moe.experts.18.w3", "model.layers.49.block_sparse_moe.experts.19.w3", "model.layers.49.block_sparse_moe.experts.20.w3", "model.layers.49.block_sparse_moe.experts.21.w3", "model.layers.49.block_sparse_moe.experts.22.w3", "model.layers.49.block_sparse_moe.experts.23.w3", "model.layers.49.block_sparse_moe.experts.24.w3", "model.layers.49.block_sparse_moe.experts.25.w3", "model.layers.49.block_sparse_moe.experts.26.w3", "model.layers.49.block_sparse_moe.experts.27.w3", "model.layers.49.block_sparse_moe.experts.28.w3", "model.layers.49.block_sparse_moe.experts.29.w3", "model.layers.49.block_sparse_moe.experts.30.w3", "model.layers.49.block_sparse_moe.experts.31.w3", "model.layers.49.block_sparse_moe.experts.32.w3", "model.layers.49.block_sparse_moe.experts.33.w3", "model.layers.49.block_sparse_moe.experts.34.w3", "model.layers.49.block_sparse_moe.experts.35.w3", "model.layers.49.block_sparse_moe.experts.36.w3", "model.layers.49.block_sparse_moe.experts.37.w3", "model.layers.49.block_sparse_moe.experts.38.w3", "model.layers.49.block_sparse_moe.experts.39.w3", "model.layers.49.block_sparse_moe.experts.40.w3", "model.layers.49.block_sparse_moe.experts.41.w3", "model.layers.49.block_sparse_moe.experts.42.w3", "model.layers.49.block_sparse_moe.experts.43.w3", "model.layers.49.block_sparse_moe.experts.44.w3", "model.layers.49.block_sparse_moe.experts.45.w3", "model.layers.49.block_sparse_moe.experts.46.w3", "model.layers.49.block_sparse_moe.experts.47.w3", "model.layers.49.block_sparse_moe.experts.48.w3", "model.layers.49.block_sparse_moe.experts.49.w3", "model.layers.49.block_sparse_moe.experts.50.w3", "model.layers.49.block_sparse_moe.experts.51.w3", "model.layers.49.block_sparse_moe.experts.52.w3", "model.layers.49.block_sparse_moe.experts.53.w3", "model.layers.49.block_sparse_moe.experts.54.w3", "model.layers.49.block_sparse_moe.experts.55.w3", "model.layers.49.block_sparse_moe.experts.56.w3", "model.layers.49.block_sparse_moe.experts.57.w3", "model.layers.49.block_sparse_moe.experts.58.w3", "model.layers.49.block_sparse_moe.experts.59.w3", "model.layers.49.block_sparse_moe.experts.60.w3", "model.layers.49.block_sparse_moe.experts.61.w3", "model.layers.49.block_sparse_moe.experts.62.w3", "model.layers.49.block_sparse_moe.experts.63.w3", "model.layers.49.block_sparse_moe.experts.64.w3", "model.layers.49.block_sparse_moe.experts.65.w3", "model.layers.49.block_sparse_moe.experts.66.w3", "model.layers.49.block_sparse_moe.experts.67.w3", "model.layers.49.block_sparse_moe.experts.68.w3", "model.layers.49.block_sparse_moe.experts.69.w3", "model.layers.49.block_sparse_moe.experts.70.w3", "model.layers.49.block_sparse_moe.experts.71.w3", "model.layers.49.block_sparse_moe.experts.72.w3", "model.layers.49.block_sparse_moe.experts.73.w3", "model.layers.49.block_sparse_moe.experts.74.w3", "model.layers.49.block_sparse_moe.experts.75.w3", "model.layers.49.block_sparse_moe.experts.76.w3", "model.layers.49.block_sparse_moe.experts.77.w3", "model.layers.49.block_sparse_moe.experts.78.w3", "model.layers.49.block_sparse_moe.experts.79.w3", "model.layers.49.block_sparse_moe.experts.80.w3", "model.layers.49.block_sparse_moe.experts.81.w3", "model.layers.49.block_sparse_moe.experts.82.w3", "model.layers.49.block_sparse_moe.experts.83.w3", "model.layers.49.block_sparse_moe.experts.84.w3", "model.layers.49.block_sparse_moe.experts.85.w3", "model.layers.49.block_sparse_moe.experts.86.w3", "model.layers.49.block_sparse_moe.experts.87.w3", "model.layers.49.block_sparse_moe.experts.88.w3", "model.layers.49.block_sparse_moe.experts.89.w3", "model.layers.49.block_sparse_moe.experts.90.w3", "model.layers.49.block_sparse_moe.experts.91.w3", "model.layers.49.block_sparse_moe.experts.92.w3", "model.layers.49.block_sparse_moe.experts.93.w3", "model.layers.49.block_sparse_moe.experts.94.w3", "model.layers.49.block_sparse_moe.experts.95.w3", "model.layers.49.block_sparse_moe.experts.96.w3", "model.layers.49.block_sparse_moe.experts.97.w3", "model.layers.49.block_sparse_moe.experts.98.w3", "model.layers.49.block_sparse_moe.experts.99.w3", "model.layers.49.block_sparse_moe.experts.100.w3", "model.layers.49.block_sparse_moe.experts.101.w3", "model.layers.49.block_sparse_moe.experts.102.w3", "model.layers.49.block_sparse_moe.experts.103.w3", "model.layers.49.block_sparse_moe.experts.104.w3", "model.layers.49.block_sparse_moe.experts.105.w3", "model.layers.49.block_sparse_moe.experts.106.w3", "model.layers.49.block_sparse_moe.experts.107.w3", "model.layers.49.block_sparse_moe.experts.108.w3", "model.layers.49.block_sparse_moe.experts.109.w3", "model.layers.49.block_sparse_moe.experts.110.w3", "model.layers.49.block_sparse_moe.experts.111.w3", "model.layers.49.block_sparse_moe.experts.112.w3", "model.layers.49.block_sparse_moe.experts.113.w3", "model.layers.49.block_sparse_moe.experts.114.w3", "model.layers.49.block_sparse_moe.experts.115.w3", "model.layers.49.block_sparse_moe.experts.116.w3", "model.layers.49.block_sparse_moe.experts.117.w3", "model.layers.49.block_sparse_moe.experts.118.w3", "model.layers.49.block_sparse_moe.experts.119.w3", "model.layers.49.block_sparse_moe.experts.120.w3", "model.layers.49.block_sparse_moe.experts.121.w3", "model.layers.49.block_sparse_moe.experts.122.w3", "model.layers.49.block_sparse_moe.experts.123.w3", "model.layers.49.block_sparse_moe.experts.124.w3", "model.layers.49.block_sparse_moe.experts.125.w3", "model.layers.49.block_sparse_moe.experts.126.w3", "model.layers.49.block_sparse_moe.experts.127.w3", "model.layers.49.block_sparse_moe.experts.128.w3", "model.layers.49.block_sparse_moe.experts.129.w3", "model.layers.49.block_sparse_moe.experts.130.w3", "model.layers.49.block_sparse_moe.experts.131.w3", "model.layers.49.block_sparse_moe.experts.132.w3", "model.layers.49.block_sparse_moe.experts.133.w3", "model.layers.49.block_sparse_moe.experts.134.w3", "model.layers.49.block_sparse_moe.experts.135.w3", "model.layers.49.block_sparse_moe.experts.136.w3", "model.layers.49.block_sparse_moe.experts.137.w3", "model.layers.49.block_sparse_moe.experts.138.w3", "model.layers.49.block_sparse_moe.experts.139.w3", "model.layers.49.block_sparse_moe.experts.140.w3", "model.layers.49.block_sparse_moe.experts.141.w3", "model.layers.49.block_sparse_moe.experts.142.w3", "model.layers.49.block_sparse_moe.experts.143.w3", "model.layers.49.block_sparse_moe.experts.144.w3", "model.layers.49.block_sparse_moe.experts.145.w3", "model.layers.49.block_sparse_moe.experts.146.w3", "model.layers.49.block_sparse_moe.experts.147.w3", "model.layers.49.block_sparse_moe.experts.148.w3", "model.layers.49.block_sparse_moe.experts.149.w3", "model.layers.49.block_sparse_moe.experts.150.w3", "model.layers.49.block_sparse_moe.experts.151.w3", "model.layers.49.block_sparse_moe.experts.152.w3", "model.layers.49.block_sparse_moe.experts.153.w3", "model.layers.49.block_sparse_moe.experts.154.w3", "model.layers.49.block_sparse_moe.experts.155.w3", "model.layers.49.block_sparse_moe.experts.156.w3", "model.layers.49.block_sparse_moe.experts.157.w3", "model.layers.49.block_sparse_moe.experts.158.w3", "model.layers.49.block_sparse_moe.experts.159.w3", "model.layers.49.block_sparse_moe.experts.160.w3", "model.layers.49.block_sparse_moe.experts.161.w3", "model.layers.49.block_sparse_moe.experts.162.w3", "model.layers.49.block_sparse_moe.experts.163.w3", "model.layers.49.block_sparse_moe.experts.164.w3", "model.layers.49.block_sparse_moe.experts.165.w3", "model.layers.49.block_sparse_moe.experts.166.w3", "model.layers.49.block_sparse_moe.experts.167.w3", "model.layers.49.block_sparse_moe.experts.168.w3", "model.layers.49.block_sparse_moe.experts.169.w3", "model.layers.49.block_sparse_moe.experts.170.w3", "model.layers.49.block_sparse_moe.experts.171.w3", "model.layers.49.block_sparse_moe.experts.172.w3", "model.layers.49.block_sparse_moe.experts.173.w3", "model.layers.49.block_sparse_moe.experts.174.w3", "model.layers.49.block_sparse_moe.experts.175.w3", "model.layers.49.block_sparse_moe.experts.176.w3", "model.layers.49.block_sparse_moe.experts.177.w3", "model.layers.49.block_sparse_moe.experts.178.w3", "model.layers.49.block_sparse_moe.experts.179.w3", "model.layers.49.block_sparse_moe.experts.180.w3", "model.layers.49.block_sparse_moe.experts.181.w3", "model.layers.49.block_sparse_moe.experts.182.w3", "model.layers.49.block_sparse_moe.experts.183.w3", "model.layers.49.block_sparse_moe.experts.184.w3", "model.layers.49.block_sparse_moe.experts.185.w3", "model.layers.49.block_sparse_moe.experts.186.w3", "model.layers.49.block_sparse_moe.experts.187.w3", "model.layers.49.block_sparse_moe.experts.188.w3", "model.layers.49.block_sparse_moe.experts.189.w3", "model.layers.49.block_sparse_moe.experts.190.w3", "model.layers.49.block_sparse_moe.experts.191.w3", "model.layers.49.block_sparse_moe.experts.192.w3", "model.layers.49.block_sparse_moe.experts.193.w3", "model.layers.49.block_sparse_moe.experts.194.w3", "model.layers.49.block_sparse_moe.experts.195.w3", "model.layers.49.block_sparse_moe.experts.196.w3", "model.layers.49.block_sparse_moe.experts.197.w3", "model.layers.49.block_sparse_moe.experts.198.w3", "model.layers.49.block_sparse_moe.experts.199.w3", "model.layers.49.block_sparse_moe.experts.200.w3", "model.layers.49.block_sparse_moe.experts.201.w3", "model.layers.49.block_sparse_moe.experts.202.w3", "model.layers.49.block_sparse_moe.experts.203.w3", "model.layers.49.block_sparse_moe.experts.204.w3", "model.layers.49.block_sparse_moe.experts.205.w3", "model.layers.49.block_sparse_moe.experts.206.w3", "model.layers.49.block_sparse_moe.experts.207.w3", "model.layers.49.block_sparse_moe.experts.208.w3", "model.layers.49.block_sparse_moe.experts.209.w3", "model.layers.49.block_sparse_moe.experts.210.w3", "model.layers.49.block_sparse_moe.experts.211.w3", "model.layers.49.block_sparse_moe.experts.212.w3", "model.layers.49.block_sparse_moe.experts.213.w3", "model.layers.49.block_sparse_moe.experts.214.w3", "model.layers.49.block_sparse_moe.experts.215.w3", "model.layers.49.block_sparse_moe.experts.216.w3", "model.layers.49.block_sparse_moe.experts.217.w3", "model.layers.49.block_sparse_moe.experts.218.w3", "model.layers.49.block_sparse_moe.experts.219.w3", "model.layers.49.block_sparse_moe.experts.220.w3", "model.layers.49.block_sparse_moe.experts.221.w3", "model.layers.49.block_sparse_moe.experts.222.w3", "model.layers.49.block_sparse_moe.experts.223.w3", "model.layers.49.block_sparse_moe.experts.224.w3", "model.layers.49.block_sparse_moe.experts.225.w3", "model.layers.49.block_sparse_moe.experts.226.w3", "model.layers.49.block_sparse_moe.experts.227.w3", "model.layers.49.block_sparse_moe.experts.228.w3", "model.layers.49.block_sparse_moe.experts.229.w3", "model.layers.49.block_sparse_moe.experts.230.w3", "model.layers.49.block_sparse_moe.experts.231.w3", "model.layers.49.block_sparse_moe.experts.232.w3", "model.layers.49.block_sparse_moe.experts.233.w3", "model.layers.49.block_sparse_moe.experts.234.w3", "model.layers.49.block_sparse_moe.experts.235.w3", "model.layers.49.block_sparse_moe.experts.236.w3", "model.layers.49.block_sparse_moe.experts.237.w3", "model.layers.49.block_sparse_moe.experts.238.w3", "model.layers.49.block_sparse_moe.experts.239.w3", "model.layers.49.block_sparse_moe.experts.240.w3", "model.layers.49.block_sparse_moe.experts.241.w3", "model.layers.49.block_sparse_moe.experts.242.w3", "model.layers.49.block_sparse_moe.experts.243.w3", "model.layers.49.block_sparse_moe.experts.244.w3", "model.layers.49.block_sparse_moe.experts.245.w3", "model.layers.49.block_sparse_moe.experts.246.w3", "model.layers.49.block_sparse_moe.experts.247.w3", "model.layers.49.block_sparse_moe.experts.248.w3", "model.layers.49.block_sparse_moe.experts.249.w3", "model.layers.49.block_sparse_moe.experts.250.w3", "model.layers.49.block_sparse_moe.experts.251.w3", "model.layers.49.block_sparse_moe.experts.252.w3", "model.layers.49.block_sparse_moe.experts.253.w3", "model.layers.49.block_sparse_moe.experts.254.w3", "model.layers.49.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 2.8590857982624396e-05, "dbits": 2415919104 } ] }, { "idx": 249, "layers": [ "model.layers.49.block_sparse_moe.experts.0.w2", "model.layers.49.block_sparse_moe.experts.1.w2", "model.layers.49.block_sparse_moe.experts.2.w2", "model.layers.49.block_sparse_moe.experts.3.w2", "model.layers.49.block_sparse_moe.experts.4.w2", "model.layers.49.block_sparse_moe.experts.5.w2", "model.layers.49.block_sparse_moe.experts.6.w2", "model.layers.49.block_sparse_moe.experts.7.w2", "model.layers.49.block_sparse_moe.experts.8.w2", "model.layers.49.block_sparse_moe.experts.9.w2", "model.layers.49.block_sparse_moe.experts.10.w2", "model.layers.49.block_sparse_moe.experts.11.w2", "model.layers.49.block_sparse_moe.experts.12.w2", "model.layers.49.block_sparse_moe.experts.13.w2", "model.layers.49.block_sparse_moe.experts.14.w2", "model.layers.49.block_sparse_moe.experts.15.w2", "model.layers.49.block_sparse_moe.experts.16.w2", "model.layers.49.block_sparse_moe.experts.17.w2", "model.layers.49.block_sparse_moe.experts.18.w2", "model.layers.49.block_sparse_moe.experts.19.w2", "model.layers.49.block_sparse_moe.experts.20.w2", "model.layers.49.block_sparse_moe.experts.21.w2", "model.layers.49.block_sparse_moe.experts.22.w2", "model.layers.49.block_sparse_moe.experts.23.w2", "model.layers.49.block_sparse_moe.experts.24.w2", "model.layers.49.block_sparse_moe.experts.25.w2", "model.layers.49.block_sparse_moe.experts.26.w2", "model.layers.49.block_sparse_moe.experts.27.w2", "model.layers.49.block_sparse_moe.experts.28.w2", "model.layers.49.block_sparse_moe.experts.29.w2", "model.layers.49.block_sparse_moe.experts.30.w2", "model.layers.49.block_sparse_moe.experts.31.w2", "model.layers.49.block_sparse_moe.experts.32.w2", "model.layers.49.block_sparse_moe.experts.33.w2", "model.layers.49.block_sparse_moe.experts.34.w2", "model.layers.49.block_sparse_moe.experts.35.w2", "model.layers.49.block_sparse_moe.experts.36.w2", "model.layers.49.block_sparse_moe.experts.37.w2", "model.layers.49.block_sparse_moe.experts.38.w2", "model.layers.49.block_sparse_moe.experts.39.w2", "model.layers.49.block_sparse_moe.experts.40.w2", "model.layers.49.block_sparse_moe.experts.41.w2", "model.layers.49.block_sparse_moe.experts.42.w2", "model.layers.49.block_sparse_moe.experts.43.w2", "model.layers.49.block_sparse_moe.experts.44.w2", "model.layers.49.block_sparse_moe.experts.45.w2", "model.layers.49.block_sparse_moe.experts.46.w2", "model.layers.49.block_sparse_moe.experts.47.w2", "model.layers.49.block_sparse_moe.experts.48.w2", "model.layers.49.block_sparse_moe.experts.49.w2", "model.layers.49.block_sparse_moe.experts.50.w2", "model.layers.49.block_sparse_moe.experts.51.w2", "model.layers.49.block_sparse_moe.experts.52.w2", "model.layers.49.block_sparse_moe.experts.53.w2", "model.layers.49.block_sparse_moe.experts.54.w2", "model.layers.49.block_sparse_moe.experts.55.w2", "model.layers.49.block_sparse_moe.experts.56.w2", "model.layers.49.block_sparse_moe.experts.57.w2", "model.layers.49.block_sparse_moe.experts.58.w2", "model.layers.49.block_sparse_moe.experts.59.w2", "model.layers.49.block_sparse_moe.experts.60.w2", "model.layers.49.block_sparse_moe.experts.61.w2", "model.layers.49.block_sparse_moe.experts.62.w2", "model.layers.49.block_sparse_moe.experts.63.w2", "model.layers.49.block_sparse_moe.experts.64.w2", "model.layers.49.block_sparse_moe.experts.65.w2", "model.layers.49.block_sparse_moe.experts.66.w2", "model.layers.49.block_sparse_moe.experts.67.w2", "model.layers.49.block_sparse_moe.experts.68.w2", "model.layers.49.block_sparse_moe.experts.69.w2", "model.layers.49.block_sparse_moe.experts.70.w2", "model.layers.49.block_sparse_moe.experts.71.w2", "model.layers.49.block_sparse_moe.experts.72.w2", "model.layers.49.block_sparse_moe.experts.73.w2", "model.layers.49.block_sparse_moe.experts.74.w2", "model.layers.49.block_sparse_moe.experts.75.w2", "model.layers.49.block_sparse_moe.experts.76.w2", "model.layers.49.block_sparse_moe.experts.77.w2", "model.layers.49.block_sparse_moe.experts.78.w2", "model.layers.49.block_sparse_moe.experts.79.w2", "model.layers.49.block_sparse_moe.experts.80.w2", "model.layers.49.block_sparse_moe.experts.81.w2", "model.layers.49.block_sparse_moe.experts.82.w2", "model.layers.49.block_sparse_moe.experts.83.w2", "model.layers.49.block_sparse_moe.experts.84.w2", "model.layers.49.block_sparse_moe.experts.85.w2", "model.layers.49.block_sparse_moe.experts.86.w2", "model.layers.49.block_sparse_moe.experts.87.w2", "model.layers.49.block_sparse_moe.experts.88.w2", "model.layers.49.block_sparse_moe.experts.89.w2", "model.layers.49.block_sparse_moe.experts.90.w2", "model.layers.49.block_sparse_moe.experts.91.w2", "model.layers.49.block_sparse_moe.experts.92.w2", "model.layers.49.block_sparse_moe.experts.93.w2", "model.layers.49.block_sparse_moe.experts.94.w2", "model.layers.49.block_sparse_moe.experts.95.w2", "model.layers.49.block_sparse_moe.experts.96.w2", "model.layers.49.block_sparse_moe.experts.97.w2", "model.layers.49.block_sparse_moe.experts.98.w2", "model.layers.49.block_sparse_moe.experts.99.w2", "model.layers.49.block_sparse_moe.experts.100.w2", "model.layers.49.block_sparse_moe.experts.101.w2", "model.layers.49.block_sparse_moe.experts.102.w2", "model.layers.49.block_sparse_moe.experts.103.w2", "model.layers.49.block_sparse_moe.experts.104.w2", "model.layers.49.block_sparse_moe.experts.105.w2", "model.layers.49.block_sparse_moe.experts.106.w2", "model.layers.49.block_sparse_moe.experts.107.w2", "model.layers.49.block_sparse_moe.experts.108.w2", "model.layers.49.block_sparse_moe.experts.109.w2", "model.layers.49.block_sparse_moe.experts.110.w2", "model.layers.49.block_sparse_moe.experts.111.w2", "model.layers.49.block_sparse_moe.experts.112.w2", "model.layers.49.block_sparse_moe.experts.113.w2", "model.layers.49.block_sparse_moe.experts.114.w2", "model.layers.49.block_sparse_moe.experts.115.w2", "model.layers.49.block_sparse_moe.experts.116.w2", "model.layers.49.block_sparse_moe.experts.117.w2", "model.layers.49.block_sparse_moe.experts.118.w2", "model.layers.49.block_sparse_moe.experts.119.w2", "model.layers.49.block_sparse_moe.experts.120.w2", "model.layers.49.block_sparse_moe.experts.121.w2", "model.layers.49.block_sparse_moe.experts.122.w2", "model.layers.49.block_sparse_moe.experts.123.w2", "model.layers.49.block_sparse_moe.experts.124.w2", "model.layers.49.block_sparse_moe.experts.125.w2", "model.layers.49.block_sparse_moe.experts.126.w2", "model.layers.49.block_sparse_moe.experts.127.w2", "model.layers.49.block_sparse_moe.experts.128.w2", "model.layers.49.block_sparse_moe.experts.129.w2", "model.layers.49.block_sparse_moe.experts.130.w2", "model.layers.49.block_sparse_moe.experts.131.w2", "model.layers.49.block_sparse_moe.experts.132.w2", "model.layers.49.block_sparse_moe.experts.133.w2", "model.layers.49.block_sparse_moe.experts.134.w2", "model.layers.49.block_sparse_moe.experts.135.w2", "model.layers.49.block_sparse_moe.experts.136.w2", "model.layers.49.block_sparse_moe.experts.137.w2", "model.layers.49.block_sparse_moe.experts.138.w2", "model.layers.49.block_sparse_moe.experts.139.w2", "model.layers.49.block_sparse_moe.experts.140.w2", "model.layers.49.block_sparse_moe.experts.141.w2", "model.layers.49.block_sparse_moe.experts.142.w2", "model.layers.49.block_sparse_moe.experts.143.w2", "model.layers.49.block_sparse_moe.experts.144.w2", "model.layers.49.block_sparse_moe.experts.145.w2", "model.layers.49.block_sparse_moe.experts.146.w2", "model.layers.49.block_sparse_moe.experts.147.w2", "model.layers.49.block_sparse_moe.experts.148.w2", "model.layers.49.block_sparse_moe.experts.149.w2", "model.layers.49.block_sparse_moe.experts.150.w2", "model.layers.49.block_sparse_moe.experts.151.w2", "model.layers.49.block_sparse_moe.experts.152.w2", "model.layers.49.block_sparse_moe.experts.153.w2", "model.layers.49.block_sparse_moe.experts.154.w2", "model.layers.49.block_sparse_moe.experts.155.w2", "model.layers.49.block_sparse_moe.experts.156.w2", "model.layers.49.block_sparse_moe.experts.157.w2", "model.layers.49.block_sparse_moe.experts.158.w2", "model.layers.49.block_sparse_moe.experts.159.w2", "model.layers.49.block_sparse_moe.experts.160.w2", "model.layers.49.block_sparse_moe.experts.161.w2", "model.layers.49.block_sparse_moe.experts.162.w2", "model.layers.49.block_sparse_moe.experts.163.w2", "model.layers.49.block_sparse_moe.experts.164.w2", "model.layers.49.block_sparse_moe.experts.165.w2", "model.layers.49.block_sparse_moe.experts.166.w2", "model.layers.49.block_sparse_moe.experts.167.w2", "model.layers.49.block_sparse_moe.experts.168.w2", "model.layers.49.block_sparse_moe.experts.169.w2", "model.layers.49.block_sparse_moe.experts.170.w2", "model.layers.49.block_sparse_moe.experts.171.w2", "model.layers.49.block_sparse_moe.experts.172.w2", "model.layers.49.block_sparse_moe.experts.173.w2", "model.layers.49.block_sparse_moe.experts.174.w2", "model.layers.49.block_sparse_moe.experts.175.w2", "model.layers.49.block_sparse_moe.experts.176.w2", "model.layers.49.block_sparse_moe.experts.177.w2", "model.layers.49.block_sparse_moe.experts.178.w2", "model.layers.49.block_sparse_moe.experts.179.w2", "model.layers.49.block_sparse_moe.experts.180.w2", "model.layers.49.block_sparse_moe.experts.181.w2", "model.layers.49.block_sparse_moe.experts.182.w2", "model.layers.49.block_sparse_moe.experts.183.w2", "model.layers.49.block_sparse_moe.experts.184.w2", "model.layers.49.block_sparse_moe.experts.185.w2", "model.layers.49.block_sparse_moe.experts.186.w2", "model.layers.49.block_sparse_moe.experts.187.w2", "model.layers.49.block_sparse_moe.experts.188.w2", "model.layers.49.block_sparse_moe.experts.189.w2", "model.layers.49.block_sparse_moe.experts.190.w2", "model.layers.49.block_sparse_moe.experts.191.w2", "model.layers.49.block_sparse_moe.experts.192.w2", "model.layers.49.block_sparse_moe.experts.193.w2", "model.layers.49.block_sparse_moe.experts.194.w2", "model.layers.49.block_sparse_moe.experts.195.w2", "model.layers.49.block_sparse_moe.experts.196.w2", "model.layers.49.block_sparse_moe.experts.197.w2", "model.layers.49.block_sparse_moe.experts.198.w2", "model.layers.49.block_sparse_moe.experts.199.w2", "model.layers.49.block_sparse_moe.experts.200.w2", "model.layers.49.block_sparse_moe.experts.201.w2", "model.layers.49.block_sparse_moe.experts.202.w2", "model.layers.49.block_sparse_moe.experts.203.w2", "model.layers.49.block_sparse_moe.experts.204.w2", "model.layers.49.block_sparse_moe.experts.205.w2", "model.layers.49.block_sparse_moe.experts.206.w2", "model.layers.49.block_sparse_moe.experts.207.w2", "model.layers.49.block_sparse_moe.experts.208.w2", "model.layers.49.block_sparse_moe.experts.209.w2", "model.layers.49.block_sparse_moe.experts.210.w2", "model.layers.49.block_sparse_moe.experts.211.w2", "model.layers.49.block_sparse_moe.experts.212.w2", "model.layers.49.block_sparse_moe.experts.213.w2", "model.layers.49.block_sparse_moe.experts.214.w2", "model.layers.49.block_sparse_moe.experts.215.w2", "model.layers.49.block_sparse_moe.experts.216.w2", "model.layers.49.block_sparse_moe.experts.217.w2", "model.layers.49.block_sparse_moe.experts.218.w2", "model.layers.49.block_sparse_moe.experts.219.w2", "model.layers.49.block_sparse_moe.experts.220.w2", "model.layers.49.block_sparse_moe.experts.221.w2", "model.layers.49.block_sparse_moe.experts.222.w2", "model.layers.49.block_sparse_moe.experts.223.w2", "model.layers.49.block_sparse_moe.experts.224.w2", "model.layers.49.block_sparse_moe.experts.225.w2", "model.layers.49.block_sparse_moe.experts.226.w2", "model.layers.49.block_sparse_moe.experts.227.w2", "model.layers.49.block_sparse_moe.experts.228.w2", "model.layers.49.block_sparse_moe.experts.229.w2", "model.layers.49.block_sparse_moe.experts.230.w2", "model.layers.49.block_sparse_moe.experts.231.w2", "model.layers.49.block_sparse_moe.experts.232.w2", "model.layers.49.block_sparse_moe.experts.233.w2", "model.layers.49.block_sparse_moe.experts.234.w2", "model.layers.49.block_sparse_moe.experts.235.w2", "model.layers.49.block_sparse_moe.experts.236.w2", "model.layers.49.block_sparse_moe.experts.237.w2", "model.layers.49.block_sparse_moe.experts.238.w2", "model.layers.49.block_sparse_moe.experts.239.w2", "model.layers.49.block_sparse_moe.experts.240.w2", "model.layers.49.block_sparse_moe.experts.241.w2", "model.layers.49.block_sparse_moe.experts.242.w2", "model.layers.49.block_sparse_moe.experts.243.w2", "model.layers.49.block_sparse_moe.experts.244.w2", "model.layers.49.block_sparse_moe.experts.245.w2", "model.layers.49.block_sparse_moe.experts.246.w2", "model.layers.49.block_sparse_moe.experts.247.w2", "model.layers.49.block_sparse_moe.experts.248.w2", "model.layers.49.block_sparse_moe.experts.249.w2", "model.layers.49.block_sparse_moe.experts.250.w2", "model.layers.49.block_sparse_moe.experts.251.w2", "model.layers.49.block_sparse_moe.experts.252.w2", "model.layers.49.block_sparse_moe.experts.253.w2", "model.layers.49.block_sparse_moe.experts.254.w2", "model.layers.49.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 0.00013251639902589485, "dbits": 1207959552 } ] }, { "idx": 250, "layers": [ "model.layers.50.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0002988036721944809, "dbits": 18874368 } ] }, { "idx": 251, "layers": [ "model.layers.50.self_attn.k_proj", "model.layers.50.self_attn.v_proj" ], "candidates": [ { "dkld": 0.0013406947255134583, "dbits": 6291456 } ] }, { "idx": 252, "layers": [ "model.layers.50.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0007862664759158977, "dbits": 18874368 } ] }, { "idx": 253, "layers": [ "model.layers.50.block_sparse_moe.experts.0.w1", "model.layers.50.block_sparse_moe.experts.1.w1", "model.layers.50.block_sparse_moe.experts.2.w1", "model.layers.50.block_sparse_moe.experts.3.w1", "model.layers.50.block_sparse_moe.experts.4.w1", "model.layers.50.block_sparse_moe.experts.5.w1", "model.layers.50.block_sparse_moe.experts.6.w1", "model.layers.50.block_sparse_moe.experts.7.w1", "model.layers.50.block_sparse_moe.experts.8.w1", "model.layers.50.block_sparse_moe.experts.9.w1", "model.layers.50.block_sparse_moe.experts.10.w1", "model.layers.50.block_sparse_moe.experts.11.w1", "model.layers.50.block_sparse_moe.experts.12.w1", "model.layers.50.block_sparse_moe.experts.13.w1", "model.layers.50.block_sparse_moe.experts.14.w1", "model.layers.50.block_sparse_moe.experts.15.w1", "model.layers.50.block_sparse_moe.experts.16.w1", "model.layers.50.block_sparse_moe.experts.17.w1", "model.layers.50.block_sparse_moe.experts.18.w1", "model.layers.50.block_sparse_moe.experts.19.w1", "model.layers.50.block_sparse_moe.experts.20.w1", "model.layers.50.block_sparse_moe.experts.21.w1", "model.layers.50.block_sparse_moe.experts.22.w1", "model.layers.50.block_sparse_moe.experts.23.w1", "model.layers.50.block_sparse_moe.experts.24.w1", "model.layers.50.block_sparse_moe.experts.25.w1", "model.layers.50.block_sparse_moe.experts.26.w1", "model.layers.50.block_sparse_moe.experts.27.w1", "model.layers.50.block_sparse_moe.experts.28.w1", "model.layers.50.block_sparse_moe.experts.29.w1", "model.layers.50.block_sparse_moe.experts.30.w1", "model.layers.50.block_sparse_moe.experts.31.w1", "model.layers.50.block_sparse_moe.experts.32.w1", "model.layers.50.block_sparse_moe.experts.33.w1", "model.layers.50.block_sparse_moe.experts.34.w1", "model.layers.50.block_sparse_moe.experts.35.w1", "model.layers.50.block_sparse_moe.experts.36.w1", "model.layers.50.block_sparse_moe.experts.37.w1", "model.layers.50.block_sparse_moe.experts.38.w1", "model.layers.50.block_sparse_moe.experts.39.w1", "model.layers.50.block_sparse_moe.experts.40.w1", "model.layers.50.block_sparse_moe.experts.41.w1", "model.layers.50.block_sparse_moe.experts.42.w1", "model.layers.50.block_sparse_moe.experts.43.w1", "model.layers.50.block_sparse_moe.experts.44.w1", "model.layers.50.block_sparse_moe.experts.45.w1", "model.layers.50.block_sparse_moe.experts.46.w1", "model.layers.50.block_sparse_moe.experts.47.w1", "model.layers.50.block_sparse_moe.experts.48.w1", "model.layers.50.block_sparse_moe.experts.49.w1", "model.layers.50.block_sparse_moe.experts.50.w1", "model.layers.50.block_sparse_moe.experts.51.w1", "model.layers.50.block_sparse_moe.experts.52.w1", "model.layers.50.block_sparse_moe.experts.53.w1", "model.layers.50.block_sparse_moe.experts.54.w1", "model.layers.50.block_sparse_moe.experts.55.w1", "model.layers.50.block_sparse_moe.experts.56.w1", "model.layers.50.block_sparse_moe.experts.57.w1", "model.layers.50.block_sparse_moe.experts.58.w1", "model.layers.50.block_sparse_moe.experts.59.w1", "model.layers.50.block_sparse_moe.experts.60.w1", "model.layers.50.block_sparse_moe.experts.61.w1", "model.layers.50.block_sparse_moe.experts.62.w1", "model.layers.50.block_sparse_moe.experts.63.w1", "model.layers.50.block_sparse_moe.experts.64.w1", "model.layers.50.block_sparse_moe.experts.65.w1", "model.layers.50.block_sparse_moe.experts.66.w1", "model.layers.50.block_sparse_moe.experts.67.w1", "model.layers.50.block_sparse_moe.experts.68.w1", "model.layers.50.block_sparse_moe.experts.69.w1", "model.layers.50.block_sparse_moe.experts.70.w1", "model.layers.50.block_sparse_moe.experts.71.w1", "model.layers.50.block_sparse_moe.experts.72.w1", "model.layers.50.block_sparse_moe.experts.73.w1", "model.layers.50.block_sparse_moe.experts.74.w1", "model.layers.50.block_sparse_moe.experts.75.w1", "model.layers.50.block_sparse_moe.experts.76.w1", "model.layers.50.block_sparse_moe.experts.77.w1", "model.layers.50.block_sparse_moe.experts.78.w1", "model.layers.50.block_sparse_moe.experts.79.w1", "model.layers.50.block_sparse_moe.experts.80.w1", "model.layers.50.block_sparse_moe.experts.81.w1", "model.layers.50.block_sparse_moe.experts.82.w1", "model.layers.50.block_sparse_moe.experts.83.w1", "model.layers.50.block_sparse_moe.experts.84.w1", "model.layers.50.block_sparse_moe.experts.85.w1", "model.layers.50.block_sparse_moe.experts.86.w1", "model.layers.50.block_sparse_moe.experts.87.w1", "model.layers.50.block_sparse_moe.experts.88.w1", "model.layers.50.block_sparse_moe.experts.89.w1", "model.layers.50.block_sparse_moe.experts.90.w1", "model.layers.50.block_sparse_moe.experts.91.w1", "model.layers.50.block_sparse_moe.experts.92.w1", "model.layers.50.block_sparse_moe.experts.93.w1", "model.layers.50.block_sparse_moe.experts.94.w1", "model.layers.50.block_sparse_moe.experts.95.w1", "model.layers.50.block_sparse_moe.experts.96.w1", "model.layers.50.block_sparse_moe.experts.97.w1", "model.layers.50.block_sparse_moe.experts.98.w1", "model.layers.50.block_sparse_moe.experts.99.w1", "model.layers.50.block_sparse_moe.experts.100.w1", "model.layers.50.block_sparse_moe.experts.101.w1", "model.layers.50.block_sparse_moe.experts.102.w1", "model.layers.50.block_sparse_moe.experts.103.w1", "model.layers.50.block_sparse_moe.experts.104.w1", "model.layers.50.block_sparse_moe.experts.105.w1", "model.layers.50.block_sparse_moe.experts.106.w1", "model.layers.50.block_sparse_moe.experts.107.w1", "model.layers.50.block_sparse_moe.experts.108.w1", "model.layers.50.block_sparse_moe.experts.109.w1", "model.layers.50.block_sparse_moe.experts.110.w1", "model.layers.50.block_sparse_moe.experts.111.w1", "model.layers.50.block_sparse_moe.experts.112.w1", "model.layers.50.block_sparse_moe.experts.113.w1", "model.layers.50.block_sparse_moe.experts.114.w1", "model.layers.50.block_sparse_moe.experts.115.w1", "model.layers.50.block_sparse_moe.experts.116.w1", "model.layers.50.block_sparse_moe.experts.117.w1", "model.layers.50.block_sparse_moe.experts.118.w1", "model.layers.50.block_sparse_moe.experts.119.w1", "model.layers.50.block_sparse_moe.experts.120.w1", "model.layers.50.block_sparse_moe.experts.121.w1", "model.layers.50.block_sparse_moe.experts.122.w1", "model.layers.50.block_sparse_moe.experts.123.w1", "model.layers.50.block_sparse_moe.experts.124.w1", "model.layers.50.block_sparse_moe.experts.125.w1", "model.layers.50.block_sparse_moe.experts.126.w1", "model.layers.50.block_sparse_moe.experts.127.w1", "model.layers.50.block_sparse_moe.experts.128.w1", "model.layers.50.block_sparse_moe.experts.129.w1", "model.layers.50.block_sparse_moe.experts.130.w1", "model.layers.50.block_sparse_moe.experts.131.w1", "model.layers.50.block_sparse_moe.experts.132.w1", "model.layers.50.block_sparse_moe.experts.133.w1", "model.layers.50.block_sparse_moe.experts.134.w1", "model.layers.50.block_sparse_moe.experts.135.w1", "model.layers.50.block_sparse_moe.experts.136.w1", "model.layers.50.block_sparse_moe.experts.137.w1", "model.layers.50.block_sparse_moe.experts.138.w1", "model.layers.50.block_sparse_moe.experts.139.w1", "model.layers.50.block_sparse_moe.experts.140.w1", "model.layers.50.block_sparse_moe.experts.141.w1", "model.layers.50.block_sparse_moe.experts.142.w1", "model.layers.50.block_sparse_moe.experts.143.w1", "model.layers.50.block_sparse_moe.experts.144.w1", "model.layers.50.block_sparse_moe.experts.145.w1", "model.layers.50.block_sparse_moe.experts.146.w1", "model.layers.50.block_sparse_moe.experts.147.w1", "model.layers.50.block_sparse_moe.experts.148.w1", "model.layers.50.block_sparse_moe.experts.149.w1", "model.layers.50.block_sparse_moe.experts.150.w1", "model.layers.50.block_sparse_moe.experts.151.w1", "model.layers.50.block_sparse_moe.experts.152.w1", "model.layers.50.block_sparse_moe.experts.153.w1", "model.layers.50.block_sparse_moe.experts.154.w1", "model.layers.50.block_sparse_moe.experts.155.w1", "model.layers.50.block_sparse_moe.experts.156.w1", "model.layers.50.block_sparse_moe.experts.157.w1", "model.layers.50.block_sparse_moe.experts.158.w1", "model.layers.50.block_sparse_moe.experts.159.w1", "model.layers.50.block_sparse_moe.experts.160.w1", "model.layers.50.block_sparse_moe.experts.161.w1", "model.layers.50.block_sparse_moe.experts.162.w1", "model.layers.50.block_sparse_moe.experts.163.w1", "model.layers.50.block_sparse_moe.experts.164.w1", "model.layers.50.block_sparse_moe.experts.165.w1", "model.layers.50.block_sparse_moe.experts.166.w1", "model.layers.50.block_sparse_moe.experts.167.w1", "model.layers.50.block_sparse_moe.experts.168.w1", "model.layers.50.block_sparse_moe.experts.169.w1", "model.layers.50.block_sparse_moe.experts.170.w1", "model.layers.50.block_sparse_moe.experts.171.w1", "model.layers.50.block_sparse_moe.experts.172.w1", "model.layers.50.block_sparse_moe.experts.173.w1", "model.layers.50.block_sparse_moe.experts.174.w1", "model.layers.50.block_sparse_moe.experts.175.w1", "model.layers.50.block_sparse_moe.experts.176.w1", "model.layers.50.block_sparse_moe.experts.177.w1", "model.layers.50.block_sparse_moe.experts.178.w1", "model.layers.50.block_sparse_moe.experts.179.w1", "model.layers.50.block_sparse_moe.experts.180.w1", "model.layers.50.block_sparse_moe.experts.181.w1", "model.layers.50.block_sparse_moe.experts.182.w1", "model.layers.50.block_sparse_moe.experts.183.w1", "model.layers.50.block_sparse_moe.experts.184.w1", "model.layers.50.block_sparse_moe.experts.185.w1", "model.layers.50.block_sparse_moe.experts.186.w1", "model.layers.50.block_sparse_moe.experts.187.w1", "model.layers.50.block_sparse_moe.experts.188.w1", "model.layers.50.block_sparse_moe.experts.189.w1", "model.layers.50.block_sparse_moe.experts.190.w1", "model.layers.50.block_sparse_moe.experts.191.w1", "model.layers.50.block_sparse_moe.experts.192.w1", "model.layers.50.block_sparse_moe.experts.193.w1", "model.layers.50.block_sparse_moe.experts.194.w1", "model.layers.50.block_sparse_moe.experts.195.w1", "model.layers.50.block_sparse_moe.experts.196.w1", "model.layers.50.block_sparse_moe.experts.197.w1", "model.layers.50.block_sparse_moe.experts.198.w1", "model.layers.50.block_sparse_moe.experts.199.w1", "model.layers.50.block_sparse_moe.experts.200.w1", "model.layers.50.block_sparse_moe.experts.201.w1", "model.layers.50.block_sparse_moe.experts.202.w1", "model.layers.50.block_sparse_moe.experts.203.w1", "model.layers.50.block_sparse_moe.experts.204.w1", "model.layers.50.block_sparse_moe.experts.205.w1", "model.layers.50.block_sparse_moe.experts.206.w1", "model.layers.50.block_sparse_moe.experts.207.w1", "model.layers.50.block_sparse_moe.experts.208.w1", "model.layers.50.block_sparse_moe.experts.209.w1", "model.layers.50.block_sparse_moe.experts.210.w1", "model.layers.50.block_sparse_moe.experts.211.w1", "model.layers.50.block_sparse_moe.experts.212.w1", "model.layers.50.block_sparse_moe.experts.213.w1", "model.layers.50.block_sparse_moe.experts.214.w1", "model.layers.50.block_sparse_moe.experts.215.w1", "model.layers.50.block_sparse_moe.experts.216.w1", "model.layers.50.block_sparse_moe.experts.217.w1", "model.layers.50.block_sparse_moe.experts.218.w1", "model.layers.50.block_sparse_moe.experts.219.w1", "model.layers.50.block_sparse_moe.experts.220.w1", "model.layers.50.block_sparse_moe.experts.221.w1", "model.layers.50.block_sparse_moe.experts.222.w1", "model.layers.50.block_sparse_moe.experts.223.w1", "model.layers.50.block_sparse_moe.experts.224.w1", "model.layers.50.block_sparse_moe.experts.225.w1", "model.layers.50.block_sparse_moe.experts.226.w1", "model.layers.50.block_sparse_moe.experts.227.w1", "model.layers.50.block_sparse_moe.experts.228.w1", "model.layers.50.block_sparse_moe.experts.229.w1", "model.layers.50.block_sparse_moe.experts.230.w1", "model.layers.50.block_sparse_moe.experts.231.w1", "model.layers.50.block_sparse_moe.experts.232.w1", "model.layers.50.block_sparse_moe.experts.233.w1", "model.layers.50.block_sparse_moe.experts.234.w1", "model.layers.50.block_sparse_moe.experts.235.w1", "model.layers.50.block_sparse_moe.experts.236.w1", "model.layers.50.block_sparse_moe.experts.237.w1", "model.layers.50.block_sparse_moe.experts.238.w1", "model.layers.50.block_sparse_moe.experts.239.w1", "model.layers.50.block_sparse_moe.experts.240.w1", "model.layers.50.block_sparse_moe.experts.241.w1", "model.layers.50.block_sparse_moe.experts.242.w1", "model.layers.50.block_sparse_moe.experts.243.w1", "model.layers.50.block_sparse_moe.experts.244.w1", "model.layers.50.block_sparse_moe.experts.245.w1", "model.layers.50.block_sparse_moe.experts.246.w1", "model.layers.50.block_sparse_moe.experts.247.w1", "model.layers.50.block_sparse_moe.experts.248.w1", "model.layers.50.block_sparse_moe.experts.249.w1", "model.layers.50.block_sparse_moe.experts.250.w1", "model.layers.50.block_sparse_moe.experts.251.w1", "model.layers.50.block_sparse_moe.experts.252.w1", "model.layers.50.block_sparse_moe.experts.253.w1", "model.layers.50.block_sparse_moe.experts.254.w1", "model.layers.50.block_sparse_moe.experts.255.w1", "model.layers.50.block_sparse_moe.experts.0.w3", "model.layers.50.block_sparse_moe.experts.1.w3", "model.layers.50.block_sparse_moe.experts.2.w3", "model.layers.50.block_sparse_moe.experts.3.w3", "model.layers.50.block_sparse_moe.experts.4.w3", "model.layers.50.block_sparse_moe.experts.5.w3", "model.layers.50.block_sparse_moe.experts.6.w3", "model.layers.50.block_sparse_moe.experts.7.w3", "model.layers.50.block_sparse_moe.experts.8.w3", "model.layers.50.block_sparse_moe.experts.9.w3", "model.layers.50.block_sparse_moe.experts.10.w3", "model.layers.50.block_sparse_moe.experts.11.w3", "model.layers.50.block_sparse_moe.experts.12.w3", "model.layers.50.block_sparse_moe.experts.13.w3", "model.layers.50.block_sparse_moe.experts.14.w3", "model.layers.50.block_sparse_moe.experts.15.w3", "model.layers.50.block_sparse_moe.experts.16.w3", "model.layers.50.block_sparse_moe.experts.17.w3", "model.layers.50.block_sparse_moe.experts.18.w3", "model.layers.50.block_sparse_moe.experts.19.w3", "model.layers.50.block_sparse_moe.experts.20.w3", "model.layers.50.block_sparse_moe.experts.21.w3", "model.layers.50.block_sparse_moe.experts.22.w3", "model.layers.50.block_sparse_moe.experts.23.w3", "model.layers.50.block_sparse_moe.experts.24.w3", "model.layers.50.block_sparse_moe.experts.25.w3", "model.layers.50.block_sparse_moe.experts.26.w3", "model.layers.50.block_sparse_moe.experts.27.w3", "model.layers.50.block_sparse_moe.experts.28.w3", "model.layers.50.block_sparse_moe.experts.29.w3", "model.layers.50.block_sparse_moe.experts.30.w3", "model.layers.50.block_sparse_moe.experts.31.w3", "model.layers.50.block_sparse_moe.experts.32.w3", "model.layers.50.block_sparse_moe.experts.33.w3", "model.layers.50.block_sparse_moe.experts.34.w3", "model.layers.50.block_sparse_moe.experts.35.w3", "model.layers.50.block_sparse_moe.experts.36.w3", "model.layers.50.block_sparse_moe.experts.37.w3", "model.layers.50.block_sparse_moe.experts.38.w3", "model.layers.50.block_sparse_moe.experts.39.w3", "model.layers.50.block_sparse_moe.experts.40.w3", "model.layers.50.block_sparse_moe.experts.41.w3", "model.layers.50.block_sparse_moe.experts.42.w3", "model.layers.50.block_sparse_moe.experts.43.w3", "model.layers.50.block_sparse_moe.experts.44.w3", "model.layers.50.block_sparse_moe.experts.45.w3", "model.layers.50.block_sparse_moe.experts.46.w3", "model.layers.50.block_sparse_moe.experts.47.w3", "model.layers.50.block_sparse_moe.experts.48.w3", "model.layers.50.block_sparse_moe.experts.49.w3", "model.layers.50.block_sparse_moe.experts.50.w3", "model.layers.50.block_sparse_moe.experts.51.w3", "model.layers.50.block_sparse_moe.experts.52.w3", "model.layers.50.block_sparse_moe.experts.53.w3", "model.layers.50.block_sparse_moe.experts.54.w3", "model.layers.50.block_sparse_moe.experts.55.w3", "model.layers.50.block_sparse_moe.experts.56.w3", "model.layers.50.block_sparse_moe.experts.57.w3", "model.layers.50.block_sparse_moe.experts.58.w3", "model.layers.50.block_sparse_moe.experts.59.w3", "model.layers.50.block_sparse_moe.experts.60.w3", "model.layers.50.block_sparse_moe.experts.61.w3", "model.layers.50.block_sparse_moe.experts.62.w3", "model.layers.50.block_sparse_moe.experts.63.w3", "model.layers.50.block_sparse_moe.experts.64.w3", "model.layers.50.block_sparse_moe.experts.65.w3", "model.layers.50.block_sparse_moe.experts.66.w3", "model.layers.50.block_sparse_moe.experts.67.w3", "model.layers.50.block_sparse_moe.experts.68.w3", "model.layers.50.block_sparse_moe.experts.69.w3", "model.layers.50.block_sparse_moe.experts.70.w3", "model.layers.50.block_sparse_moe.experts.71.w3", "model.layers.50.block_sparse_moe.experts.72.w3", "model.layers.50.block_sparse_moe.experts.73.w3", "model.layers.50.block_sparse_moe.experts.74.w3", "model.layers.50.block_sparse_moe.experts.75.w3", "model.layers.50.block_sparse_moe.experts.76.w3", "model.layers.50.block_sparse_moe.experts.77.w3", "model.layers.50.block_sparse_moe.experts.78.w3", "model.layers.50.block_sparse_moe.experts.79.w3", "model.layers.50.block_sparse_moe.experts.80.w3", "model.layers.50.block_sparse_moe.experts.81.w3", "model.layers.50.block_sparse_moe.experts.82.w3", "model.layers.50.block_sparse_moe.experts.83.w3", "model.layers.50.block_sparse_moe.experts.84.w3", "model.layers.50.block_sparse_moe.experts.85.w3", "model.layers.50.block_sparse_moe.experts.86.w3", "model.layers.50.block_sparse_moe.experts.87.w3", "model.layers.50.block_sparse_moe.experts.88.w3", "model.layers.50.block_sparse_moe.experts.89.w3", "model.layers.50.block_sparse_moe.experts.90.w3", "model.layers.50.block_sparse_moe.experts.91.w3", "model.layers.50.block_sparse_moe.experts.92.w3", "model.layers.50.block_sparse_moe.experts.93.w3", "model.layers.50.block_sparse_moe.experts.94.w3", "model.layers.50.block_sparse_moe.experts.95.w3", "model.layers.50.block_sparse_moe.experts.96.w3", "model.layers.50.block_sparse_moe.experts.97.w3", "model.layers.50.block_sparse_moe.experts.98.w3", "model.layers.50.block_sparse_moe.experts.99.w3", "model.layers.50.block_sparse_moe.experts.100.w3", "model.layers.50.block_sparse_moe.experts.101.w3", "model.layers.50.block_sparse_moe.experts.102.w3", "model.layers.50.block_sparse_moe.experts.103.w3", "model.layers.50.block_sparse_moe.experts.104.w3", "model.layers.50.block_sparse_moe.experts.105.w3", "model.layers.50.block_sparse_moe.experts.106.w3", "model.layers.50.block_sparse_moe.experts.107.w3", "model.layers.50.block_sparse_moe.experts.108.w3", "model.layers.50.block_sparse_moe.experts.109.w3", "model.layers.50.block_sparse_moe.experts.110.w3", "model.layers.50.block_sparse_moe.experts.111.w3", "model.layers.50.block_sparse_moe.experts.112.w3", "model.layers.50.block_sparse_moe.experts.113.w3", "model.layers.50.block_sparse_moe.experts.114.w3", "model.layers.50.block_sparse_moe.experts.115.w3", "model.layers.50.block_sparse_moe.experts.116.w3", "model.layers.50.block_sparse_moe.experts.117.w3", "model.layers.50.block_sparse_moe.experts.118.w3", "model.layers.50.block_sparse_moe.experts.119.w3", "model.layers.50.block_sparse_moe.experts.120.w3", "model.layers.50.block_sparse_moe.experts.121.w3", "model.layers.50.block_sparse_moe.experts.122.w3", "model.layers.50.block_sparse_moe.experts.123.w3", "model.layers.50.block_sparse_moe.experts.124.w3", "model.layers.50.block_sparse_moe.experts.125.w3", "model.layers.50.block_sparse_moe.experts.126.w3", "model.layers.50.block_sparse_moe.experts.127.w3", "model.layers.50.block_sparse_moe.experts.128.w3", "model.layers.50.block_sparse_moe.experts.129.w3", "model.layers.50.block_sparse_moe.experts.130.w3", "model.layers.50.block_sparse_moe.experts.131.w3", "model.layers.50.block_sparse_moe.experts.132.w3", "model.layers.50.block_sparse_moe.experts.133.w3", "model.layers.50.block_sparse_moe.experts.134.w3", "model.layers.50.block_sparse_moe.experts.135.w3", "model.layers.50.block_sparse_moe.experts.136.w3", "model.layers.50.block_sparse_moe.experts.137.w3", "model.layers.50.block_sparse_moe.experts.138.w3", "model.layers.50.block_sparse_moe.experts.139.w3", "model.layers.50.block_sparse_moe.experts.140.w3", "model.layers.50.block_sparse_moe.experts.141.w3", "model.layers.50.block_sparse_moe.experts.142.w3", "model.layers.50.block_sparse_moe.experts.143.w3", "model.layers.50.block_sparse_moe.experts.144.w3", "model.layers.50.block_sparse_moe.experts.145.w3", "model.layers.50.block_sparse_moe.experts.146.w3", "model.layers.50.block_sparse_moe.experts.147.w3", "model.layers.50.block_sparse_moe.experts.148.w3", "model.layers.50.block_sparse_moe.experts.149.w3", "model.layers.50.block_sparse_moe.experts.150.w3", "model.layers.50.block_sparse_moe.experts.151.w3", "model.layers.50.block_sparse_moe.experts.152.w3", "model.layers.50.block_sparse_moe.experts.153.w3", "model.layers.50.block_sparse_moe.experts.154.w3", "model.layers.50.block_sparse_moe.experts.155.w3", "model.layers.50.block_sparse_moe.experts.156.w3", "model.layers.50.block_sparse_moe.experts.157.w3", "model.layers.50.block_sparse_moe.experts.158.w3", "model.layers.50.block_sparse_moe.experts.159.w3", "model.layers.50.block_sparse_moe.experts.160.w3", "model.layers.50.block_sparse_moe.experts.161.w3", "model.layers.50.block_sparse_moe.experts.162.w3", "model.layers.50.block_sparse_moe.experts.163.w3", "model.layers.50.block_sparse_moe.experts.164.w3", "model.layers.50.block_sparse_moe.experts.165.w3", "model.layers.50.block_sparse_moe.experts.166.w3", "model.layers.50.block_sparse_moe.experts.167.w3", "model.layers.50.block_sparse_moe.experts.168.w3", "model.layers.50.block_sparse_moe.experts.169.w3", "model.layers.50.block_sparse_moe.experts.170.w3", "model.layers.50.block_sparse_moe.experts.171.w3", "model.layers.50.block_sparse_moe.experts.172.w3", "model.layers.50.block_sparse_moe.experts.173.w3", "model.layers.50.block_sparse_moe.experts.174.w3", "model.layers.50.block_sparse_moe.experts.175.w3", "model.layers.50.block_sparse_moe.experts.176.w3", "model.layers.50.block_sparse_moe.experts.177.w3", "model.layers.50.block_sparse_moe.experts.178.w3", "model.layers.50.block_sparse_moe.experts.179.w3", "model.layers.50.block_sparse_moe.experts.180.w3", "model.layers.50.block_sparse_moe.experts.181.w3", "model.layers.50.block_sparse_moe.experts.182.w3", "model.layers.50.block_sparse_moe.experts.183.w3", "model.layers.50.block_sparse_moe.experts.184.w3", "model.layers.50.block_sparse_moe.experts.185.w3", "model.layers.50.block_sparse_moe.experts.186.w3", "model.layers.50.block_sparse_moe.experts.187.w3", "model.layers.50.block_sparse_moe.experts.188.w3", "model.layers.50.block_sparse_moe.experts.189.w3", "model.layers.50.block_sparse_moe.experts.190.w3", "model.layers.50.block_sparse_moe.experts.191.w3", "model.layers.50.block_sparse_moe.experts.192.w3", "model.layers.50.block_sparse_moe.experts.193.w3", "model.layers.50.block_sparse_moe.experts.194.w3", "model.layers.50.block_sparse_moe.experts.195.w3", "model.layers.50.block_sparse_moe.experts.196.w3", "model.layers.50.block_sparse_moe.experts.197.w3", "model.layers.50.block_sparse_moe.experts.198.w3", "model.layers.50.block_sparse_moe.experts.199.w3", "model.layers.50.block_sparse_moe.experts.200.w3", "model.layers.50.block_sparse_moe.experts.201.w3", "model.layers.50.block_sparse_moe.experts.202.w3", "model.layers.50.block_sparse_moe.experts.203.w3", "model.layers.50.block_sparse_moe.experts.204.w3", "model.layers.50.block_sparse_moe.experts.205.w3", "model.layers.50.block_sparse_moe.experts.206.w3", "model.layers.50.block_sparse_moe.experts.207.w3", "model.layers.50.block_sparse_moe.experts.208.w3", "model.layers.50.block_sparse_moe.experts.209.w3", "model.layers.50.block_sparse_moe.experts.210.w3", "model.layers.50.block_sparse_moe.experts.211.w3", "model.layers.50.block_sparse_moe.experts.212.w3", "model.layers.50.block_sparse_moe.experts.213.w3", "model.layers.50.block_sparse_moe.experts.214.w3", "model.layers.50.block_sparse_moe.experts.215.w3", "model.layers.50.block_sparse_moe.experts.216.w3", "model.layers.50.block_sparse_moe.experts.217.w3", "model.layers.50.block_sparse_moe.experts.218.w3", "model.layers.50.block_sparse_moe.experts.219.w3", "model.layers.50.block_sparse_moe.experts.220.w3", "model.layers.50.block_sparse_moe.experts.221.w3", "model.layers.50.block_sparse_moe.experts.222.w3", "model.layers.50.block_sparse_moe.experts.223.w3", "model.layers.50.block_sparse_moe.experts.224.w3", "model.layers.50.block_sparse_moe.experts.225.w3", "model.layers.50.block_sparse_moe.experts.226.w3", "model.layers.50.block_sparse_moe.experts.227.w3", "model.layers.50.block_sparse_moe.experts.228.w3", "model.layers.50.block_sparse_moe.experts.229.w3", "model.layers.50.block_sparse_moe.experts.230.w3", "model.layers.50.block_sparse_moe.experts.231.w3", "model.layers.50.block_sparse_moe.experts.232.w3", "model.layers.50.block_sparse_moe.experts.233.w3", "model.layers.50.block_sparse_moe.experts.234.w3", "model.layers.50.block_sparse_moe.experts.235.w3", "model.layers.50.block_sparse_moe.experts.236.w3", "model.layers.50.block_sparse_moe.experts.237.w3", "model.layers.50.block_sparse_moe.experts.238.w3", "model.layers.50.block_sparse_moe.experts.239.w3", "model.layers.50.block_sparse_moe.experts.240.w3", "model.layers.50.block_sparse_moe.experts.241.w3", "model.layers.50.block_sparse_moe.experts.242.w3", "model.layers.50.block_sparse_moe.experts.243.w3", "model.layers.50.block_sparse_moe.experts.244.w3", "model.layers.50.block_sparse_moe.experts.245.w3", "model.layers.50.block_sparse_moe.experts.246.w3", "model.layers.50.block_sparse_moe.experts.247.w3", "model.layers.50.block_sparse_moe.experts.248.w3", "model.layers.50.block_sparse_moe.experts.249.w3", "model.layers.50.block_sparse_moe.experts.250.w3", "model.layers.50.block_sparse_moe.experts.251.w3", "model.layers.50.block_sparse_moe.experts.252.w3", "model.layers.50.block_sparse_moe.experts.253.w3", "model.layers.50.block_sparse_moe.experts.254.w3", "model.layers.50.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.00022094659507276848, "dbits": 2415919104 } ] }, { "idx": 254, "layers": [ "model.layers.50.block_sparse_moe.experts.0.w2", "model.layers.50.block_sparse_moe.experts.1.w2", "model.layers.50.block_sparse_moe.experts.2.w2", "model.layers.50.block_sparse_moe.experts.3.w2", "model.layers.50.block_sparse_moe.experts.4.w2", "model.layers.50.block_sparse_moe.experts.5.w2", "model.layers.50.block_sparse_moe.experts.6.w2", "model.layers.50.block_sparse_moe.experts.7.w2", "model.layers.50.block_sparse_moe.experts.8.w2", "model.layers.50.block_sparse_moe.experts.9.w2", "model.layers.50.block_sparse_moe.experts.10.w2", "model.layers.50.block_sparse_moe.experts.11.w2", "model.layers.50.block_sparse_moe.experts.12.w2", "model.layers.50.block_sparse_moe.experts.13.w2", "model.layers.50.block_sparse_moe.experts.14.w2", "model.layers.50.block_sparse_moe.experts.15.w2", "model.layers.50.block_sparse_moe.experts.16.w2", "model.layers.50.block_sparse_moe.experts.17.w2", "model.layers.50.block_sparse_moe.experts.18.w2", "model.layers.50.block_sparse_moe.experts.19.w2", "model.layers.50.block_sparse_moe.experts.20.w2", "model.layers.50.block_sparse_moe.experts.21.w2", "model.layers.50.block_sparse_moe.experts.22.w2", "model.layers.50.block_sparse_moe.experts.23.w2", "model.layers.50.block_sparse_moe.experts.24.w2", "model.layers.50.block_sparse_moe.experts.25.w2", "model.layers.50.block_sparse_moe.experts.26.w2", "model.layers.50.block_sparse_moe.experts.27.w2", "model.layers.50.block_sparse_moe.experts.28.w2", "model.layers.50.block_sparse_moe.experts.29.w2", "model.layers.50.block_sparse_moe.experts.30.w2", "model.layers.50.block_sparse_moe.experts.31.w2", "model.layers.50.block_sparse_moe.experts.32.w2", "model.layers.50.block_sparse_moe.experts.33.w2", "model.layers.50.block_sparse_moe.experts.34.w2", "model.layers.50.block_sparse_moe.experts.35.w2", "model.layers.50.block_sparse_moe.experts.36.w2", "model.layers.50.block_sparse_moe.experts.37.w2", "model.layers.50.block_sparse_moe.experts.38.w2", "model.layers.50.block_sparse_moe.experts.39.w2", "model.layers.50.block_sparse_moe.experts.40.w2", "model.layers.50.block_sparse_moe.experts.41.w2", "model.layers.50.block_sparse_moe.experts.42.w2", "model.layers.50.block_sparse_moe.experts.43.w2", "model.layers.50.block_sparse_moe.experts.44.w2", "model.layers.50.block_sparse_moe.experts.45.w2", "model.layers.50.block_sparse_moe.experts.46.w2", "model.layers.50.block_sparse_moe.experts.47.w2", "model.layers.50.block_sparse_moe.experts.48.w2", "model.layers.50.block_sparse_moe.experts.49.w2", "model.layers.50.block_sparse_moe.experts.50.w2", "model.layers.50.block_sparse_moe.experts.51.w2", "model.layers.50.block_sparse_moe.experts.52.w2", "model.layers.50.block_sparse_moe.experts.53.w2", "model.layers.50.block_sparse_moe.experts.54.w2", "model.layers.50.block_sparse_moe.experts.55.w2", "model.layers.50.block_sparse_moe.experts.56.w2", "model.layers.50.block_sparse_moe.experts.57.w2", "model.layers.50.block_sparse_moe.experts.58.w2", "model.layers.50.block_sparse_moe.experts.59.w2", "model.layers.50.block_sparse_moe.experts.60.w2", "model.layers.50.block_sparse_moe.experts.61.w2", "model.layers.50.block_sparse_moe.experts.62.w2", "model.layers.50.block_sparse_moe.experts.63.w2", "model.layers.50.block_sparse_moe.experts.64.w2", "model.layers.50.block_sparse_moe.experts.65.w2", "model.layers.50.block_sparse_moe.experts.66.w2", "model.layers.50.block_sparse_moe.experts.67.w2", "model.layers.50.block_sparse_moe.experts.68.w2", "model.layers.50.block_sparse_moe.experts.69.w2", "model.layers.50.block_sparse_moe.experts.70.w2", "model.layers.50.block_sparse_moe.experts.71.w2", "model.layers.50.block_sparse_moe.experts.72.w2", "model.layers.50.block_sparse_moe.experts.73.w2", "model.layers.50.block_sparse_moe.experts.74.w2", "model.layers.50.block_sparse_moe.experts.75.w2", "model.layers.50.block_sparse_moe.experts.76.w2", "model.layers.50.block_sparse_moe.experts.77.w2", "model.layers.50.block_sparse_moe.experts.78.w2", "model.layers.50.block_sparse_moe.experts.79.w2", "model.layers.50.block_sparse_moe.experts.80.w2", "model.layers.50.block_sparse_moe.experts.81.w2", "model.layers.50.block_sparse_moe.experts.82.w2", "model.layers.50.block_sparse_moe.experts.83.w2", "model.layers.50.block_sparse_moe.experts.84.w2", "model.layers.50.block_sparse_moe.experts.85.w2", "model.layers.50.block_sparse_moe.experts.86.w2", "model.layers.50.block_sparse_moe.experts.87.w2", "model.layers.50.block_sparse_moe.experts.88.w2", "model.layers.50.block_sparse_moe.experts.89.w2", "model.layers.50.block_sparse_moe.experts.90.w2", "model.layers.50.block_sparse_moe.experts.91.w2", "model.layers.50.block_sparse_moe.experts.92.w2", "model.layers.50.block_sparse_moe.experts.93.w2", "model.layers.50.block_sparse_moe.experts.94.w2", "model.layers.50.block_sparse_moe.experts.95.w2", "model.layers.50.block_sparse_moe.experts.96.w2", "model.layers.50.block_sparse_moe.experts.97.w2", "model.layers.50.block_sparse_moe.experts.98.w2", "model.layers.50.block_sparse_moe.experts.99.w2", "model.layers.50.block_sparse_moe.experts.100.w2", "model.layers.50.block_sparse_moe.experts.101.w2", "model.layers.50.block_sparse_moe.experts.102.w2", "model.layers.50.block_sparse_moe.experts.103.w2", "model.layers.50.block_sparse_moe.experts.104.w2", "model.layers.50.block_sparse_moe.experts.105.w2", "model.layers.50.block_sparse_moe.experts.106.w2", "model.layers.50.block_sparse_moe.experts.107.w2", "model.layers.50.block_sparse_moe.experts.108.w2", "model.layers.50.block_sparse_moe.experts.109.w2", "model.layers.50.block_sparse_moe.experts.110.w2", "model.layers.50.block_sparse_moe.experts.111.w2", "model.layers.50.block_sparse_moe.experts.112.w2", "model.layers.50.block_sparse_moe.experts.113.w2", "model.layers.50.block_sparse_moe.experts.114.w2", "model.layers.50.block_sparse_moe.experts.115.w2", "model.layers.50.block_sparse_moe.experts.116.w2", "model.layers.50.block_sparse_moe.experts.117.w2", "model.layers.50.block_sparse_moe.experts.118.w2", "model.layers.50.block_sparse_moe.experts.119.w2", "model.layers.50.block_sparse_moe.experts.120.w2", "model.layers.50.block_sparse_moe.experts.121.w2", "model.layers.50.block_sparse_moe.experts.122.w2", "model.layers.50.block_sparse_moe.experts.123.w2", "model.layers.50.block_sparse_moe.experts.124.w2", "model.layers.50.block_sparse_moe.experts.125.w2", "model.layers.50.block_sparse_moe.experts.126.w2", "model.layers.50.block_sparse_moe.experts.127.w2", "model.layers.50.block_sparse_moe.experts.128.w2", "model.layers.50.block_sparse_moe.experts.129.w2", "model.layers.50.block_sparse_moe.experts.130.w2", "model.layers.50.block_sparse_moe.experts.131.w2", "model.layers.50.block_sparse_moe.experts.132.w2", "model.layers.50.block_sparse_moe.experts.133.w2", "model.layers.50.block_sparse_moe.experts.134.w2", "model.layers.50.block_sparse_moe.experts.135.w2", "model.layers.50.block_sparse_moe.experts.136.w2", "model.layers.50.block_sparse_moe.experts.137.w2", "model.layers.50.block_sparse_moe.experts.138.w2", "model.layers.50.block_sparse_moe.experts.139.w2", "model.layers.50.block_sparse_moe.experts.140.w2", "model.layers.50.block_sparse_moe.experts.141.w2", "model.layers.50.block_sparse_moe.experts.142.w2", "model.layers.50.block_sparse_moe.experts.143.w2", "model.layers.50.block_sparse_moe.experts.144.w2", "model.layers.50.block_sparse_moe.experts.145.w2", "model.layers.50.block_sparse_moe.experts.146.w2", "model.layers.50.block_sparse_moe.experts.147.w2", "model.layers.50.block_sparse_moe.experts.148.w2", "model.layers.50.block_sparse_moe.experts.149.w2", "model.layers.50.block_sparse_moe.experts.150.w2", "model.layers.50.block_sparse_moe.experts.151.w2", "model.layers.50.block_sparse_moe.experts.152.w2", "model.layers.50.block_sparse_moe.experts.153.w2", "model.layers.50.block_sparse_moe.experts.154.w2", "model.layers.50.block_sparse_moe.experts.155.w2", "model.layers.50.block_sparse_moe.experts.156.w2", "model.layers.50.block_sparse_moe.experts.157.w2", "model.layers.50.block_sparse_moe.experts.158.w2", "model.layers.50.block_sparse_moe.experts.159.w2", "model.layers.50.block_sparse_moe.experts.160.w2", "model.layers.50.block_sparse_moe.experts.161.w2", "model.layers.50.block_sparse_moe.experts.162.w2", "model.layers.50.block_sparse_moe.experts.163.w2", "model.layers.50.block_sparse_moe.experts.164.w2", "model.layers.50.block_sparse_moe.experts.165.w2", "model.layers.50.block_sparse_moe.experts.166.w2", "model.layers.50.block_sparse_moe.experts.167.w2", "model.layers.50.block_sparse_moe.experts.168.w2", "model.layers.50.block_sparse_moe.experts.169.w2", "model.layers.50.block_sparse_moe.experts.170.w2", "model.layers.50.block_sparse_moe.experts.171.w2", "model.layers.50.block_sparse_moe.experts.172.w2", "model.layers.50.block_sparse_moe.experts.173.w2", "model.layers.50.block_sparse_moe.experts.174.w2", "model.layers.50.block_sparse_moe.experts.175.w2", "model.layers.50.block_sparse_moe.experts.176.w2", "model.layers.50.block_sparse_moe.experts.177.w2", "model.layers.50.block_sparse_moe.experts.178.w2", "model.layers.50.block_sparse_moe.experts.179.w2", "model.layers.50.block_sparse_moe.experts.180.w2", "model.layers.50.block_sparse_moe.experts.181.w2", "model.layers.50.block_sparse_moe.experts.182.w2", "model.layers.50.block_sparse_moe.experts.183.w2", "model.layers.50.block_sparse_moe.experts.184.w2", "model.layers.50.block_sparse_moe.experts.185.w2", "model.layers.50.block_sparse_moe.experts.186.w2", "model.layers.50.block_sparse_moe.experts.187.w2", "model.layers.50.block_sparse_moe.experts.188.w2", "model.layers.50.block_sparse_moe.experts.189.w2", "model.layers.50.block_sparse_moe.experts.190.w2", "model.layers.50.block_sparse_moe.experts.191.w2", "model.layers.50.block_sparse_moe.experts.192.w2", "model.layers.50.block_sparse_moe.experts.193.w2", "model.layers.50.block_sparse_moe.experts.194.w2", "model.layers.50.block_sparse_moe.experts.195.w2", "model.layers.50.block_sparse_moe.experts.196.w2", "model.layers.50.block_sparse_moe.experts.197.w2", "model.layers.50.block_sparse_moe.experts.198.w2", "model.layers.50.block_sparse_moe.experts.199.w2", "model.layers.50.block_sparse_moe.experts.200.w2", "model.layers.50.block_sparse_moe.experts.201.w2", "model.layers.50.block_sparse_moe.experts.202.w2", "model.layers.50.block_sparse_moe.experts.203.w2", "model.layers.50.block_sparse_moe.experts.204.w2", "model.layers.50.block_sparse_moe.experts.205.w2", "model.layers.50.block_sparse_moe.experts.206.w2", "model.layers.50.block_sparse_moe.experts.207.w2", "model.layers.50.block_sparse_moe.experts.208.w2", "model.layers.50.block_sparse_moe.experts.209.w2", "model.layers.50.block_sparse_moe.experts.210.w2", "model.layers.50.block_sparse_moe.experts.211.w2", "model.layers.50.block_sparse_moe.experts.212.w2", "model.layers.50.block_sparse_moe.experts.213.w2", "model.layers.50.block_sparse_moe.experts.214.w2", "model.layers.50.block_sparse_moe.experts.215.w2", "model.layers.50.block_sparse_moe.experts.216.w2", "model.layers.50.block_sparse_moe.experts.217.w2", "model.layers.50.block_sparse_moe.experts.218.w2", "model.layers.50.block_sparse_moe.experts.219.w2", "model.layers.50.block_sparse_moe.experts.220.w2", "model.layers.50.block_sparse_moe.experts.221.w2", "model.layers.50.block_sparse_moe.experts.222.w2", "model.layers.50.block_sparse_moe.experts.223.w2", "model.layers.50.block_sparse_moe.experts.224.w2", "model.layers.50.block_sparse_moe.experts.225.w2", "model.layers.50.block_sparse_moe.experts.226.w2", "model.layers.50.block_sparse_moe.experts.227.w2", "model.layers.50.block_sparse_moe.experts.228.w2", "model.layers.50.block_sparse_moe.experts.229.w2", "model.layers.50.block_sparse_moe.experts.230.w2", "model.layers.50.block_sparse_moe.experts.231.w2", "model.layers.50.block_sparse_moe.experts.232.w2", "model.layers.50.block_sparse_moe.experts.233.w2", "model.layers.50.block_sparse_moe.experts.234.w2", "model.layers.50.block_sparse_moe.experts.235.w2", "model.layers.50.block_sparse_moe.experts.236.w2", "model.layers.50.block_sparse_moe.experts.237.w2", "model.layers.50.block_sparse_moe.experts.238.w2", "model.layers.50.block_sparse_moe.experts.239.w2", "model.layers.50.block_sparse_moe.experts.240.w2", "model.layers.50.block_sparse_moe.experts.241.w2", "model.layers.50.block_sparse_moe.experts.242.w2", "model.layers.50.block_sparse_moe.experts.243.w2", "model.layers.50.block_sparse_moe.experts.244.w2", "model.layers.50.block_sparse_moe.experts.245.w2", "model.layers.50.block_sparse_moe.experts.246.w2", "model.layers.50.block_sparse_moe.experts.247.w2", "model.layers.50.block_sparse_moe.experts.248.w2", "model.layers.50.block_sparse_moe.experts.249.w2", "model.layers.50.block_sparse_moe.experts.250.w2", "model.layers.50.block_sparse_moe.experts.251.w2", "model.layers.50.block_sparse_moe.experts.252.w2", "model.layers.50.block_sparse_moe.experts.253.w2", "model.layers.50.block_sparse_moe.experts.254.w2", "model.layers.50.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -0.00019381381571292877, "dbits": 1207959552 } ] }, { "idx": 255, "layers": [ "model.layers.51.self_attn.q_proj" ], "candidates": [ { "dkld": 0.0004410792142152675, "dbits": 18874368 } ] }, { "idx": 256, "layers": [ "model.layers.51.self_attn.k_proj", "model.layers.51.self_attn.v_proj" ], "candidates": [ { "dkld": 0.004680319502949726, "dbits": 6291456 } ] }, { "idx": 257, "layers": [ "model.layers.51.self_attn.o_proj" ], "candidates": [ { "dkld": -0.004042946174740825, "dbits": 18874368 } ] }, { "idx": 258, "layers": [ "model.layers.51.block_sparse_moe.experts.0.w1", "model.layers.51.block_sparse_moe.experts.1.w1", "model.layers.51.block_sparse_moe.experts.2.w1", "model.layers.51.block_sparse_moe.experts.3.w1", "model.layers.51.block_sparse_moe.experts.4.w1", "model.layers.51.block_sparse_moe.experts.5.w1", "model.layers.51.block_sparse_moe.experts.6.w1", "model.layers.51.block_sparse_moe.experts.7.w1", "model.layers.51.block_sparse_moe.experts.8.w1", "model.layers.51.block_sparse_moe.experts.9.w1", "model.layers.51.block_sparse_moe.experts.10.w1", "model.layers.51.block_sparse_moe.experts.11.w1", "model.layers.51.block_sparse_moe.experts.12.w1", "model.layers.51.block_sparse_moe.experts.13.w1", "model.layers.51.block_sparse_moe.experts.14.w1", "model.layers.51.block_sparse_moe.experts.15.w1", "model.layers.51.block_sparse_moe.experts.16.w1", "model.layers.51.block_sparse_moe.experts.17.w1", "model.layers.51.block_sparse_moe.experts.18.w1", "model.layers.51.block_sparse_moe.experts.19.w1", "model.layers.51.block_sparse_moe.experts.20.w1", "model.layers.51.block_sparse_moe.experts.21.w1", "model.layers.51.block_sparse_moe.experts.22.w1", "model.layers.51.block_sparse_moe.experts.23.w1", "model.layers.51.block_sparse_moe.experts.24.w1", "model.layers.51.block_sparse_moe.experts.25.w1", "model.layers.51.block_sparse_moe.experts.26.w1", "model.layers.51.block_sparse_moe.experts.27.w1", "model.layers.51.block_sparse_moe.experts.28.w1", "model.layers.51.block_sparse_moe.experts.29.w1", "model.layers.51.block_sparse_moe.experts.30.w1", "model.layers.51.block_sparse_moe.experts.31.w1", "model.layers.51.block_sparse_moe.experts.32.w1", "model.layers.51.block_sparse_moe.experts.33.w1", "model.layers.51.block_sparse_moe.experts.34.w1", "model.layers.51.block_sparse_moe.experts.35.w1", "model.layers.51.block_sparse_moe.experts.36.w1", "model.layers.51.block_sparse_moe.experts.37.w1", "model.layers.51.block_sparse_moe.experts.38.w1", "model.layers.51.block_sparse_moe.experts.39.w1", "model.layers.51.block_sparse_moe.experts.40.w1", "model.layers.51.block_sparse_moe.experts.41.w1", "model.layers.51.block_sparse_moe.experts.42.w1", "model.layers.51.block_sparse_moe.experts.43.w1", "model.layers.51.block_sparse_moe.experts.44.w1", "model.layers.51.block_sparse_moe.experts.45.w1", "model.layers.51.block_sparse_moe.experts.46.w1", "model.layers.51.block_sparse_moe.experts.47.w1", "model.layers.51.block_sparse_moe.experts.48.w1", "model.layers.51.block_sparse_moe.experts.49.w1", "model.layers.51.block_sparse_moe.experts.50.w1", "model.layers.51.block_sparse_moe.experts.51.w1", "model.layers.51.block_sparse_moe.experts.52.w1", "model.layers.51.block_sparse_moe.experts.53.w1", "model.layers.51.block_sparse_moe.experts.54.w1", "model.layers.51.block_sparse_moe.experts.55.w1", "model.layers.51.block_sparse_moe.experts.56.w1", "model.layers.51.block_sparse_moe.experts.57.w1", "model.layers.51.block_sparse_moe.experts.58.w1", "model.layers.51.block_sparse_moe.experts.59.w1", "model.layers.51.block_sparse_moe.experts.60.w1", "model.layers.51.block_sparse_moe.experts.61.w1", "model.layers.51.block_sparse_moe.experts.62.w1", "model.layers.51.block_sparse_moe.experts.63.w1", "model.layers.51.block_sparse_moe.experts.64.w1", "model.layers.51.block_sparse_moe.experts.65.w1", "model.layers.51.block_sparse_moe.experts.66.w1", "model.layers.51.block_sparse_moe.experts.67.w1", "model.layers.51.block_sparse_moe.experts.68.w1", "model.layers.51.block_sparse_moe.experts.69.w1", "model.layers.51.block_sparse_moe.experts.70.w1", "model.layers.51.block_sparse_moe.experts.71.w1", "model.layers.51.block_sparse_moe.experts.72.w1", "model.layers.51.block_sparse_moe.experts.73.w1", "model.layers.51.block_sparse_moe.experts.74.w1", "model.layers.51.block_sparse_moe.experts.75.w1", "model.layers.51.block_sparse_moe.experts.76.w1", "model.layers.51.block_sparse_moe.experts.77.w1", "model.layers.51.block_sparse_moe.experts.78.w1", "model.layers.51.block_sparse_moe.experts.79.w1", "model.layers.51.block_sparse_moe.experts.80.w1", "model.layers.51.block_sparse_moe.experts.81.w1", "model.layers.51.block_sparse_moe.experts.82.w1", "model.layers.51.block_sparse_moe.experts.83.w1", "model.layers.51.block_sparse_moe.experts.84.w1", "model.layers.51.block_sparse_moe.experts.85.w1", "model.layers.51.block_sparse_moe.experts.86.w1", "model.layers.51.block_sparse_moe.experts.87.w1", "model.layers.51.block_sparse_moe.experts.88.w1", "model.layers.51.block_sparse_moe.experts.89.w1", "model.layers.51.block_sparse_moe.experts.90.w1", "model.layers.51.block_sparse_moe.experts.91.w1", "model.layers.51.block_sparse_moe.experts.92.w1", "model.layers.51.block_sparse_moe.experts.93.w1", "model.layers.51.block_sparse_moe.experts.94.w1", "model.layers.51.block_sparse_moe.experts.95.w1", "model.layers.51.block_sparse_moe.experts.96.w1", "model.layers.51.block_sparse_moe.experts.97.w1", "model.layers.51.block_sparse_moe.experts.98.w1", "model.layers.51.block_sparse_moe.experts.99.w1", "model.layers.51.block_sparse_moe.experts.100.w1", "model.layers.51.block_sparse_moe.experts.101.w1", "model.layers.51.block_sparse_moe.experts.102.w1", "model.layers.51.block_sparse_moe.experts.103.w1", "model.layers.51.block_sparse_moe.experts.104.w1", "model.layers.51.block_sparse_moe.experts.105.w1", "model.layers.51.block_sparse_moe.experts.106.w1", "model.layers.51.block_sparse_moe.experts.107.w1", "model.layers.51.block_sparse_moe.experts.108.w1", "model.layers.51.block_sparse_moe.experts.109.w1", "model.layers.51.block_sparse_moe.experts.110.w1", "model.layers.51.block_sparse_moe.experts.111.w1", "model.layers.51.block_sparse_moe.experts.112.w1", "model.layers.51.block_sparse_moe.experts.113.w1", "model.layers.51.block_sparse_moe.experts.114.w1", "model.layers.51.block_sparse_moe.experts.115.w1", "model.layers.51.block_sparse_moe.experts.116.w1", "model.layers.51.block_sparse_moe.experts.117.w1", "model.layers.51.block_sparse_moe.experts.118.w1", "model.layers.51.block_sparse_moe.experts.119.w1", "model.layers.51.block_sparse_moe.experts.120.w1", "model.layers.51.block_sparse_moe.experts.121.w1", "model.layers.51.block_sparse_moe.experts.122.w1", "model.layers.51.block_sparse_moe.experts.123.w1", "model.layers.51.block_sparse_moe.experts.124.w1", "model.layers.51.block_sparse_moe.experts.125.w1", "model.layers.51.block_sparse_moe.experts.126.w1", "model.layers.51.block_sparse_moe.experts.127.w1", "model.layers.51.block_sparse_moe.experts.128.w1", "model.layers.51.block_sparse_moe.experts.129.w1", "model.layers.51.block_sparse_moe.experts.130.w1", "model.layers.51.block_sparse_moe.experts.131.w1", "model.layers.51.block_sparse_moe.experts.132.w1", "model.layers.51.block_sparse_moe.experts.133.w1", "model.layers.51.block_sparse_moe.experts.134.w1", "model.layers.51.block_sparse_moe.experts.135.w1", "model.layers.51.block_sparse_moe.experts.136.w1", "model.layers.51.block_sparse_moe.experts.137.w1", "model.layers.51.block_sparse_moe.experts.138.w1", "model.layers.51.block_sparse_moe.experts.139.w1", "model.layers.51.block_sparse_moe.experts.140.w1", "model.layers.51.block_sparse_moe.experts.141.w1", "model.layers.51.block_sparse_moe.experts.142.w1", "model.layers.51.block_sparse_moe.experts.143.w1", "model.layers.51.block_sparse_moe.experts.144.w1", "model.layers.51.block_sparse_moe.experts.145.w1", "model.layers.51.block_sparse_moe.experts.146.w1", "model.layers.51.block_sparse_moe.experts.147.w1", "model.layers.51.block_sparse_moe.experts.148.w1", "model.layers.51.block_sparse_moe.experts.149.w1", "model.layers.51.block_sparse_moe.experts.150.w1", "model.layers.51.block_sparse_moe.experts.151.w1", "model.layers.51.block_sparse_moe.experts.152.w1", "model.layers.51.block_sparse_moe.experts.153.w1", "model.layers.51.block_sparse_moe.experts.154.w1", "model.layers.51.block_sparse_moe.experts.155.w1", "model.layers.51.block_sparse_moe.experts.156.w1", "model.layers.51.block_sparse_moe.experts.157.w1", "model.layers.51.block_sparse_moe.experts.158.w1", "model.layers.51.block_sparse_moe.experts.159.w1", "model.layers.51.block_sparse_moe.experts.160.w1", "model.layers.51.block_sparse_moe.experts.161.w1", "model.layers.51.block_sparse_moe.experts.162.w1", "model.layers.51.block_sparse_moe.experts.163.w1", "model.layers.51.block_sparse_moe.experts.164.w1", "model.layers.51.block_sparse_moe.experts.165.w1", "model.layers.51.block_sparse_moe.experts.166.w1", "model.layers.51.block_sparse_moe.experts.167.w1", "model.layers.51.block_sparse_moe.experts.168.w1", "model.layers.51.block_sparse_moe.experts.169.w1", "model.layers.51.block_sparse_moe.experts.170.w1", "model.layers.51.block_sparse_moe.experts.171.w1", "model.layers.51.block_sparse_moe.experts.172.w1", "model.layers.51.block_sparse_moe.experts.173.w1", "model.layers.51.block_sparse_moe.experts.174.w1", "model.layers.51.block_sparse_moe.experts.175.w1", "model.layers.51.block_sparse_moe.experts.176.w1", "model.layers.51.block_sparse_moe.experts.177.w1", "model.layers.51.block_sparse_moe.experts.178.w1", "model.layers.51.block_sparse_moe.experts.179.w1", "model.layers.51.block_sparse_moe.experts.180.w1", "model.layers.51.block_sparse_moe.experts.181.w1", "model.layers.51.block_sparse_moe.experts.182.w1", "model.layers.51.block_sparse_moe.experts.183.w1", "model.layers.51.block_sparse_moe.experts.184.w1", "model.layers.51.block_sparse_moe.experts.185.w1", "model.layers.51.block_sparse_moe.experts.186.w1", "model.layers.51.block_sparse_moe.experts.187.w1", "model.layers.51.block_sparse_moe.experts.188.w1", "model.layers.51.block_sparse_moe.experts.189.w1", "model.layers.51.block_sparse_moe.experts.190.w1", "model.layers.51.block_sparse_moe.experts.191.w1", "model.layers.51.block_sparse_moe.experts.192.w1", "model.layers.51.block_sparse_moe.experts.193.w1", "model.layers.51.block_sparse_moe.experts.194.w1", "model.layers.51.block_sparse_moe.experts.195.w1", "model.layers.51.block_sparse_moe.experts.196.w1", "model.layers.51.block_sparse_moe.experts.197.w1", "model.layers.51.block_sparse_moe.experts.198.w1", "model.layers.51.block_sparse_moe.experts.199.w1", "model.layers.51.block_sparse_moe.experts.200.w1", "model.layers.51.block_sparse_moe.experts.201.w1", "model.layers.51.block_sparse_moe.experts.202.w1", "model.layers.51.block_sparse_moe.experts.203.w1", "model.layers.51.block_sparse_moe.experts.204.w1", "model.layers.51.block_sparse_moe.experts.205.w1", "model.layers.51.block_sparse_moe.experts.206.w1", "model.layers.51.block_sparse_moe.experts.207.w1", "model.layers.51.block_sparse_moe.experts.208.w1", "model.layers.51.block_sparse_moe.experts.209.w1", "model.layers.51.block_sparse_moe.experts.210.w1", "model.layers.51.block_sparse_moe.experts.211.w1", "model.layers.51.block_sparse_moe.experts.212.w1", "model.layers.51.block_sparse_moe.experts.213.w1", "model.layers.51.block_sparse_moe.experts.214.w1", "model.layers.51.block_sparse_moe.experts.215.w1", "model.layers.51.block_sparse_moe.experts.216.w1", "model.layers.51.block_sparse_moe.experts.217.w1", "model.layers.51.block_sparse_moe.experts.218.w1", "model.layers.51.block_sparse_moe.experts.219.w1", "model.layers.51.block_sparse_moe.experts.220.w1", "model.layers.51.block_sparse_moe.experts.221.w1", "model.layers.51.block_sparse_moe.experts.222.w1", "model.layers.51.block_sparse_moe.experts.223.w1", "model.layers.51.block_sparse_moe.experts.224.w1", "model.layers.51.block_sparse_moe.experts.225.w1", "model.layers.51.block_sparse_moe.experts.226.w1", "model.layers.51.block_sparse_moe.experts.227.w1", "model.layers.51.block_sparse_moe.experts.228.w1", "model.layers.51.block_sparse_moe.experts.229.w1", "model.layers.51.block_sparse_moe.experts.230.w1", "model.layers.51.block_sparse_moe.experts.231.w1", "model.layers.51.block_sparse_moe.experts.232.w1", "model.layers.51.block_sparse_moe.experts.233.w1", "model.layers.51.block_sparse_moe.experts.234.w1", "model.layers.51.block_sparse_moe.experts.235.w1", "model.layers.51.block_sparse_moe.experts.236.w1", "model.layers.51.block_sparse_moe.experts.237.w1", "model.layers.51.block_sparse_moe.experts.238.w1", "model.layers.51.block_sparse_moe.experts.239.w1", "model.layers.51.block_sparse_moe.experts.240.w1", "model.layers.51.block_sparse_moe.experts.241.w1", "model.layers.51.block_sparse_moe.experts.242.w1", "model.layers.51.block_sparse_moe.experts.243.w1", "model.layers.51.block_sparse_moe.experts.244.w1", "model.layers.51.block_sparse_moe.experts.245.w1", "model.layers.51.block_sparse_moe.experts.246.w1", "model.layers.51.block_sparse_moe.experts.247.w1", "model.layers.51.block_sparse_moe.experts.248.w1", "model.layers.51.block_sparse_moe.experts.249.w1", "model.layers.51.block_sparse_moe.experts.250.w1", "model.layers.51.block_sparse_moe.experts.251.w1", "model.layers.51.block_sparse_moe.experts.252.w1", "model.layers.51.block_sparse_moe.experts.253.w1", "model.layers.51.block_sparse_moe.experts.254.w1", "model.layers.51.block_sparse_moe.experts.255.w1", "model.layers.51.block_sparse_moe.experts.0.w3", "model.layers.51.block_sparse_moe.experts.1.w3", "model.layers.51.block_sparse_moe.experts.2.w3", "model.layers.51.block_sparse_moe.experts.3.w3", "model.layers.51.block_sparse_moe.experts.4.w3", "model.layers.51.block_sparse_moe.experts.5.w3", "model.layers.51.block_sparse_moe.experts.6.w3", "model.layers.51.block_sparse_moe.experts.7.w3", "model.layers.51.block_sparse_moe.experts.8.w3", "model.layers.51.block_sparse_moe.experts.9.w3", "model.layers.51.block_sparse_moe.experts.10.w3", "model.layers.51.block_sparse_moe.experts.11.w3", "model.layers.51.block_sparse_moe.experts.12.w3", "model.layers.51.block_sparse_moe.experts.13.w3", "model.layers.51.block_sparse_moe.experts.14.w3", "model.layers.51.block_sparse_moe.experts.15.w3", "model.layers.51.block_sparse_moe.experts.16.w3", "model.layers.51.block_sparse_moe.experts.17.w3", "model.layers.51.block_sparse_moe.experts.18.w3", "model.layers.51.block_sparse_moe.experts.19.w3", "model.layers.51.block_sparse_moe.experts.20.w3", "model.layers.51.block_sparse_moe.experts.21.w3", "model.layers.51.block_sparse_moe.experts.22.w3", "model.layers.51.block_sparse_moe.experts.23.w3", "model.layers.51.block_sparse_moe.experts.24.w3", "model.layers.51.block_sparse_moe.experts.25.w3", "model.layers.51.block_sparse_moe.experts.26.w3", "model.layers.51.block_sparse_moe.experts.27.w3", "model.layers.51.block_sparse_moe.experts.28.w3", "model.layers.51.block_sparse_moe.experts.29.w3", "model.layers.51.block_sparse_moe.experts.30.w3", "model.layers.51.block_sparse_moe.experts.31.w3", "model.layers.51.block_sparse_moe.experts.32.w3", "model.layers.51.block_sparse_moe.experts.33.w3", "model.layers.51.block_sparse_moe.experts.34.w3", "model.layers.51.block_sparse_moe.experts.35.w3", "model.layers.51.block_sparse_moe.experts.36.w3", "model.layers.51.block_sparse_moe.experts.37.w3", "model.layers.51.block_sparse_moe.experts.38.w3", "model.layers.51.block_sparse_moe.experts.39.w3", "model.layers.51.block_sparse_moe.experts.40.w3", "model.layers.51.block_sparse_moe.experts.41.w3", "model.layers.51.block_sparse_moe.experts.42.w3", "model.layers.51.block_sparse_moe.experts.43.w3", "model.layers.51.block_sparse_moe.experts.44.w3", "model.layers.51.block_sparse_moe.experts.45.w3", "model.layers.51.block_sparse_moe.experts.46.w3", "model.layers.51.block_sparse_moe.experts.47.w3", "model.layers.51.block_sparse_moe.experts.48.w3", "model.layers.51.block_sparse_moe.experts.49.w3", "model.layers.51.block_sparse_moe.experts.50.w3", "model.layers.51.block_sparse_moe.experts.51.w3", "model.layers.51.block_sparse_moe.experts.52.w3", "model.layers.51.block_sparse_moe.experts.53.w3", "model.layers.51.block_sparse_moe.experts.54.w3", "model.layers.51.block_sparse_moe.experts.55.w3", "model.layers.51.block_sparse_moe.experts.56.w3", "model.layers.51.block_sparse_moe.experts.57.w3", "model.layers.51.block_sparse_moe.experts.58.w3", "model.layers.51.block_sparse_moe.experts.59.w3", "model.layers.51.block_sparse_moe.experts.60.w3", "model.layers.51.block_sparse_moe.experts.61.w3", "model.layers.51.block_sparse_moe.experts.62.w3", "model.layers.51.block_sparse_moe.experts.63.w3", "model.layers.51.block_sparse_moe.experts.64.w3", "model.layers.51.block_sparse_moe.experts.65.w3", "model.layers.51.block_sparse_moe.experts.66.w3", "model.layers.51.block_sparse_moe.experts.67.w3", "model.layers.51.block_sparse_moe.experts.68.w3", "model.layers.51.block_sparse_moe.experts.69.w3", "model.layers.51.block_sparse_moe.experts.70.w3", "model.layers.51.block_sparse_moe.experts.71.w3", "model.layers.51.block_sparse_moe.experts.72.w3", "model.layers.51.block_sparse_moe.experts.73.w3", "model.layers.51.block_sparse_moe.experts.74.w3", "model.layers.51.block_sparse_moe.experts.75.w3", "model.layers.51.block_sparse_moe.experts.76.w3", "model.layers.51.block_sparse_moe.experts.77.w3", "model.layers.51.block_sparse_moe.experts.78.w3", "model.layers.51.block_sparse_moe.experts.79.w3", "model.layers.51.block_sparse_moe.experts.80.w3", "model.layers.51.block_sparse_moe.experts.81.w3", "model.layers.51.block_sparse_moe.experts.82.w3", "model.layers.51.block_sparse_moe.experts.83.w3", "model.layers.51.block_sparse_moe.experts.84.w3", "model.layers.51.block_sparse_moe.experts.85.w3", "model.layers.51.block_sparse_moe.experts.86.w3", "model.layers.51.block_sparse_moe.experts.87.w3", "model.layers.51.block_sparse_moe.experts.88.w3", "model.layers.51.block_sparse_moe.experts.89.w3", "model.layers.51.block_sparse_moe.experts.90.w3", "model.layers.51.block_sparse_moe.experts.91.w3", "model.layers.51.block_sparse_moe.experts.92.w3", "model.layers.51.block_sparse_moe.experts.93.w3", "model.layers.51.block_sparse_moe.experts.94.w3", "model.layers.51.block_sparse_moe.experts.95.w3", "model.layers.51.block_sparse_moe.experts.96.w3", "model.layers.51.block_sparse_moe.experts.97.w3", "model.layers.51.block_sparse_moe.experts.98.w3", "model.layers.51.block_sparse_moe.experts.99.w3", "model.layers.51.block_sparse_moe.experts.100.w3", "model.layers.51.block_sparse_moe.experts.101.w3", "model.layers.51.block_sparse_moe.experts.102.w3", "model.layers.51.block_sparse_moe.experts.103.w3", "model.layers.51.block_sparse_moe.experts.104.w3", "model.layers.51.block_sparse_moe.experts.105.w3", "model.layers.51.block_sparse_moe.experts.106.w3", "model.layers.51.block_sparse_moe.experts.107.w3", "model.layers.51.block_sparse_moe.experts.108.w3", "model.layers.51.block_sparse_moe.experts.109.w3", "model.layers.51.block_sparse_moe.experts.110.w3", "model.layers.51.block_sparse_moe.experts.111.w3", "model.layers.51.block_sparse_moe.experts.112.w3", "model.layers.51.block_sparse_moe.experts.113.w3", "model.layers.51.block_sparse_moe.experts.114.w3", "model.layers.51.block_sparse_moe.experts.115.w3", "model.layers.51.block_sparse_moe.experts.116.w3", "model.layers.51.block_sparse_moe.experts.117.w3", "model.layers.51.block_sparse_moe.experts.118.w3", "model.layers.51.block_sparse_moe.experts.119.w3", "model.layers.51.block_sparse_moe.experts.120.w3", "model.layers.51.block_sparse_moe.experts.121.w3", "model.layers.51.block_sparse_moe.experts.122.w3", "model.layers.51.block_sparse_moe.experts.123.w3", "model.layers.51.block_sparse_moe.experts.124.w3", "model.layers.51.block_sparse_moe.experts.125.w3", "model.layers.51.block_sparse_moe.experts.126.w3", "model.layers.51.block_sparse_moe.experts.127.w3", "model.layers.51.block_sparse_moe.experts.128.w3", "model.layers.51.block_sparse_moe.experts.129.w3", "model.layers.51.block_sparse_moe.experts.130.w3", "model.layers.51.block_sparse_moe.experts.131.w3", "model.layers.51.block_sparse_moe.experts.132.w3", "model.layers.51.block_sparse_moe.experts.133.w3", "model.layers.51.block_sparse_moe.experts.134.w3", "model.layers.51.block_sparse_moe.experts.135.w3", "model.layers.51.block_sparse_moe.experts.136.w3", "model.layers.51.block_sparse_moe.experts.137.w3", "model.layers.51.block_sparse_moe.experts.138.w3", "model.layers.51.block_sparse_moe.experts.139.w3", "model.layers.51.block_sparse_moe.experts.140.w3", "model.layers.51.block_sparse_moe.experts.141.w3", "model.layers.51.block_sparse_moe.experts.142.w3", "model.layers.51.block_sparse_moe.experts.143.w3", "model.layers.51.block_sparse_moe.experts.144.w3", "model.layers.51.block_sparse_moe.experts.145.w3", "model.layers.51.block_sparse_moe.experts.146.w3", "model.layers.51.block_sparse_moe.experts.147.w3", "model.layers.51.block_sparse_moe.experts.148.w3", "model.layers.51.block_sparse_moe.experts.149.w3", "model.layers.51.block_sparse_moe.experts.150.w3", "model.layers.51.block_sparse_moe.experts.151.w3", "model.layers.51.block_sparse_moe.experts.152.w3", "model.layers.51.block_sparse_moe.experts.153.w3", "model.layers.51.block_sparse_moe.experts.154.w3", "model.layers.51.block_sparse_moe.experts.155.w3", "model.layers.51.block_sparse_moe.experts.156.w3", "model.layers.51.block_sparse_moe.experts.157.w3", "model.layers.51.block_sparse_moe.experts.158.w3", "model.layers.51.block_sparse_moe.experts.159.w3", "model.layers.51.block_sparse_moe.experts.160.w3", "model.layers.51.block_sparse_moe.experts.161.w3", "model.layers.51.block_sparse_moe.experts.162.w3", "model.layers.51.block_sparse_moe.experts.163.w3", "model.layers.51.block_sparse_moe.experts.164.w3", "model.layers.51.block_sparse_moe.experts.165.w3", "model.layers.51.block_sparse_moe.experts.166.w3", "model.layers.51.block_sparse_moe.experts.167.w3", "model.layers.51.block_sparse_moe.experts.168.w3", "model.layers.51.block_sparse_moe.experts.169.w3", "model.layers.51.block_sparse_moe.experts.170.w3", "model.layers.51.block_sparse_moe.experts.171.w3", "model.layers.51.block_sparse_moe.experts.172.w3", "model.layers.51.block_sparse_moe.experts.173.w3", "model.layers.51.block_sparse_moe.experts.174.w3", "model.layers.51.block_sparse_moe.experts.175.w3", "model.layers.51.block_sparse_moe.experts.176.w3", "model.layers.51.block_sparse_moe.experts.177.w3", "model.layers.51.block_sparse_moe.experts.178.w3", "model.layers.51.block_sparse_moe.experts.179.w3", "model.layers.51.block_sparse_moe.experts.180.w3", "model.layers.51.block_sparse_moe.experts.181.w3", "model.layers.51.block_sparse_moe.experts.182.w3", "model.layers.51.block_sparse_moe.experts.183.w3", "model.layers.51.block_sparse_moe.experts.184.w3", "model.layers.51.block_sparse_moe.experts.185.w3", "model.layers.51.block_sparse_moe.experts.186.w3", "model.layers.51.block_sparse_moe.experts.187.w3", "model.layers.51.block_sparse_moe.experts.188.w3", "model.layers.51.block_sparse_moe.experts.189.w3", "model.layers.51.block_sparse_moe.experts.190.w3", "model.layers.51.block_sparse_moe.experts.191.w3", "model.layers.51.block_sparse_moe.experts.192.w3", "model.layers.51.block_sparse_moe.experts.193.w3", "model.layers.51.block_sparse_moe.experts.194.w3", "model.layers.51.block_sparse_moe.experts.195.w3", "model.layers.51.block_sparse_moe.experts.196.w3", "model.layers.51.block_sparse_moe.experts.197.w3", "model.layers.51.block_sparse_moe.experts.198.w3", "model.layers.51.block_sparse_moe.experts.199.w3", "model.layers.51.block_sparse_moe.experts.200.w3", "model.layers.51.block_sparse_moe.experts.201.w3", "model.layers.51.block_sparse_moe.experts.202.w3", "model.layers.51.block_sparse_moe.experts.203.w3", "model.layers.51.block_sparse_moe.experts.204.w3", "model.layers.51.block_sparse_moe.experts.205.w3", "model.layers.51.block_sparse_moe.experts.206.w3", "model.layers.51.block_sparse_moe.experts.207.w3", "model.layers.51.block_sparse_moe.experts.208.w3", "model.layers.51.block_sparse_moe.experts.209.w3", "model.layers.51.block_sparse_moe.experts.210.w3", "model.layers.51.block_sparse_moe.experts.211.w3", "model.layers.51.block_sparse_moe.experts.212.w3", "model.layers.51.block_sparse_moe.experts.213.w3", "model.layers.51.block_sparse_moe.experts.214.w3", "model.layers.51.block_sparse_moe.experts.215.w3", "model.layers.51.block_sparse_moe.experts.216.w3", "model.layers.51.block_sparse_moe.experts.217.w3", "model.layers.51.block_sparse_moe.experts.218.w3", "model.layers.51.block_sparse_moe.experts.219.w3", "model.layers.51.block_sparse_moe.experts.220.w3", "model.layers.51.block_sparse_moe.experts.221.w3", "model.layers.51.block_sparse_moe.experts.222.w3", "model.layers.51.block_sparse_moe.experts.223.w3", "model.layers.51.block_sparse_moe.experts.224.w3", "model.layers.51.block_sparse_moe.experts.225.w3", "model.layers.51.block_sparse_moe.experts.226.w3", "model.layers.51.block_sparse_moe.experts.227.w3", "model.layers.51.block_sparse_moe.experts.228.w3", "model.layers.51.block_sparse_moe.experts.229.w3", "model.layers.51.block_sparse_moe.experts.230.w3", "model.layers.51.block_sparse_moe.experts.231.w3", "model.layers.51.block_sparse_moe.experts.232.w3", "model.layers.51.block_sparse_moe.experts.233.w3", "model.layers.51.block_sparse_moe.experts.234.w3", "model.layers.51.block_sparse_moe.experts.235.w3", "model.layers.51.block_sparse_moe.experts.236.w3", "model.layers.51.block_sparse_moe.experts.237.w3", "model.layers.51.block_sparse_moe.experts.238.w3", "model.layers.51.block_sparse_moe.experts.239.w3", "model.layers.51.block_sparse_moe.experts.240.w3", "model.layers.51.block_sparse_moe.experts.241.w3", "model.layers.51.block_sparse_moe.experts.242.w3", "model.layers.51.block_sparse_moe.experts.243.w3", "model.layers.51.block_sparse_moe.experts.244.w3", "model.layers.51.block_sparse_moe.experts.245.w3", "model.layers.51.block_sparse_moe.experts.246.w3", "model.layers.51.block_sparse_moe.experts.247.w3", "model.layers.51.block_sparse_moe.experts.248.w3", "model.layers.51.block_sparse_moe.experts.249.w3", "model.layers.51.block_sparse_moe.experts.250.w3", "model.layers.51.block_sparse_moe.experts.251.w3", "model.layers.51.block_sparse_moe.experts.252.w3", "model.layers.51.block_sparse_moe.experts.253.w3", "model.layers.51.block_sparse_moe.experts.254.w3", "model.layers.51.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -1.9303709268558844e-05, "dbits": 2415919104 } ] }, { "idx": 259, "layers": [ "model.layers.51.block_sparse_moe.experts.0.w2", "model.layers.51.block_sparse_moe.experts.1.w2", "model.layers.51.block_sparse_moe.experts.2.w2", "model.layers.51.block_sparse_moe.experts.3.w2", "model.layers.51.block_sparse_moe.experts.4.w2", "model.layers.51.block_sparse_moe.experts.5.w2", "model.layers.51.block_sparse_moe.experts.6.w2", "model.layers.51.block_sparse_moe.experts.7.w2", "model.layers.51.block_sparse_moe.experts.8.w2", "model.layers.51.block_sparse_moe.experts.9.w2", "model.layers.51.block_sparse_moe.experts.10.w2", "model.layers.51.block_sparse_moe.experts.11.w2", "model.layers.51.block_sparse_moe.experts.12.w2", "model.layers.51.block_sparse_moe.experts.13.w2", "model.layers.51.block_sparse_moe.experts.14.w2", "model.layers.51.block_sparse_moe.experts.15.w2", "model.layers.51.block_sparse_moe.experts.16.w2", "model.layers.51.block_sparse_moe.experts.17.w2", "model.layers.51.block_sparse_moe.experts.18.w2", "model.layers.51.block_sparse_moe.experts.19.w2", "model.layers.51.block_sparse_moe.experts.20.w2", "model.layers.51.block_sparse_moe.experts.21.w2", "model.layers.51.block_sparse_moe.experts.22.w2", "model.layers.51.block_sparse_moe.experts.23.w2", "model.layers.51.block_sparse_moe.experts.24.w2", "model.layers.51.block_sparse_moe.experts.25.w2", "model.layers.51.block_sparse_moe.experts.26.w2", "model.layers.51.block_sparse_moe.experts.27.w2", "model.layers.51.block_sparse_moe.experts.28.w2", "model.layers.51.block_sparse_moe.experts.29.w2", "model.layers.51.block_sparse_moe.experts.30.w2", "model.layers.51.block_sparse_moe.experts.31.w2", "model.layers.51.block_sparse_moe.experts.32.w2", "model.layers.51.block_sparse_moe.experts.33.w2", "model.layers.51.block_sparse_moe.experts.34.w2", "model.layers.51.block_sparse_moe.experts.35.w2", "model.layers.51.block_sparse_moe.experts.36.w2", "model.layers.51.block_sparse_moe.experts.37.w2", "model.layers.51.block_sparse_moe.experts.38.w2", "model.layers.51.block_sparse_moe.experts.39.w2", "model.layers.51.block_sparse_moe.experts.40.w2", "model.layers.51.block_sparse_moe.experts.41.w2", "model.layers.51.block_sparse_moe.experts.42.w2", "model.layers.51.block_sparse_moe.experts.43.w2", "model.layers.51.block_sparse_moe.experts.44.w2", "model.layers.51.block_sparse_moe.experts.45.w2", "model.layers.51.block_sparse_moe.experts.46.w2", "model.layers.51.block_sparse_moe.experts.47.w2", "model.layers.51.block_sparse_moe.experts.48.w2", "model.layers.51.block_sparse_moe.experts.49.w2", "model.layers.51.block_sparse_moe.experts.50.w2", "model.layers.51.block_sparse_moe.experts.51.w2", "model.layers.51.block_sparse_moe.experts.52.w2", "model.layers.51.block_sparse_moe.experts.53.w2", "model.layers.51.block_sparse_moe.experts.54.w2", "model.layers.51.block_sparse_moe.experts.55.w2", "model.layers.51.block_sparse_moe.experts.56.w2", "model.layers.51.block_sparse_moe.experts.57.w2", "model.layers.51.block_sparse_moe.experts.58.w2", "model.layers.51.block_sparse_moe.experts.59.w2", "model.layers.51.block_sparse_moe.experts.60.w2", "model.layers.51.block_sparse_moe.experts.61.w2", "model.layers.51.block_sparse_moe.experts.62.w2", "model.layers.51.block_sparse_moe.experts.63.w2", "model.layers.51.block_sparse_moe.experts.64.w2", "model.layers.51.block_sparse_moe.experts.65.w2", "model.layers.51.block_sparse_moe.experts.66.w2", "model.layers.51.block_sparse_moe.experts.67.w2", "model.layers.51.block_sparse_moe.experts.68.w2", "model.layers.51.block_sparse_moe.experts.69.w2", "model.layers.51.block_sparse_moe.experts.70.w2", "model.layers.51.block_sparse_moe.experts.71.w2", "model.layers.51.block_sparse_moe.experts.72.w2", "model.layers.51.block_sparse_moe.experts.73.w2", "model.layers.51.block_sparse_moe.experts.74.w2", "model.layers.51.block_sparse_moe.experts.75.w2", "model.layers.51.block_sparse_moe.experts.76.w2", "model.layers.51.block_sparse_moe.experts.77.w2", "model.layers.51.block_sparse_moe.experts.78.w2", "model.layers.51.block_sparse_moe.experts.79.w2", "model.layers.51.block_sparse_moe.experts.80.w2", "model.layers.51.block_sparse_moe.experts.81.w2", "model.layers.51.block_sparse_moe.experts.82.w2", "model.layers.51.block_sparse_moe.experts.83.w2", "model.layers.51.block_sparse_moe.experts.84.w2", "model.layers.51.block_sparse_moe.experts.85.w2", "model.layers.51.block_sparse_moe.experts.86.w2", "model.layers.51.block_sparse_moe.experts.87.w2", "model.layers.51.block_sparse_moe.experts.88.w2", "model.layers.51.block_sparse_moe.experts.89.w2", "model.layers.51.block_sparse_moe.experts.90.w2", "model.layers.51.block_sparse_moe.experts.91.w2", "model.layers.51.block_sparse_moe.experts.92.w2", "model.layers.51.block_sparse_moe.experts.93.w2", "model.layers.51.block_sparse_moe.experts.94.w2", "model.layers.51.block_sparse_moe.experts.95.w2", "model.layers.51.block_sparse_moe.experts.96.w2", "model.layers.51.block_sparse_moe.experts.97.w2", "model.layers.51.block_sparse_moe.experts.98.w2", "model.layers.51.block_sparse_moe.experts.99.w2", "model.layers.51.block_sparse_moe.experts.100.w2", "model.layers.51.block_sparse_moe.experts.101.w2", "model.layers.51.block_sparse_moe.experts.102.w2", "model.layers.51.block_sparse_moe.experts.103.w2", "model.layers.51.block_sparse_moe.experts.104.w2", "model.layers.51.block_sparse_moe.experts.105.w2", "model.layers.51.block_sparse_moe.experts.106.w2", "model.layers.51.block_sparse_moe.experts.107.w2", "model.layers.51.block_sparse_moe.experts.108.w2", "model.layers.51.block_sparse_moe.experts.109.w2", "model.layers.51.block_sparse_moe.experts.110.w2", "model.layers.51.block_sparse_moe.experts.111.w2", "model.layers.51.block_sparse_moe.experts.112.w2", "model.layers.51.block_sparse_moe.experts.113.w2", "model.layers.51.block_sparse_moe.experts.114.w2", "model.layers.51.block_sparse_moe.experts.115.w2", "model.layers.51.block_sparse_moe.experts.116.w2", "model.layers.51.block_sparse_moe.experts.117.w2", "model.layers.51.block_sparse_moe.experts.118.w2", "model.layers.51.block_sparse_moe.experts.119.w2", "model.layers.51.block_sparse_moe.experts.120.w2", "model.layers.51.block_sparse_moe.experts.121.w2", "model.layers.51.block_sparse_moe.experts.122.w2", "model.layers.51.block_sparse_moe.experts.123.w2", "model.layers.51.block_sparse_moe.experts.124.w2", "model.layers.51.block_sparse_moe.experts.125.w2", "model.layers.51.block_sparse_moe.experts.126.w2", "model.layers.51.block_sparse_moe.experts.127.w2", "model.layers.51.block_sparse_moe.experts.128.w2", "model.layers.51.block_sparse_moe.experts.129.w2", "model.layers.51.block_sparse_moe.experts.130.w2", "model.layers.51.block_sparse_moe.experts.131.w2", "model.layers.51.block_sparse_moe.experts.132.w2", "model.layers.51.block_sparse_moe.experts.133.w2", "model.layers.51.block_sparse_moe.experts.134.w2", "model.layers.51.block_sparse_moe.experts.135.w2", "model.layers.51.block_sparse_moe.experts.136.w2", "model.layers.51.block_sparse_moe.experts.137.w2", "model.layers.51.block_sparse_moe.experts.138.w2", "model.layers.51.block_sparse_moe.experts.139.w2", "model.layers.51.block_sparse_moe.experts.140.w2", "model.layers.51.block_sparse_moe.experts.141.w2", "model.layers.51.block_sparse_moe.experts.142.w2", "model.layers.51.block_sparse_moe.experts.143.w2", "model.layers.51.block_sparse_moe.experts.144.w2", "model.layers.51.block_sparse_moe.experts.145.w2", "model.layers.51.block_sparse_moe.experts.146.w2", "model.layers.51.block_sparse_moe.experts.147.w2", "model.layers.51.block_sparse_moe.experts.148.w2", "model.layers.51.block_sparse_moe.experts.149.w2", "model.layers.51.block_sparse_moe.experts.150.w2", "model.layers.51.block_sparse_moe.experts.151.w2", "model.layers.51.block_sparse_moe.experts.152.w2", "model.layers.51.block_sparse_moe.experts.153.w2", "model.layers.51.block_sparse_moe.experts.154.w2", "model.layers.51.block_sparse_moe.experts.155.w2", "model.layers.51.block_sparse_moe.experts.156.w2", "model.layers.51.block_sparse_moe.experts.157.w2", "model.layers.51.block_sparse_moe.experts.158.w2", "model.layers.51.block_sparse_moe.experts.159.w2", "model.layers.51.block_sparse_moe.experts.160.w2", "model.layers.51.block_sparse_moe.experts.161.w2", "model.layers.51.block_sparse_moe.experts.162.w2", "model.layers.51.block_sparse_moe.experts.163.w2", "model.layers.51.block_sparse_moe.experts.164.w2", "model.layers.51.block_sparse_moe.experts.165.w2", "model.layers.51.block_sparse_moe.experts.166.w2", "model.layers.51.block_sparse_moe.experts.167.w2", "model.layers.51.block_sparse_moe.experts.168.w2", "model.layers.51.block_sparse_moe.experts.169.w2", "model.layers.51.block_sparse_moe.experts.170.w2", "model.layers.51.block_sparse_moe.experts.171.w2", "model.layers.51.block_sparse_moe.experts.172.w2", "model.layers.51.block_sparse_moe.experts.173.w2", "model.layers.51.block_sparse_moe.experts.174.w2", "model.layers.51.block_sparse_moe.experts.175.w2", "model.layers.51.block_sparse_moe.experts.176.w2", "model.layers.51.block_sparse_moe.experts.177.w2", "model.layers.51.block_sparse_moe.experts.178.w2", "model.layers.51.block_sparse_moe.experts.179.w2", "model.layers.51.block_sparse_moe.experts.180.w2", "model.layers.51.block_sparse_moe.experts.181.w2", "model.layers.51.block_sparse_moe.experts.182.w2", "model.layers.51.block_sparse_moe.experts.183.w2", "model.layers.51.block_sparse_moe.experts.184.w2", "model.layers.51.block_sparse_moe.experts.185.w2", "model.layers.51.block_sparse_moe.experts.186.w2", "model.layers.51.block_sparse_moe.experts.187.w2", "model.layers.51.block_sparse_moe.experts.188.w2", "model.layers.51.block_sparse_moe.experts.189.w2", "model.layers.51.block_sparse_moe.experts.190.w2", "model.layers.51.block_sparse_moe.experts.191.w2", "model.layers.51.block_sparse_moe.experts.192.w2", "model.layers.51.block_sparse_moe.experts.193.w2", "model.layers.51.block_sparse_moe.experts.194.w2", "model.layers.51.block_sparse_moe.experts.195.w2", "model.layers.51.block_sparse_moe.experts.196.w2", "model.layers.51.block_sparse_moe.experts.197.w2", "model.layers.51.block_sparse_moe.experts.198.w2", "model.layers.51.block_sparse_moe.experts.199.w2", "model.layers.51.block_sparse_moe.experts.200.w2", "model.layers.51.block_sparse_moe.experts.201.w2", "model.layers.51.block_sparse_moe.experts.202.w2", "model.layers.51.block_sparse_moe.experts.203.w2", "model.layers.51.block_sparse_moe.experts.204.w2", "model.layers.51.block_sparse_moe.experts.205.w2", "model.layers.51.block_sparse_moe.experts.206.w2", "model.layers.51.block_sparse_moe.experts.207.w2", "model.layers.51.block_sparse_moe.experts.208.w2", "model.layers.51.block_sparse_moe.experts.209.w2", "model.layers.51.block_sparse_moe.experts.210.w2", "model.layers.51.block_sparse_moe.experts.211.w2", "model.layers.51.block_sparse_moe.experts.212.w2", "model.layers.51.block_sparse_moe.experts.213.w2", "model.layers.51.block_sparse_moe.experts.214.w2", "model.layers.51.block_sparse_moe.experts.215.w2", "model.layers.51.block_sparse_moe.experts.216.w2", "model.layers.51.block_sparse_moe.experts.217.w2", "model.layers.51.block_sparse_moe.experts.218.w2", "model.layers.51.block_sparse_moe.experts.219.w2", "model.layers.51.block_sparse_moe.experts.220.w2", "model.layers.51.block_sparse_moe.experts.221.w2", "model.layers.51.block_sparse_moe.experts.222.w2", "model.layers.51.block_sparse_moe.experts.223.w2", "model.layers.51.block_sparse_moe.experts.224.w2", "model.layers.51.block_sparse_moe.experts.225.w2", "model.layers.51.block_sparse_moe.experts.226.w2", "model.layers.51.block_sparse_moe.experts.227.w2", "model.layers.51.block_sparse_moe.experts.228.w2", "model.layers.51.block_sparse_moe.experts.229.w2", "model.layers.51.block_sparse_moe.experts.230.w2", "model.layers.51.block_sparse_moe.experts.231.w2", "model.layers.51.block_sparse_moe.experts.232.w2", "model.layers.51.block_sparse_moe.experts.233.w2", "model.layers.51.block_sparse_moe.experts.234.w2", "model.layers.51.block_sparse_moe.experts.235.w2", "model.layers.51.block_sparse_moe.experts.236.w2", "model.layers.51.block_sparse_moe.experts.237.w2", "model.layers.51.block_sparse_moe.experts.238.w2", "model.layers.51.block_sparse_moe.experts.239.w2", "model.layers.51.block_sparse_moe.experts.240.w2", "model.layers.51.block_sparse_moe.experts.241.w2", "model.layers.51.block_sparse_moe.experts.242.w2", "model.layers.51.block_sparse_moe.experts.243.w2", "model.layers.51.block_sparse_moe.experts.244.w2", "model.layers.51.block_sparse_moe.experts.245.w2", "model.layers.51.block_sparse_moe.experts.246.w2", "model.layers.51.block_sparse_moe.experts.247.w2", "model.layers.51.block_sparse_moe.experts.248.w2", "model.layers.51.block_sparse_moe.experts.249.w2", "model.layers.51.block_sparse_moe.experts.250.w2", "model.layers.51.block_sparse_moe.experts.251.w2", "model.layers.51.block_sparse_moe.experts.252.w2", "model.layers.51.block_sparse_moe.experts.253.w2", "model.layers.51.block_sparse_moe.experts.254.w2", "model.layers.51.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -7.488355040552452e-05, "dbits": 1207959552 } ] }, { "idx": 260, "layers": [ "model.layers.52.self_attn.q_proj" ], "candidates": [ { "dkld": 0.00012368187308309242, "dbits": 18874368 } ] }, { "idx": 261, "layers": [ "model.layers.52.self_attn.k_proj", "model.layers.52.self_attn.v_proj" ], "candidates": [ { "dkld": 0.0012703761458396912, "dbits": 6291456 } ] }, { "idx": 262, "layers": [ "model.layers.52.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0002754785120487324, "dbits": 18874368 } ] }, { "idx": 263, "layers": [ "model.layers.52.block_sparse_moe.experts.0.w1", "model.layers.52.block_sparse_moe.experts.1.w1", "model.layers.52.block_sparse_moe.experts.2.w1", "model.layers.52.block_sparse_moe.experts.3.w1", "model.layers.52.block_sparse_moe.experts.4.w1", "model.layers.52.block_sparse_moe.experts.5.w1", "model.layers.52.block_sparse_moe.experts.6.w1", "model.layers.52.block_sparse_moe.experts.7.w1", "model.layers.52.block_sparse_moe.experts.8.w1", "model.layers.52.block_sparse_moe.experts.9.w1", "model.layers.52.block_sparse_moe.experts.10.w1", "model.layers.52.block_sparse_moe.experts.11.w1", "model.layers.52.block_sparse_moe.experts.12.w1", "model.layers.52.block_sparse_moe.experts.13.w1", "model.layers.52.block_sparse_moe.experts.14.w1", "model.layers.52.block_sparse_moe.experts.15.w1", "model.layers.52.block_sparse_moe.experts.16.w1", "model.layers.52.block_sparse_moe.experts.17.w1", "model.layers.52.block_sparse_moe.experts.18.w1", "model.layers.52.block_sparse_moe.experts.19.w1", "model.layers.52.block_sparse_moe.experts.20.w1", "model.layers.52.block_sparse_moe.experts.21.w1", "model.layers.52.block_sparse_moe.experts.22.w1", "model.layers.52.block_sparse_moe.experts.23.w1", "model.layers.52.block_sparse_moe.experts.24.w1", "model.layers.52.block_sparse_moe.experts.25.w1", "model.layers.52.block_sparse_moe.experts.26.w1", "model.layers.52.block_sparse_moe.experts.27.w1", "model.layers.52.block_sparse_moe.experts.28.w1", "model.layers.52.block_sparse_moe.experts.29.w1", "model.layers.52.block_sparse_moe.experts.30.w1", "model.layers.52.block_sparse_moe.experts.31.w1", "model.layers.52.block_sparse_moe.experts.32.w1", "model.layers.52.block_sparse_moe.experts.33.w1", "model.layers.52.block_sparse_moe.experts.34.w1", "model.layers.52.block_sparse_moe.experts.35.w1", "model.layers.52.block_sparse_moe.experts.36.w1", "model.layers.52.block_sparse_moe.experts.37.w1", "model.layers.52.block_sparse_moe.experts.38.w1", "model.layers.52.block_sparse_moe.experts.39.w1", "model.layers.52.block_sparse_moe.experts.40.w1", "model.layers.52.block_sparse_moe.experts.41.w1", "model.layers.52.block_sparse_moe.experts.42.w1", "model.layers.52.block_sparse_moe.experts.43.w1", "model.layers.52.block_sparse_moe.experts.44.w1", "model.layers.52.block_sparse_moe.experts.45.w1", "model.layers.52.block_sparse_moe.experts.46.w1", "model.layers.52.block_sparse_moe.experts.47.w1", "model.layers.52.block_sparse_moe.experts.48.w1", "model.layers.52.block_sparse_moe.experts.49.w1", "model.layers.52.block_sparse_moe.experts.50.w1", "model.layers.52.block_sparse_moe.experts.51.w1", "model.layers.52.block_sparse_moe.experts.52.w1", "model.layers.52.block_sparse_moe.experts.53.w1", "model.layers.52.block_sparse_moe.experts.54.w1", "model.layers.52.block_sparse_moe.experts.55.w1", "model.layers.52.block_sparse_moe.experts.56.w1", "model.layers.52.block_sparse_moe.experts.57.w1", "model.layers.52.block_sparse_moe.experts.58.w1", "model.layers.52.block_sparse_moe.experts.59.w1", "model.layers.52.block_sparse_moe.experts.60.w1", "model.layers.52.block_sparse_moe.experts.61.w1", "model.layers.52.block_sparse_moe.experts.62.w1", "model.layers.52.block_sparse_moe.experts.63.w1", "model.layers.52.block_sparse_moe.experts.64.w1", "model.layers.52.block_sparse_moe.experts.65.w1", "model.layers.52.block_sparse_moe.experts.66.w1", "model.layers.52.block_sparse_moe.experts.67.w1", "model.layers.52.block_sparse_moe.experts.68.w1", "model.layers.52.block_sparse_moe.experts.69.w1", "model.layers.52.block_sparse_moe.experts.70.w1", "model.layers.52.block_sparse_moe.experts.71.w1", "model.layers.52.block_sparse_moe.experts.72.w1", "model.layers.52.block_sparse_moe.experts.73.w1", "model.layers.52.block_sparse_moe.experts.74.w1", "model.layers.52.block_sparse_moe.experts.75.w1", "model.layers.52.block_sparse_moe.experts.76.w1", "model.layers.52.block_sparse_moe.experts.77.w1", "model.layers.52.block_sparse_moe.experts.78.w1", "model.layers.52.block_sparse_moe.experts.79.w1", "model.layers.52.block_sparse_moe.experts.80.w1", "model.layers.52.block_sparse_moe.experts.81.w1", "model.layers.52.block_sparse_moe.experts.82.w1", "model.layers.52.block_sparse_moe.experts.83.w1", "model.layers.52.block_sparse_moe.experts.84.w1", "model.layers.52.block_sparse_moe.experts.85.w1", "model.layers.52.block_sparse_moe.experts.86.w1", "model.layers.52.block_sparse_moe.experts.87.w1", "model.layers.52.block_sparse_moe.experts.88.w1", "model.layers.52.block_sparse_moe.experts.89.w1", "model.layers.52.block_sparse_moe.experts.90.w1", "model.layers.52.block_sparse_moe.experts.91.w1", "model.layers.52.block_sparse_moe.experts.92.w1", "model.layers.52.block_sparse_moe.experts.93.w1", "model.layers.52.block_sparse_moe.experts.94.w1", "model.layers.52.block_sparse_moe.experts.95.w1", "model.layers.52.block_sparse_moe.experts.96.w1", "model.layers.52.block_sparse_moe.experts.97.w1", "model.layers.52.block_sparse_moe.experts.98.w1", "model.layers.52.block_sparse_moe.experts.99.w1", "model.layers.52.block_sparse_moe.experts.100.w1", "model.layers.52.block_sparse_moe.experts.101.w1", "model.layers.52.block_sparse_moe.experts.102.w1", "model.layers.52.block_sparse_moe.experts.103.w1", "model.layers.52.block_sparse_moe.experts.104.w1", "model.layers.52.block_sparse_moe.experts.105.w1", "model.layers.52.block_sparse_moe.experts.106.w1", "model.layers.52.block_sparse_moe.experts.107.w1", "model.layers.52.block_sparse_moe.experts.108.w1", "model.layers.52.block_sparse_moe.experts.109.w1", "model.layers.52.block_sparse_moe.experts.110.w1", "model.layers.52.block_sparse_moe.experts.111.w1", "model.layers.52.block_sparse_moe.experts.112.w1", "model.layers.52.block_sparse_moe.experts.113.w1", "model.layers.52.block_sparse_moe.experts.114.w1", "model.layers.52.block_sparse_moe.experts.115.w1", "model.layers.52.block_sparse_moe.experts.116.w1", "model.layers.52.block_sparse_moe.experts.117.w1", "model.layers.52.block_sparse_moe.experts.118.w1", "model.layers.52.block_sparse_moe.experts.119.w1", "model.layers.52.block_sparse_moe.experts.120.w1", "model.layers.52.block_sparse_moe.experts.121.w1", "model.layers.52.block_sparse_moe.experts.122.w1", "model.layers.52.block_sparse_moe.experts.123.w1", "model.layers.52.block_sparse_moe.experts.124.w1", "model.layers.52.block_sparse_moe.experts.125.w1", "model.layers.52.block_sparse_moe.experts.126.w1", "model.layers.52.block_sparse_moe.experts.127.w1", "model.layers.52.block_sparse_moe.experts.128.w1", "model.layers.52.block_sparse_moe.experts.129.w1", "model.layers.52.block_sparse_moe.experts.130.w1", "model.layers.52.block_sparse_moe.experts.131.w1", "model.layers.52.block_sparse_moe.experts.132.w1", "model.layers.52.block_sparse_moe.experts.133.w1", "model.layers.52.block_sparse_moe.experts.134.w1", "model.layers.52.block_sparse_moe.experts.135.w1", "model.layers.52.block_sparse_moe.experts.136.w1", "model.layers.52.block_sparse_moe.experts.137.w1", "model.layers.52.block_sparse_moe.experts.138.w1", "model.layers.52.block_sparse_moe.experts.139.w1", "model.layers.52.block_sparse_moe.experts.140.w1", "model.layers.52.block_sparse_moe.experts.141.w1", "model.layers.52.block_sparse_moe.experts.142.w1", "model.layers.52.block_sparse_moe.experts.143.w1", "model.layers.52.block_sparse_moe.experts.144.w1", "model.layers.52.block_sparse_moe.experts.145.w1", "model.layers.52.block_sparse_moe.experts.146.w1", "model.layers.52.block_sparse_moe.experts.147.w1", "model.layers.52.block_sparse_moe.experts.148.w1", "model.layers.52.block_sparse_moe.experts.149.w1", "model.layers.52.block_sparse_moe.experts.150.w1", "model.layers.52.block_sparse_moe.experts.151.w1", "model.layers.52.block_sparse_moe.experts.152.w1", "model.layers.52.block_sparse_moe.experts.153.w1", "model.layers.52.block_sparse_moe.experts.154.w1", "model.layers.52.block_sparse_moe.experts.155.w1", "model.layers.52.block_sparse_moe.experts.156.w1", "model.layers.52.block_sparse_moe.experts.157.w1", "model.layers.52.block_sparse_moe.experts.158.w1", "model.layers.52.block_sparse_moe.experts.159.w1", "model.layers.52.block_sparse_moe.experts.160.w1", "model.layers.52.block_sparse_moe.experts.161.w1", "model.layers.52.block_sparse_moe.experts.162.w1", "model.layers.52.block_sparse_moe.experts.163.w1", "model.layers.52.block_sparse_moe.experts.164.w1", "model.layers.52.block_sparse_moe.experts.165.w1", "model.layers.52.block_sparse_moe.experts.166.w1", "model.layers.52.block_sparse_moe.experts.167.w1", "model.layers.52.block_sparse_moe.experts.168.w1", "model.layers.52.block_sparse_moe.experts.169.w1", "model.layers.52.block_sparse_moe.experts.170.w1", "model.layers.52.block_sparse_moe.experts.171.w1", "model.layers.52.block_sparse_moe.experts.172.w1", "model.layers.52.block_sparse_moe.experts.173.w1", "model.layers.52.block_sparse_moe.experts.174.w1", "model.layers.52.block_sparse_moe.experts.175.w1", "model.layers.52.block_sparse_moe.experts.176.w1", "model.layers.52.block_sparse_moe.experts.177.w1", "model.layers.52.block_sparse_moe.experts.178.w1", "model.layers.52.block_sparse_moe.experts.179.w1", "model.layers.52.block_sparse_moe.experts.180.w1", "model.layers.52.block_sparse_moe.experts.181.w1", "model.layers.52.block_sparse_moe.experts.182.w1", "model.layers.52.block_sparse_moe.experts.183.w1", "model.layers.52.block_sparse_moe.experts.184.w1", "model.layers.52.block_sparse_moe.experts.185.w1", "model.layers.52.block_sparse_moe.experts.186.w1", "model.layers.52.block_sparse_moe.experts.187.w1", "model.layers.52.block_sparse_moe.experts.188.w1", "model.layers.52.block_sparse_moe.experts.189.w1", "model.layers.52.block_sparse_moe.experts.190.w1", "model.layers.52.block_sparse_moe.experts.191.w1", "model.layers.52.block_sparse_moe.experts.192.w1", "model.layers.52.block_sparse_moe.experts.193.w1", "model.layers.52.block_sparse_moe.experts.194.w1", "model.layers.52.block_sparse_moe.experts.195.w1", "model.layers.52.block_sparse_moe.experts.196.w1", "model.layers.52.block_sparse_moe.experts.197.w1", "model.layers.52.block_sparse_moe.experts.198.w1", "model.layers.52.block_sparse_moe.experts.199.w1", "model.layers.52.block_sparse_moe.experts.200.w1", "model.layers.52.block_sparse_moe.experts.201.w1", "model.layers.52.block_sparse_moe.experts.202.w1", "model.layers.52.block_sparse_moe.experts.203.w1", "model.layers.52.block_sparse_moe.experts.204.w1", "model.layers.52.block_sparse_moe.experts.205.w1", "model.layers.52.block_sparse_moe.experts.206.w1", "model.layers.52.block_sparse_moe.experts.207.w1", "model.layers.52.block_sparse_moe.experts.208.w1", "model.layers.52.block_sparse_moe.experts.209.w1", "model.layers.52.block_sparse_moe.experts.210.w1", "model.layers.52.block_sparse_moe.experts.211.w1", "model.layers.52.block_sparse_moe.experts.212.w1", "model.layers.52.block_sparse_moe.experts.213.w1", "model.layers.52.block_sparse_moe.experts.214.w1", "model.layers.52.block_sparse_moe.experts.215.w1", "model.layers.52.block_sparse_moe.experts.216.w1", "model.layers.52.block_sparse_moe.experts.217.w1", "model.layers.52.block_sparse_moe.experts.218.w1", "model.layers.52.block_sparse_moe.experts.219.w1", "model.layers.52.block_sparse_moe.experts.220.w1", "model.layers.52.block_sparse_moe.experts.221.w1", "model.layers.52.block_sparse_moe.experts.222.w1", "model.layers.52.block_sparse_moe.experts.223.w1", "model.layers.52.block_sparse_moe.experts.224.w1", "model.layers.52.block_sparse_moe.experts.225.w1", "model.layers.52.block_sparse_moe.experts.226.w1", "model.layers.52.block_sparse_moe.experts.227.w1", "model.layers.52.block_sparse_moe.experts.228.w1", "model.layers.52.block_sparse_moe.experts.229.w1", "model.layers.52.block_sparse_moe.experts.230.w1", "model.layers.52.block_sparse_moe.experts.231.w1", "model.layers.52.block_sparse_moe.experts.232.w1", "model.layers.52.block_sparse_moe.experts.233.w1", "model.layers.52.block_sparse_moe.experts.234.w1", "model.layers.52.block_sparse_moe.experts.235.w1", "model.layers.52.block_sparse_moe.experts.236.w1", "model.layers.52.block_sparse_moe.experts.237.w1", "model.layers.52.block_sparse_moe.experts.238.w1", "model.layers.52.block_sparse_moe.experts.239.w1", "model.layers.52.block_sparse_moe.experts.240.w1", "model.layers.52.block_sparse_moe.experts.241.w1", "model.layers.52.block_sparse_moe.experts.242.w1", "model.layers.52.block_sparse_moe.experts.243.w1", "model.layers.52.block_sparse_moe.experts.244.w1", "model.layers.52.block_sparse_moe.experts.245.w1", "model.layers.52.block_sparse_moe.experts.246.w1", "model.layers.52.block_sparse_moe.experts.247.w1", "model.layers.52.block_sparse_moe.experts.248.w1", "model.layers.52.block_sparse_moe.experts.249.w1", "model.layers.52.block_sparse_moe.experts.250.w1", "model.layers.52.block_sparse_moe.experts.251.w1", "model.layers.52.block_sparse_moe.experts.252.w1", "model.layers.52.block_sparse_moe.experts.253.w1", "model.layers.52.block_sparse_moe.experts.254.w1", "model.layers.52.block_sparse_moe.experts.255.w1", "model.layers.52.block_sparse_moe.experts.0.w3", "model.layers.52.block_sparse_moe.experts.1.w3", "model.layers.52.block_sparse_moe.experts.2.w3", "model.layers.52.block_sparse_moe.experts.3.w3", "model.layers.52.block_sparse_moe.experts.4.w3", "model.layers.52.block_sparse_moe.experts.5.w3", "model.layers.52.block_sparse_moe.experts.6.w3", "model.layers.52.block_sparse_moe.experts.7.w3", "model.layers.52.block_sparse_moe.experts.8.w3", "model.layers.52.block_sparse_moe.experts.9.w3", "model.layers.52.block_sparse_moe.experts.10.w3", "model.layers.52.block_sparse_moe.experts.11.w3", "model.layers.52.block_sparse_moe.experts.12.w3", "model.layers.52.block_sparse_moe.experts.13.w3", "model.layers.52.block_sparse_moe.experts.14.w3", "model.layers.52.block_sparse_moe.experts.15.w3", "model.layers.52.block_sparse_moe.experts.16.w3", "model.layers.52.block_sparse_moe.experts.17.w3", "model.layers.52.block_sparse_moe.experts.18.w3", "model.layers.52.block_sparse_moe.experts.19.w3", "model.layers.52.block_sparse_moe.experts.20.w3", "model.layers.52.block_sparse_moe.experts.21.w3", "model.layers.52.block_sparse_moe.experts.22.w3", "model.layers.52.block_sparse_moe.experts.23.w3", "model.layers.52.block_sparse_moe.experts.24.w3", "model.layers.52.block_sparse_moe.experts.25.w3", "model.layers.52.block_sparse_moe.experts.26.w3", "model.layers.52.block_sparse_moe.experts.27.w3", "model.layers.52.block_sparse_moe.experts.28.w3", "model.layers.52.block_sparse_moe.experts.29.w3", "model.layers.52.block_sparse_moe.experts.30.w3", "model.layers.52.block_sparse_moe.experts.31.w3", "model.layers.52.block_sparse_moe.experts.32.w3", "model.layers.52.block_sparse_moe.experts.33.w3", "model.layers.52.block_sparse_moe.experts.34.w3", "model.layers.52.block_sparse_moe.experts.35.w3", "model.layers.52.block_sparse_moe.experts.36.w3", "model.layers.52.block_sparse_moe.experts.37.w3", "model.layers.52.block_sparse_moe.experts.38.w3", "model.layers.52.block_sparse_moe.experts.39.w3", "model.layers.52.block_sparse_moe.experts.40.w3", "model.layers.52.block_sparse_moe.experts.41.w3", "model.layers.52.block_sparse_moe.experts.42.w3", "model.layers.52.block_sparse_moe.experts.43.w3", "model.layers.52.block_sparse_moe.experts.44.w3", "model.layers.52.block_sparse_moe.experts.45.w3", "model.layers.52.block_sparse_moe.experts.46.w3", "model.layers.52.block_sparse_moe.experts.47.w3", "model.layers.52.block_sparse_moe.experts.48.w3", "model.layers.52.block_sparse_moe.experts.49.w3", "model.layers.52.block_sparse_moe.experts.50.w3", "model.layers.52.block_sparse_moe.experts.51.w3", "model.layers.52.block_sparse_moe.experts.52.w3", "model.layers.52.block_sparse_moe.experts.53.w3", "model.layers.52.block_sparse_moe.experts.54.w3", "model.layers.52.block_sparse_moe.experts.55.w3", "model.layers.52.block_sparse_moe.experts.56.w3", "model.layers.52.block_sparse_moe.experts.57.w3", "model.layers.52.block_sparse_moe.experts.58.w3", "model.layers.52.block_sparse_moe.experts.59.w3", "model.layers.52.block_sparse_moe.experts.60.w3", "model.layers.52.block_sparse_moe.experts.61.w3", "model.layers.52.block_sparse_moe.experts.62.w3", "model.layers.52.block_sparse_moe.experts.63.w3", "model.layers.52.block_sparse_moe.experts.64.w3", "model.layers.52.block_sparse_moe.experts.65.w3", "model.layers.52.block_sparse_moe.experts.66.w3", "model.layers.52.block_sparse_moe.experts.67.w3", "model.layers.52.block_sparse_moe.experts.68.w3", "model.layers.52.block_sparse_moe.experts.69.w3", "model.layers.52.block_sparse_moe.experts.70.w3", "model.layers.52.block_sparse_moe.experts.71.w3", "model.layers.52.block_sparse_moe.experts.72.w3", "model.layers.52.block_sparse_moe.experts.73.w3", "model.layers.52.block_sparse_moe.experts.74.w3", "model.layers.52.block_sparse_moe.experts.75.w3", "model.layers.52.block_sparse_moe.experts.76.w3", "model.layers.52.block_sparse_moe.experts.77.w3", "model.layers.52.block_sparse_moe.experts.78.w3", "model.layers.52.block_sparse_moe.experts.79.w3", "model.layers.52.block_sparse_moe.experts.80.w3", "model.layers.52.block_sparse_moe.experts.81.w3", "model.layers.52.block_sparse_moe.experts.82.w3", "model.layers.52.block_sparse_moe.experts.83.w3", "model.layers.52.block_sparse_moe.experts.84.w3", "model.layers.52.block_sparse_moe.experts.85.w3", "model.layers.52.block_sparse_moe.experts.86.w3", "model.layers.52.block_sparse_moe.experts.87.w3", "model.layers.52.block_sparse_moe.experts.88.w3", "model.layers.52.block_sparse_moe.experts.89.w3", "model.layers.52.block_sparse_moe.experts.90.w3", "model.layers.52.block_sparse_moe.experts.91.w3", "model.layers.52.block_sparse_moe.experts.92.w3", "model.layers.52.block_sparse_moe.experts.93.w3", "model.layers.52.block_sparse_moe.experts.94.w3", "model.layers.52.block_sparse_moe.experts.95.w3", "model.layers.52.block_sparse_moe.experts.96.w3", "model.layers.52.block_sparse_moe.experts.97.w3", "model.layers.52.block_sparse_moe.experts.98.w3", "model.layers.52.block_sparse_moe.experts.99.w3", "model.layers.52.block_sparse_moe.experts.100.w3", "model.layers.52.block_sparse_moe.experts.101.w3", "model.layers.52.block_sparse_moe.experts.102.w3", "model.layers.52.block_sparse_moe.experts.103.w3", "model.layers.52.block_sparse_moe.experts.104.w3", "model.layers.52.block_sparse_moe.experts.105.w3", "model.layers.52.block_sparse_moe.experts.106.w3", "model.layers.52.block_sparse_moe.experts.107.w3", "model.layers.52.block_sparse_moe.experts.108.w3", "model.layers.52.block_sparse_moe.experts.109.w3", "model.layers.52.block_sparse_moe.experts.110.w3", "model.layers.52.block_sparse_moe.experts.111.w3", "model.layers.52.block_sparse_moe.experts.112.w3", "model.layers.52.block_sparse_moe.experts.113.w3", "model.layers.52.block_sparse_moe.experts.114.w3", "model.layers.52.block_sparse_moe.experts.115.w3", "model.layers.52.block_sparse_moe.experts.116.w3", "model.layers.52.block_sparse_moe.experts.117.w3", "model.layers.52.block_sparse_moe.experts.118.w3", "model.layers.52.block_sparse_moe.experts.119.w3", "model.layers.52.block_sparse_moe.experts.120.w3", "model.layers.52.block_sparse_moe.experts.121.w3", "model.layers.52.block_sparse_moe.experts.122.w3", "model.layers.52.block_sparse_moe.experts.123.w3", "model.layers.52.block_sparse_moe.experts.124.w3", "model.layers.52.block_sparse_moe.experts.125.w3", "model.layers.52.block_sparse_moe.experts.126.w3", "model.layers.52.block_sparse_moe.experts.127.w3", "model.layers.52.block_sparse_moe.experts.128.w3", "model.layers.52.block_sparse_moe.experts.129.w3", "model.layers.52.block_sparse_moe.experts.130.w3", "model.layers.52.block_sparse_moe.experts.131.w3", "model.layers.52.block_sparse_moe.experts.132.w3", "model.layers.52.block_sparse_moe.experts.133.w3", "model.layers.52.block_sparse_moe.experts.134.w3", "model.layers.52.block_sparse_moe.experts.135.w3", "model.layers.52.block_sparse_moe.experts.136.w3", "model.layers.52.block_sparse_moe.experts.137.w3", "model.layers.52.block_sparse_moe.experts.138.w3", "model.layers.52.block_sparse_moe.experts.139.w3", "model.layers.52.block_sparse_moe.experts.140.w3", "model.layers.52.block_sparse_moe.experts.141.w3", "model.layers.52.block_sparse_moe.experts.142.w3", "model.layers.52.block_sparse_moe.experts.143.w3", "model.layers.52.block_sparse_moe.experts.144.w3", "model.layers.52.block_sparse_moe.experts.145.w3", "model.layers.52.block_sparse_moe.experts.146.w3", "model.layers.52.block_sparse_moe.experts.147.w3", "model.layers.52.block_sparse_moe.experts.148.w3", "model.layers.52.block_sparse_moe.experts.149.w3", "model.layers.52.block_sparse_moe.experts.150.w3", "model.layers.52.block_sparse_moe.experts.151.w3", "model.layers.52.block_sparse_moe.experts.152.w3", "model.layers.52.block_sparse_moe.experts.153.w3", "model.layers.52.block_sparse_moe.experts.154.w3", "model.layers.52.block_sparse_moe.experts.155.w3", "model.layers.52.block_sparse_moe.experts.156.w3", "model.layers.52.block_sparse_moe.experts.157.w3", "model.layers.52.block_sparse_moe.experts.158.w3", "model.layers.52.block_sparse_moe.experts.159.w3", "model.layers.52.block_sparse_moe.experts.160.w3", "model.layers.52.block_sparse_moe.experts.161.w3", "model.layers.52.block_sparse_moe.experts.162.w3", "model.layers.52.block_sparse_moe.experts.163.w3", "model.layers.52.block_sparse_moe.experts.164.w3", "model.layers.52.block_sparse_moe.experts.165.w3", "model.layers.52.block_sparse_moe.experts.166.w3", "model.layers.52.block_sparse_moe.experts.167.w3", "model.layers.52.block_sparse_moe.experts.168.w3", "model.layers.52.block_sparse_moe.experts.169.w3", "model.layers.52.block_sparse_moe.experts.170.w3", "model.layers.52.block_sparse_moe.experts.171.w3", "model.layers.52.block_sparse_moe.experts.172.w3", "model.layers.52.block_sparse_moe.experts.173.w3", "model.layers.52.block_sparse_moe.experts.174.w3", "model.layers.52.block_sparse_moe.experts.175.w3", "model.layers.52.block_sparse_moe.experts.176.w3", "model.layers.52.block_sparse_moe.experts.177.w3", "model.layers.52.block_sparse_moe.experts.178.w3", "model.layers.52.block_sparse_moe.experts.179.w3", "model.layers.52.block_sparse_moe.experts.180.w3", "model.layers.52.block_sparse_moe.experts.181.w3", "model.layers.52.block_sparse_moe.experts.182.w3", "model.layers.52.block_sparse_moe.experts.183.w3", "model.layers.52.block_sparse_moe.experts.184.w3", "model.layers.52.block_sparse_moe.experts.185.w3", "model.layers.52.block_sparse_moe.experts.186.w3", "model.layers.52.block_sparse_moe.experts.187.w3", "model.layers.52.block_sparse_moe.experts.188.w3", "model.layers.52.block_sparse_moe.experts.189.w3", "model.layers.52.block_sparse_moe.experts.190.w3", "model.layers.52.block_sparse_moe.experts.191.w3", "model.layers.52.block_sparse_moe.experts.192.w3", "model.layers.52.block_sparse_moe.experts.193.w3", "model.layers.52.block_sparse_moe.experts.194.w3", "model.layers.52.block_sparse_moe.experts.195.w3", "model.layers.52.block_sparse_moe.experts.196.w3", "model.layers.52.block_sparse_moe.experts.197.w3", "model.layers.52.block_sparse_moe.experts.198.w3", "model.layers.52.block_sparse_moe.experts.199.w3", "model.layers.52.block_sparse_moe.experts.200.w3", "model.layers.52.block_sparse_moe.experts.201.w3", "model.layers.52.block_sparse_moe.experts.202.w3", "model.layers.52.block_sparse_moe.experts.203.w3", "model.layers.52.block_sparse_moe.experts.204.w3", "model.layers.52.block_sparse_moe.experts.205.w3", "model.layers.52.block_sparse_moe.experts.206.w3", "model.layers.52.block_sparse_moe.experts.207.w3", "model.layers.52.block_sparse_moe.experts.208.w3", "model.layers.52.block_sparse_moe.experts.209.w3", "model.layers.52.block_sparse_moe.experts.210.w3", "model.layers.52.block_sparse_moe.experts.211.w3", "model.layers.52.block_sparse_moe.experts.212.w3", "model.layers.52.block_sparse_moe.experts.213.w3", "model.layers.52.block_sparse_moe.experts.214.w3", "model.layers.52.block_sparse_moe.experts.215.w3", "model.layers.52.block_sparse_moe.experts.216.w3", "model.layers.52.block_sparse_moe.experts.217.w3", "model.layers.52.block_sparse_moe.experts.218.w3", "model.layers.52.block_sparse_moe.experts.219.w3", "model.layers.52.block_sparse_moe.experts.220.w3", "model.layers.52.block_sparse_moe.experts.221.w3", "model.layers.52.block_sparse_moe.experts.222.w3", "model.layers.52.block_sparse_moe.experts.223.w3", "model.layers.52.block_sparse_moe.experts.224.w3", "model.layers.52.block_sparse_moe.experts.225.w3", "model.layers.52.block_sparse_moe.experts.226.w3", "model.layers.52.block_sparse_moe.experts.227.w3", "model.layers.52.block_sparse_moe.experts.228.w3", "model.layers.52.block_sparse_moe.experts.229.w3", "model.layers.52.block_sparse_moe.experts.230.w3", "model.layers.52.block_sparse_moe.experts.231.w3", "model.layers.52.block_sparse_moe.experts.232.w3", "model.layers.52.block_sparse_moe.experts.233.w3", "model.layers.52.block_sparse_moe.experts.234.w3", "model.layers.52.block_sparse_moe.experts.235.w3", "model.layers.52.block_sparse_moe.experts.236.w3", "model.layers.52.block_sparse_moe.experts.237.w3", "model.layers.52.block_sparse_moe.experts.238.w3", "model.layers.52.block_sparse_moe.experts.239.w3", "model.layers.52.block_sparse_moe.experts.240.w3", "model.layers.52.block_sparse_moe.experts.241.w3", "model.layers.52.block_sparse_moe.experts.242.w3", "model.layers.52.block_sparse_moe.experts.243.w3", "model.layers.52.block_sparse_moe.experts.244.w3", "model.layers.52.block_sparse_moe.experts.245.w3", "model.layers.52.block_sparse_moe.experts.246.w3", "model.layers.52.block_sparse_moe.experts.247.w3", "model.layers.52.block_sparse_moe.experts.248.w3", "model.layers.52.block_sparse_moe.experts.249.w3", "model.layers.52.block_sparse_moe.experts.250.w3", "model.layers.52.block_sparse_moe.experts.251.w3", "model.layers.52.block_sparse_moe.experts.252.w3", "model.layers.52.block_sparse_moe.experts.253.w3", "model.layers.52.block_sparse_moe.experts.254.w3", "model.layers.52.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0002941478043794743, "dbits": 2415919104 } ] }, { "idx": 264, "layers": [ "model.layers.52.block_sparse_moe.experts.0.w2", "model.layers.52.block_sparse_moe.experts.1.w2", "model.layers.52.block_sparse_moe.experts.2.w2", "model.layers.52.block_sparse_moe.experts.3.w2", "model.layers.52.block_sparse_moe.experts.4.w2", "model.layers.52.block_sparse_moe.experts.5.w2", "model.layers.52.block_sparse_moe.experts.6.w2", "model.layers.52.block_sparse_moe.experts.7.w2", "model.layers.52.block_sparse_moe.experts.8.w2", "model.layers.52.block_sparse_moe.experts.9.w2", "model.layers.52.block_sparse_moe.experts.10.w2", "model.layers.52.block_sparse_moe.experts.11.w2", "model.layers.52.block_sparse_moe.experts.12.w2", "model.layers.52.block_sparse_moe.experts.13.w2", "model.layers.52.block_sparse_moe.experts.14.w2", "model.layers.52.block_sparse_moe.experts.15.w2", "model.layers.52.block_sparse_moe.experts.16.w2", "model.layers.52.block_sparse_moe.experts.17.w2", "model.layers.52.block_sparse_moe.experts.18.w2", "model.layers.52.block_sparse_moe.experts.19.w2", "model.layers.52.block_sparse_moe.experts.20.w2", "model.layers.52.block_sparse_moe.experts.21.w2", "model.layers.52.block_sparse_moe.experts.22.w2", "model.layers.52.block_sparse_moe.experts.23.w2", "model.layers.52.block_sparse_moe.experts.24.w2", "model.layers.52.block_sparse_moe.experts.25.w2", "model.layers.52.block_sparse_moe.experts.26.w2", "model.layers.52.block_sparse_moe.experts.27.w2", "model.layers.52.block_sparse_moe.experts.28.w2", "model.layers.52.block_sparse_moe.experts.29.w2", "model.layers.52.block_sparse_moe.experts.30.w2", "model.layers.52.block_sparse_moe.experts.31.w2", "model.layers.52.block_sparse_moe.experts.32.w2", "model.layers.52.block_sparse_moe.experts.33.w2", "model.layers.52.block_sparse_moe.experts.34.w2", "model.layers.52.block_sparse_moe.experts.35.w2", "model.layers.52.block_sparse_moe.experts.36.w2", "model.layers.52.block_sparse_moe.experts.37.w2", "model.layers.52.block_sparse_moe.experts.38.w2", "model.layers.52.block_sparse_moe.experts.39.w2", "model.layers.52.block_sparse_moe.experts.40.w2", "model.layers.52.block_sparse_moe.experts.41.w2", "model.layers.52.block_sparse_moe.experts.42.w2", "model.layers.52.block_sparse_moe.experts.43.w2", "model.layers.52.block_sparse_moe.experts.44.w2", "model.layers.52.block_sparse_moe.experts.45.w2", "model.layers.52.block_sparse_moe.experts.46.w2", "model.layers.52.block_sparse_moe.experts.47.w2", "model.layers.52.block_sparse_moe.experts.48.w2", "model.layers.52.block_sparse_moe.experts.49.w2", "model.layers.52.block_sparse_moe.experts.50.w2", "model.layers.52.block_sparse_moe.experts.51.w2", "model.layers.52.block_sparse_moe.experts.52.w2", "model.layers.52.block_sparse_moe.experts.53.w2", "model.layers.52.block_sparse_moe.experts.54.w2", "model.layers.52.block_sparse_moe.experts.55.w2", "model.layers.52.block_sparse_moe.experts.56.w2", "model.layers.52.block_sparse_moe.experts.57.w2", "model.layers.52.block_sparse_moe.experts.58.w2", "model.layers.52.block_sparse_moe.experts.59.w2", "model.layers.52.block_sparse_moe.experts.60.w2", "model.layers.52.block_sparse_moe.experts.61.w2", "model.layers.52.block_sparse_moe.experts.62.w2", "model.layers.52.block_sparse_moe.experts.63.w2", "model.layers.52.block_sparse_moe.experts.64.w2", "model.layers.52.block_sparse_moe.experts.65.w2", "model.layers.52.block_sparse_moe.experts.66.w2", "model.layers.52.block_sparse_moe.experts.67.w2", "model.layers.52.block_sparse_moe.experts.68.w2", "model.layers.52.block_sparse_moe.experts.69.w2", "model.layers.52.block_sparse_moe.experts.70.w2", "model.layers.52.block_sparse_moe.experts.71.w2", "model.layers.52.block_sparse_moe.experts.72.w2", "model.layers.52.block_sparse_moe.experts.73.w2", "model.layers.52.block_sparse_moe.experts.74.w2", "model.layers.52.block_sparse_moe.experts.75.w2", "model.layers.52.block_sparse_moe.experts.76.w2", "model.layers.52.block_sparse_moe.experts.77.w2", "model.layers.52.block_sparse_moe.experts.78.w2", "model.layers.52.block_sparse_moe.experts.79.w2", "model.layers.52.block_sparse_moe.experts.80.w2", "model.layers.52.block_sparse_moe.experts.81.w2", "model.layers.52.block_sparse_moe.experts.82.w2", "model.layers.52.block_sparse_moe.experts.83.w2", "model.layers.52.block_sparse_moe.experts.84.w2", "model.layers.52.block_sparse_moe.experts.85.w2", "model.layers.52.block_sparse_moe.experts.86.w2", "model.layers.52.block_sparse_moe.experts.87.w2", "model.layers.52.block_sparse_moe.experts.88.w2", "model.layers.52.block_sparse_moe.experts.89.w2", "model.layers.52.block_sparse_moe.experts.90.w2", "model.layers.52.block_sparse_moe.experts.91.w2", "model.layers.52.block_sparse_moe.experts.92.w2", "model.layers.52.block_sparse_moe.experts.93.w2", "model.layers.52.block_sparse_moe.experts.94.w2", "model.layers.52.block_sparse_moe.experts.95.w2", "model.layers.52.block_sparse_moe.experts.96.w2", "model.layers.52.block_sparse_moe.experts.97.w2", "model.layers.52.block_sparse_moe.experts.98.w2", "model.layers.52.block_sparse_moe.experts.99.w2", "model.layers.52.block_sparse_moe.experts.100.w2", "model.layers.52.block_sparse_moe.experts.101.w2", "model.layers.52.block_sparse_moe.experts.102.w2", "model.layers.52.block_sparse_moe.experts.103.w2", "model.layers.52.block_sparse_moe.experts.104.w2", "model.layers.52.block_sparse_moe.experts.105.w2", "model.layers.52.block_sparse_moe.experts.106.w2", "model.layers.52.block_sparse_moe.experts.107.w2", "model.layers.52.block_sparse_moe.experts.108.w2", "model.layers.52.block_sparse_moe.experts.109.w2", "model.layers.52.block_sparse_moe.experts.110.w2", "model.layers.52.block_sparse_moe.experts.111.w2", "model.layers.52.block_sparse_moe.experts.112.w2", "model.layers.52.block_sparse_moe.experts.113.w2", "model.layers.52.block_sparse_moe.experts.114.w2", "model.layers.52.block_sparse_moe.experts.115.w2", "model.layers.52.block_sparse_moe.experts.116.w2", "model.layers.52.block_sparse_moe.experts.117.w2", "model.layers.52.block_sparse_moe.experts.118.w2", "model.layers.52.block_sparse_moe.experts.119.w2", "model.layers.52.block_sparse_moe.experts.120.w2", "model.layers.52.block_sparse_moe.experts.121.w2", "model.layers.52.block_sparse_moe.experts.122.w2", "model.layers.52.block_sparse_moe.experts.123.w2", "model.layers.52.block_sparse_moe.experts.124.w2", "model.layers.52.block_sparse_moe.experts.125.w2", "model.layers.52.block_sparse_moe.experts.126.w2", "model.layers.52.block_sparse_moe.experts.127.w2", "model.layers.52.block_sparse_moe.experts.128.w2", "model.layers.52.block_sparse_moe.experts.129.w2", "model.layers.52.block_sparse_moe.experts.130.w2", "model.layers.52.block_sparse_moe.experts.131.w2", "model.layers.52.block_sparse_moe.experts.132.w2", "model.layers.52.block_sparse_moe.experts.133.w2", "model.layers.52.block_sparse_moe.experts.134.w2", "model.layers.52.block_sparse_moe.experts.135.w2", "model.layers.52.block_sparse_moe.experts.136.w2", "model.layers.52.block_sparse_moe.experts.137.w2", "model.layers.52.block_sparse_moe.experts.138.w2", "model.layers.52.block_sparse_moe.experts.139.w2", "model.layers.52.block_sparse_moe.experts.140.w2", "model.layers.52.block_sparse_moe.experts.141.w2", "model.layers.52.block_sparse_moe.experts.142.w2", "model.layers.52.block_sparse_moe.experts.143.w2", "model.layers.52.block_sparse_moe.experts.144.w2", "model.layers.52.block_sparse_moe.experts.145.w2", "model.layers.52.block_sparse_moe.experts.146.w2", "model.layers.52.block_sparse_moe.experts.147.w2", "model.layers.52.block_sparse_moe.experts.148.w2", "model.layers.52.block_sparse_moe.experts.149.w2", "model.layers.52.block_sparse_moe.experts.150.w2", "model.layers.52.block_sparse_moe.experts.151.w2", "model.layers.52.block_sparse_moe.experts.152.w2", "model.layers.52.block_sparse_moe.experts.153.w2", "model.layers.52.block_sparse_moe.experts.154.w2", "model.layers.52.block_sparse_moe.experts.155.w2", "model.layers.52.block_sparse_moe.experts.156.w2", "model.layers.52.block_sparse_moe.experts.157.w2", "model.layers.52.block_sparse_moe.experts.158.w2", "model.layers.52.block_sparse_moe.experts.159.w2", "model.layers.52.block_sparse_moe.experts.160.w2", "model.layers.52.block_sparse_moe.experts.161.w2", "model.layers.52.block_sparse_moe.experts.162.w2", "model.layers.52.block_sparse_moe.experts.163.w2", "model.layers.52.block_sparse_moe.experts.164.w2", "model.layers.52.block_sparse_moe.experts.165.w2", "model.layers.52.block_sparse_moe.experts.166.w2", "model.layers.52.block_sparse_moe.experts.167.w2", "model.layers.52.block_sparse_moe.experts.168.w2", "model.layers.52.block_sparse_moe.experts.169.w2", "model.layers.52.block_sparse_moe.experts.170.w2", "model.layers.52.block_sparse_moe.experts.171.w2", "model.layers.52.block_sparse_moe.experts.172.w2", "model.layers.52.block_sparse_moe.experts.173.w2", "model.layers.52.block_sparse_moe.experts.174.w2", "model.layers.52.block_sparse_moe.experts.175.w2", "model.layers.52.block_sparse_moe.experts.176.w2", "model.layers.52.block_sparse_moe.experts.177.w2", "model.layers.52.block_sparse_moe.experts.178.w2", "model.layers.52.block_sparse_moe.experts.179.w2", "model.layers.52.block_sparse_moe.experts.180.w2", "model.layers.52.block_sparse_moe.experts.181.w2", "model.layers.52.block_sparse_moe.experts.182.w2", "model.layers.52.block_sparse_moe.experts.183.w2", "model.layers.52.block_sparse_moe.experts.184.w2", "model.layers.52.block_sparse_moe.experts.185.w2", "model.layers.52.block_sparse_moe.experts.186.w2", "model.layers.52.block_sparse_moe.experts.187.w2", "model.layers.52.block_sparse_moe.experts.188.w2", "model.layers.52.block_sparse_moe.experts.189.w2", "model.layers.52.block_sparse_moe.experts.190.w2", "model.layers.52.block_sparse_moe.experts.191.w2", "model.layers.52.block_sparse_moe.experts.192.w2", "model.layers.52.block_sparse_moe.experts.193.w2", "model.layers.52.block_sparse_moe.experts.194.w2", "model.layers.52.block_sparse_moe.experts.195.w2", "model.layers.52.block_sparse_moe.experts.196.w2", "model.layers.52.block_sparse_moe.experts.197.w2", "model.layers.52.block_sparse_moe.experts.198.w2", "model.layers.52.block_sparse_moe.experts.199.w2", "model.layers.52.block_sparse_moe.experts.200.w2", "model.layers.52.block_sparse_moe.experts.201.w2", "model.layers.52.block_sparse_moe.experts.202.w2", "model.layers.52.block_sparse_moe.experts.203.w2", "model.layers.52.block_sparse_moe.experts.204.w2", "model.layers.52.block_sparse_moe.experts.205.w2", "model.layers.52.block_sparse_moe.experts.206.w2", "model.layers.52.block_sparse_moe.experts.207.w2", "model.layers.52.block_sparse_moe.experts.208.w2", "model.layers.52.block_sparse_moe.experts.209.w2", "model.layers.52.block_sparse_moe.experts.210.w2", "model.layers.52.block_sparse_moe.experts.211.w2", "model.layers.52.block_sparse_moe.experts.212.w2", "model.layers.52.block_sparse_moe.experts.213.w2", "model.layers.52.block_sparse_moe.experts.214.w2", "model.layers.52.block_sparse_moe.experts.215.w2", "model.layers.52.block_sparse_moe.experts.216.w2", "model.layers.52.block_sparse_moe.experts.217.w2", "model.layers.52.block_sparse_moe.experts.218.w2", "model.layers.52.block_sparse_moe.experts.219.w2", "model.layers.52.block_sparse_moe.experts.220.w2", "model.layers.52.block_sparse_moe.experts.221.w2", "model.layers.52.block_sparse_moe.experts.222.w2", "model.layers.52.block_sparse_moe.experts.223.w2", "model.layers.52.block_sparse_moe.experts.224.w2", "model.layers.52.block_sparse_moe.experts.225.w2", "model.layers.52.block_sparse_moe.experts.226.w2", "model.layers.52.block_sparse_moe.experts.227.w2", "model.layers.52.block_sparse_moe.experts.228.w2", "model.layers.52.block_sparse_moe.experts.229.w2", "model.layers.52.block_sparse_moe.experts.230.w2", "model.layers.52.block_sparse_moe.experts.231.w2", "model.layers.52.block_sparse_moe.experts.232.w2", "model.layers.52.block_sparse_moe.experts.233.w2", "model.layers.52.block_sparse_moe.experts.234.w2", "model.layers.52.block_sparse_moe.experts.235.w2", "model.layers.52.block_sparse_moe.experts.236.w2", "model.layers.52.block_sparse_moe.experts.237.w2", "model.layers.52.block_sparse_moe.experts.238.w2", "model.layers.52.block_sparse_moe.experts.239.w2", "model.layers.52.block_sparse_moe.experts.240.w2", "model.layers.52.block_sparse_moe.experts.241.w2", "model.layers.52.block_sparse_moe.experts.242.w2", "model.layers.52.block_sparse_moe.experts.243.w2", "model.layers.52.block_sparse_moe.experts.244.w2", "model.layers.52.block_sparse_moe.experts.245.w2", "model.layers.52.block_sparse_moe.experts.246.w2", "model.layers.52.block_sparse_moe.experts.247.w2", "model.layers.52.block_sparse_moe.experts.248.w2", "model.layers.52.block_sparse_moe.experts.249.w2", "model.layers.52.block_sparse_moe.experts.250.w2", "model.layers.52.block_sparse_moe.experts.251.w2", "model.layers.52.block_sparse_moe.experts.252.w2", "model.layers.52.block_sparse_moe.experts.253.w2", "model.layers.52.block_sparse_moe.experts.254.w2", "model.layers.52.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 7.437467575072132e-05, "dbits": 1207959552 } ] }, { "idx": 265, "layers": [ "model.layers.53.self_attn.q_proj" ], "candidates": [ { "dkld": -0.00019901134073735394, "dbits": 18874368 } ] }, { "idx": 266, "layers": [ "model.layers.53.self_attn.k_proj", "model.layers.53.self_attn.v_proj" ], "candidates": [ { "dkld": 0.0011227879673242347, "dbits": 6291456 } ] }, { "idx": 267, "layers": [ "model.layers.53.self_attn.o_proj" ], "candidates": [ { "dkld": -0.00047720856964589276, "dbits": 18874368 } ] }, { "idx": 268, "layers": [ "model.layers.53.block_sparse_moe.experts.0.w1", "model.layers.53.block_sparse_moe.experts.1.w1", "model.layers.53.block_sparse_moe.experts.2.w1", "model.layers.53.block_sparse_moe.experts.3.w1", "model.layers.53.block_sparse_moe.experts.4.w1", "model.layers.53.block_sparse_moe.experts.5.w1", "model.layers.53.block_sparse_moe.experts.6.w1", "model.layers.53.block_sparse_moe.experts.7.w1", "model.layers.53.block_sparse_moe.experts.8.w1", "model.layers.53.block_sparse_moe.experts.9.w1", "model.layers.53.block_sparse_moe.experts.10.w1", "model.layers.53.block_sparse_moe.experts.11.w1", "model.layers.53.block_sparse_moe.experts.12.w1", "model.layers.53.block_sparse_moe.experts.13.w1", "model.layers.53.block_sparse_moe.experts.14.w1", "model.layers.53.block_sparse_moe.experts.15.w1", "model.layers.53.block_sparse_moe.experts.16.w1", "model.layers.53.block_sparse_moe.experts.17.w1", "model.layers.53.block_sparse_moe.experts.18.w1", "model.layers.53.block_sparse_moe.experts.19.w1", "model.layers.53.block_sparse_moe.experts.20.w1", "model.layers.53.block_sparse_moe.experts.21.w1", "model.layers.53.block_sparse_moe.experts.22.w1", "model.layers.53.block_sparse_moe.experts.23.w1", "model.layers.53.block_sparse_moe.experts.24.w1", "model.layers.53.block_sparse_moe.experts.25.w1", "model.layers.53.block_sparse_moe.experts.26.w1", "model.layers.53.block_sparse_moe.experts.27.w1", "model.layers.53.block_sparse_moe.experts.28.w1", "model.layers.53.block_sparse_moe.experts.29.w1", "model.layers.53.block_sparse_moe.experts.30.w1", "model.layers.53.block_sparse_moe.experts.31.w1", "model.layers.53.block_sparse_moe.experts.32.w1", "model.layers.53.block_sparse_moe.experts.33.w1", "model.layers.53.block_sparse_moe.experts.34.w1", "model.layers.53.block_sparse_moe.experts.35.w1", "model.layers.53.block_sparse_moe.experts.36.w1", "model.layers.53.block_sparse_moe.experts.37.w1", "model.layers.53.block_sparse_moe.experts.38.w1", "model.layers.53.block_sparse_moe.experts.39.w1", "model.layers.53.block_sparse_moe.experts.40.w1", "model.layers.53.block_sparse_moe.experts.41.w1", "model.layers.53.block_sparse_moe.experts.42.w1", "model.layers.53.block_sparse_moe.experts.43.w1", "model.layers.53.block_sparse_moe.experts.44.w1", "model.layers.53.block_sparse_moe.experts.45.w1", "model.layers.53.block_sparse_moe.experts.46.w1", "model.layers.53.block_sparse_moe.experts.47.w1", "model.layers.53.block_sparse_moe.experts.48.w1", "model.layers.53.block_sparse_moe.experts.49.w1", "model.layers.53.block_sparse_moe.experts.50.w1", "model.layers.53.block_sparse_moe.experts.51.w1", "model.layers.53.block_sparse_moe.experts.52.w1", "model.layers.53.block_sparse_moe.experts.53.w1", "model.layers.53.block_sparse_moe.experts.54.w1", "model.layers.53.block_sparse_moe.experts.55.w1", "model.layers.53.block_sparse_moe.experts.56.w1", "model.layers.53.block_sparse_moe.experts.57.w1", "model.layers.53.block_sparse_moe.experts.58.w1", "model.layers.53.block_sparse_moe.experts.59.w1", "model.layers.53.block_sparse_moe.experts.60.w1", "model.layers.53.block_sparse_moe.experts.61.w1", "model.layers.53.block_sparse_moe.experts.62.w1", "model.layers.53.block_sparse_moe.experts.63.w1", "model.layers.53.block_sparse_moe.experts.64.w1", "model.layers.53.block_sparse_moe.experts.65.w1", "model.layers.53.block_sparse_moe.experts.66.w1", "model.layers.53.block_sparse_moe.experts.67.w1", "model.layers.53.block_sparse_moe.experts.68.w1", "model.layers.53.block_sparse_moe.experts.69.w1", "model.layers.53.block_sparse_moe.experts.70.w1", "model.layers.53.block_sparse_moe.experts.71.w1", "model.layers.53.block_sparse_moe.experts.72.w1", "model.layers.53.block_sparse_moe.experts.73.w1", "model.layers.53.block_sparse_moe.experts.74.w1", "model.layers.53.block_sparse_moe.experts.75.w1", "model.layers.53.block_sparse_moe.experts.76.w1", "model.layers.53.block_sparse_moe.experts.77.w1", "model.layers.53.block_sparse_moe.experts.78.w1", "model.layers.53.block_sparse_moe.experts.79.w1", "model.layers.53.block_sparse_moe.experts.80.w1", "model.layers.53.block_sparse_moe.experts.81.w1", "model.layers.53.block_sparse_moe.experts.82.w1", "model.layers.53.block_sparse_moe.experts.83.w1", "model.layers.53.block_sparse_moe.experts.84.w1", "model.layers.53.block_sparse_moe.experts.85.w1", "model.layers.53.block_sparse_moe.experts.86.w1", "model.layers.53.block_sparse_moe.experts.87.w1", "model.layers.53.block_sparse_moe.experts.88.w1", "model.layers.53.block_sparse_moe.experts.89.w1", "model.layers.53.block_sparse_moe.experts.90.w1", "model.layers.53.block_sparse_moe.experts.91.w1", "model.layers.53.block_sparse_moe.experts.92.w1", "model.layers.53.block_sparse_moe.experts.93.w1", "model.layers.53.block_sparse_moe.experts.94.w1", "model.layers.53.block_sparse_moe.experts.95.w1", "model.layers.53.block_sparse_moe.experts.96.w1", "model.layers.53.block_sparse_moe.experts.97.w1", "model.layers.53.block_sparse_moe.experts.98.w1", "model.layers.53.block_sparse_moe.experts.99.w1", "model.layers.53.block_sparse_moe.experts.100.w1", "model.layers.53.block_sparse_moe.experts.101.w1", "model.layers.53.block_sparse_moe.experts.102.w1", "model.layers.53.block_sparse_moe.experts.103.w1", "model.layers.53.block_sparse_moe.experts.104.w1", "model.layers.53.block_sparse_moe.experts.105.w1", "model.layers.53.block_sparse_moe.experts.106.w1", "model.layers.53.block_sparse_moe.experts.107.w1", "model.layers.53.block_sparse_moe.experts.108.w1", "model.layers.53.block_sparse_moe.experts.109.w1", "model.layers.53.block_sparse_moe.experts.110.w1", "model.layers.53.block_sparse_moe.experts.111.w1", "model.layers.53.block_sparse_moe.experts.112.w1", "model.layers.53.block_sparse_moe.experts.113.w1", "model.layers.53.block_sparse_moe.experts.114.w1", "model.layers.53.block_sparse_moe.experts.115.w1", "model.layers.53.block_sparse_moe.experts.116.w1", "model.layers.53.block_sparse_moe.experts.117.w1", "model.layers.53.block_sparse_moe.experts.118.w1", "model.layers.53.block_sparse_moe.experts.119.w1", "model.layers.53.block_sparse_moe.experts.120.w1", "model.layers.53.block_sparse_moe.experts.121.w1", "model.layers.53.block_sparse_moe.experts.122.w1", "model.layers.53.block_sparse_moe.experts.123.w1", "model.layers.53.block_sparse_moe.experts.124.w1", "model.layers.53.block_sparse_moe.experts.125.w1", "model.layers.53.block_sparse_moe.experts.126.w1", "model.layers.53.block_sparse_moe.experts.127.w1", "model.layers.53.block_sparse_moe.experts.128.w1", "model.layers.53.block_sparse_moe.experts.129.w1", "model.layers.53.block_sparse_moe.experts.130.w1", "model.layers.53.block_sparse_moe.experts.131.w1", "model.layers.53.block_sparse_moe.experts.132.w1", "model.layers.53.block_sparse_moe.experts.133.w1", "model.layers.53.block_sparse_moe.experts.134.w1", "model.layers.53.block_sparse_moe.experts.135.w1", "model.layers.53.block_sparse_moe.experts.136.w1", "model.layers.53.block_sparse_moe.experts.137.w1", "model.layers.53.block_sparse_moe.experts.138.w1", "model.layers.53.block_sparse_moe.experts.139.w1", "model.layers.53.block_sparse_moe.experts.140.w1", "model.layers.53.block_sparse_moe.experts.141.w1", "model.layers.53.block_sparse_moe.experts.142.w1", "model.layers.53.block_sparse_moe.experts.143.w1", "model.layers.53.block_sparse_moe.experts.144.w1", "model.layers.53.block_sparse_moe.experts.145.w1", "model.layers.53.block_sparse_moe.experts.146.w1", "model.layers.53.block_sparse_moe.experts.147.w1", "model.layers.53.block_sparse_moe.experts.148.w1", "model.layers.53.block_sparse_moe.experts.149.w1", "model.layers.53.block_sparse_moe.experts.150.w1", "model.layers.53.block_sparse_moe.experts.151.w1", "model.layers.53.block_sparse_moe.experts.152.w1", "model.layers.53.block_sparse_moe.experts.153.w1", "model.layers.53.block_sparse_moe.experts.154.w1", "model.layers.53.block_sparse_moe.experts.155.w1", "model.layers.53.block_sparse_moe.experts.156.w1", "model.layers.53.block_sparse_moe.experts.157.w1", "model.layers.53.block_sparse_moe.experts.158.w1", "model.layers.53.block_sparse_moe.experts.159.w1", "model.layers.53.block_sparse_moe.experts.160.w1", "model.layers.53.block_sparse_moe.experts.161.w1", "model.layers.53.block_sparse_moe.experts.162.w1", "model.layers.53.block_sparse_moe.experts.163.w1", "model.layers.53.block_sparse_moe.experts.164.w1", "model.layers.53.block_sparse_moe.experts.165.w1", "model.layers.53.block_sparse_moe.experts.166.w1", "model.layers.53.block_sparse_moe.experts.167.w1", "model.layers.53.block_sparse_moe.experts.168.w1", "model.layers.53.block_sparse_moe.experts.169.w1", "model.layers.53.block_sparse_moe.experts.170.w1", "model.layers.53.block_sparse_moe.experts.171.w1", "model.layers.53.block_sparse_moe.experts.172.w1", "model.layers.53.block_sparse_moe.experts.173.w1", "model.layers.53.block_sparse_moe.experts.174.w1", "model.layers.53.block_sparse_moe.experts.175.w1", "model.layers.53.block_sparse_moe.experts.176.w1", "model.layers.53.block_sparse_moe.experts.177.w1", "model.layers.53.block_sparse_moe.experts.178.w1", "model.layers.53.block_sparse_moe.experts.179.w1", "model.layers.53.block_sparse_moe.experts.180.w1", "model.layers.53.block_sparse_moe.experts.181.w1", "model.layers.53.block_sparse_moe.experts.182.w1", "model.layers.53.block_sparse_moe.experts.183.w1", "model.layers.53.block_sparse_moe.experts.184.w1", "model.layers.53.block_sparse_moe.experts.185.w1", "model.layers.53.block_sparse_moe.experts.186.w1", "model.layers.53.block_sparse_moe.experts.187.w1", "model.layers.53.block_sparse_moe.experts.188.w1", "model.layers.53.block_sparse_moe.experts.189.w1", "model.layers.53.block_sparse_moe.experts.190.w1", "model.layers.53.block_sparse_moe.experts.191.w1", "model.layers.53.block_sparse_moe.experts.192.w1", "model.layers.53.block_sparse_moe.experts.193.w1", "model.layers.53.block_sparse_moe.experts.194.w1", "model.layers.53.block_sparse_moe.experts.195.w1", "model.layers.53.block_sparse_moe.experts.196.w1", "model.layers.53.block_sparse_moe.experts.197.w1", "model.layers.53.block_sparse_moe.experts.198.w1", "model.layers.53.block_sparse_moe.experts.199.w1", "model.layers.53.block_sparse_moe.experts.200.w1", "model.layers.53.block_sparse_moe.experts.201.w1", "model.layers.53.block_sparse_moe.experts.202.w1", "model.layers.53.block_sparse_moe.experts.203.w1", "model.layers.53.block_sparse_moe.experts.204.w1", "model.layers.53.block_sparse_moe.experts.205.w1", "model.layers.53.block_sparse_moe.experts.206.w1", "model.layers.53.block_sparse_moe.experts.207.w1", "model.layers.53.block_sparse_moe.experts.208.w1", "model.layers.53.block_sparse_moe.experts.209.w1", "model.layers.53.block_sparse_moe.experts.210.w1", "model.layers.53.block_sparse_moe.experts.211.w1", "model.layers.53.block_sparse_moe.experts.212.w1", "model.layers.53.block_sparse_moe.experts.213.w1", "model.layers.53.block_sparse_moe.experts.214.w1", "model.layers.53.block_sparse_moe.experts.215.w1", "model.layers.53.block_sparse_moe.experts.216.w1", "model.layers.53.block_sparse_moe.experts.217.w1", "model.layers.53.block_sparse_moe.experts.218.w1", "model.layers.53.block_sparse_moe.experts.219.w1", "model.layers.53.block_sparse_moe.experts.220.w1", "model.layers.53.block_sparse_moe.experts.221.w1", "model.layers.53.block_sparse_moe.experts.222.w1", "model.layers.53.block_sparse_moe.experts.223.w1", "model.layers.53.block_sparse_moe.experts.224.w1", "model.layers.53.block_sparse_moe.experts.225.w1", "model.layers.53.block_sparse_moe.experts.226.w1", "model.layers.53.block_sparse_moe.experts.227.w1", "model.layers.53.block_sparse_moe.experts.228.w1", "model.layers.53.block_sparse_moe.experts.229.w1", "model.layers.53.block_sparse_moe.experts.230.w1", "model.layers.53.block_sparse_moe.experts.231.w1", "model.layers.53.block_sparse_moe.experts.232.w1", "model.layers.53.block_sparse_moe.experts.233.w1", "model.layers.53.block_sparse_moe.experts.234.w1", "model.layers.53.block_sparse_moe.experts.235.w1", "model.layers.53.block_sparse_moe.experts.236.w1", "model.layers.53.block_sparse_moe.experts.237.w1", "model.layers.53.block_sparse_moe.experts.238.w1", "model.layers.53.block_sparse_moe.experts.239.w1", "model.layers.53.block_sparse_moe.experts.240.w1", "model.layers.53.block_sparse_moe.experts.241.w1", "model.layers.53.block_sparse_moe.experts.242.w1", "model.layers.53.block_sparse_moe.experts.243.w1", "model.layers.53.block_sparse_moe.experts.244.w1", "model.layers.53.block_sparse_moe.experts.245.w1", "model.layers.53.block_sparse_moe.experts.246.w1", "model.layers.53.block_sparse_moe.experts.247.w1", "model.layers.53.block_sparse_moe.experts.248.w1", "model.layers.53.block_sparse_moe.experts.249.w1", "model.layers.53.block_sparse_moe.experts.250.w1", "model.layers.53.block_sparse_moe.experts.251.w1", "model.layers.53.block_sparse_moe.experts.252.w1", "model.layers.53.block_sparse_moe.experts.253.w1", "model.layers.53.block_sparse_moe.experts.254.w1", "model.layers.53.block_sparse_moe.experts.255.w1", "model.layers.53.block_sparse_moe.experts.0.w3", "model.layers.53.block_sparse_moe.experts.1.w3", "model.layers.53.block_sparse_moe.experts.2.w3", "model.layers.53.block_sparse_moe.experts.3.w3", "model.layers.53.block_sparse_moe.experts.4.w3", "model.layers.53.block_sparse_moe.experts.5.w3", "model.layers.53.block_sparse_moe.experts.6.w3", "model.layers.53.block_sparse_moe.experts.7.w3", "model.layers.53.block_sparse_moe.experts.8.w3", "model.layers.53.block_sparse_moe.experts.9.w3", "model.layers.53.block_sparse_moe.experts.10.w3", "model.layers.53.block_sparse_moe.experts.11.w3", "model.layers.53.block_sparse_moe.experts.12.w3", "model.layers.53.block_sparse_moe.experts.13.w3", "model.layers.53.block_sparse_moe.experts.14.w3", "model.layers.53.block_sparse_moe.experts.15.w3", "model.layers.53.block_sparse_moe.experts.16.w3", "model.layers.53.block_sparse_moe.experts.17.w3", "model.layers.53.block_sparse_moe.experts.18.w3", "model.layers.53.block_sparse_moe.experts.19.w3", "model.layers.53.block_sparse_moe.experts.20.w3", "model.layers.53.block_sparse_moe.experts.21.w3", "model.layers.53.block_sparse_moe.experts.22.w3", "model.layers.53.block_sparse_moe.experts.23.w3", "model.layers.53.block_sparse_moe.experts.24.w3", "model.layers.53.block_sparse_moe.experts.25.w3", "model.layers.53.block_sparse_moe.experts.26.w3", "model.layers.53.block_sparse_moe.experts.27.w3", "model.layers.53.block_sparse_moe.experts.28.w3", "model.layers.53.block_sparse_moe.experts.29.w3", "model.layers.53.block_sparse_moe.experts.30.w3", "model.layers.53.block_sparse_moe.experts.31.w3", "model.layers.53.block_sparse_moe.experts.32.w3", "model.layers.53.block_sparse_moe.experts.33.w3", "model.layers.53.block_sparse_moe.experts.34.w3", "model.layers.53.block_sparse_moe.experts.35.w3", "model.layers.53.block_sparse_moe.experts.36.w3", "model.layers.53.block_sparse_moe.experts.37.w3", "model.layers.53.block_sparse_moe.experts.38.w3", "model.layers.53.block_sparse_moe.experts.39.w3", "model.layers.53.block_sparse_moe.experts.40.w3", "model.layers.53.block_sparse_moe.experts.41.w3", "model.layers.53.block_sparse_moe.experts.42.w3", "model.layers.53.block_sparse_moe.experts.43.w3", "model.layers.53.block_sparse_moe.experts.44.w3", "model.layers.53.block_sparse_moe.experts.45.w3", "model.layers.53.block_sparse_moe.experts.46.w3", "model.layers.53.block_sparse_moe.experts.47.w3", "model.layers.53.block_sparse_moe.experts.48.w3", "model.layers.53.block_sparse_moe.experts.49.w3", "model.layers.53.block_sparse_moe.experts.50.w3", "model.layers.53.block_sparse_moe.experts.51.w3", "model.layers.53.block_sparse_moe.experts.52.w3", "model.layers.53.block_sparse_moe.experts.53.w3", "model.layers.53.block_sparse_moe.experts.54.w3", "model.layers.53.block_sparse_moe.experts.55.w3", "model.layers.53.block_sparse_moe.experts.56.w3", "model.layers.53.block_sparse_moe.experts.57.w3", "model.layers.53.block_sparse_moe.experts.58.w3", "model.layers.53.block_sparse_moe.experts.59.w3", "model.layers.53.block_sparse_moe.experts.60.w3", "model.layers.53.block_sparse_moe.experts.61.w3", "model.layers.53.block_sparse_moe.experts.62.w3", "model.layers.53.block_sparse_moe.experts.63.w3", "model.layers.53.block_sparse_moe.experts.64.w3", "model.layers.53.block_sparse_moe.experts.65.w3", "model.layers.53.block_sparse_moe.experts.66.w3", "model.layers.53.block_sparse_moe.experts.67.w3", "model.layers.53.block_sparse_moe.experts.68.w3", "model.layers.53.block_sparse_moe.experts.69.w3", "model.layers.53.block_sparse_moe.experts.70.w3", "model.layers.53.block_sparse_moe.experts.71.w3", "model.layers.53.block_sparse_moe.experts.72.w3", "model.layers.53.block_sparse_moe.experts.73.w3", "model.layers.53.block_sparse_moe.experts.74.w3", "model.layers.53.block_sparse_moe.experts.75.w3", "model.layers.53.block_sparse_moe.experts.76.w3", "model.layers.53.block_sparse_moe.experts.77.w3", "model.layers.53.block_sparse_moe.experts.78.w3", "model.layers.53.block_sparse_moe.experts.79.w3", "model.layers.53.block_sparse_moe.experts.80.w3", "model.layers.53.block_sparse_moe.experts.81.w3", "model.layers.53.block_sparse_moe.experts.82.w3", "model.layers.53.block_sparse_moe.experts.83.w3", "model.layers.53.block_sparse_moe.experts.84.w3", "model.layers.53.block_sparse_moe.experts.85.w3", "model.layers.53.block_sparse_moe.experts.86.w3", "model.layers.53.block_sparse_moe.experts.87.w3", "model.layers.53.block_sparse_moe.experts.88.w3", "model.layers.53.block_sparse_moe.experts.89.w3", "model.layers.53.block_sparse_moe.experts.90.w3", "model.layers.53.block_sparse_moe.experts.91.w3", "model.layers.53.block_sparse_moe.experts.92.w3", "model.layers.53.block_sparse_moe.experts.93.w3", "model.layers.53.block_sparse_moe.experts.94.w3", "model.layers.53.block_sparse_moe.experts.95.w3", "model.layers.53.block_sparse_moe.experts.96.w3", "model.layers.53.block_sparse_moe.experts.97.w3", "model.layers.53.block_sparse_moe.experts.98.w3", "model.layers.53.block_sparse_moe.experts.99.w3", "model.layers.53.block_sparse_moe.experts.100.w3", "model.layers.53.block_sparse_moe.experts.101.w3", "model.layers.53.block_sparse_moe.experts.102.w3", "model.layers.53.block_sparse_moe.experts.103.w3", "model.layers.53.block_sparse_moe.experts.104.w3", "model.layers.53.block_sparse_moe.experts.105.w3", "model.layers.53.block_sparse_moe.experts.106.w3", "model.layers.53.block_sparse_moe.experts.107.w3", "model.layers.53.block_sparse_moe.experts.108.w3", "model.layers.53.block_sparse_moe.experts.109.w3", "model.layers.53.block_sparse_moe.experts.110.w3", "model.layers.53.block_sparse_moe.experts.111.w3", "model.layers.53.block_sparse_moe.experts.112.w3", "model.layers.53.block_sparse_moe.experts.113.w3", "model.layers.53.block_sparse_moe.experts.114.w3", "model.layers.53.block_sparse_moe.experts.115.w3", "model.layers.53.block_sparse_moe.experts.116.w3", "model.layers.53.block_sparse_moe.experts.117.w3", "model.layers.53.block_sparse_moe.experts.118.w3", "model.layers.53.block_sparse_moe.experts.119.w3", "model.layers.53.block_sparse_moe.experts.120.w3", "model.layers.53.block_sparse_moe.experts.121.w3", "model.layers.53.block_sparse_moe.experts.122.w3", "model.layers.53.block_sparse_moe.experts.123.w3", "model.layers.53.block_sparse_moe.experts.124.w3", "model.layers.53.block_sparse_moe.experts.125.w3", "model.layers.53.block_sparse_moe.experts.126.w3", "model.layers.53.block_sparse_moe.experts.127.w3", "model.layers.53.block_sparse_moe.experts.128.w3", "model.layers.53.block_sparse_moe.experts.129.w3", "model.layers.53.block_sparse_moe.experts.130.w3", "model.layers.53.block_sparse_moe.experts.131.w3", "model.layers.53.block_sparse_moe.experts.132.w3", "model.layers.53.block_sparse_moe.experts.133.w3", "model.layers.53.block_sparse_moe.experts.134.w3", "model.layers.53.block_sparse_moe.experts.135.w3", "model.layers.53.block_sparse_moe.experts.136.w3", "model.layers.53.block_sparse_moe.experts.137.w3", "model.layers.53.block_sparse_moe.experts.138.w3", "model.layers.53.block_sparse_moe.experts.139.w3", "model.layers.53.block_sparse_moe.experts.140.w3", "model.layers.53.block_sparse_moe.experts.141.w3", "model.layers.53.block_sparse_moe.experts.142.w3", "model.layers.53.block_sparse_moe.experts.143.w3", "model.layers.53.block_sparse_moe.experts.144.w3", "model.layers.53.block_sparse_moe.experts.145.w3", "model.layers.53.block_sparse_moe.experts.146.w3", "model.layers.53.block_sparse_moe.experts.147.w3", "model.layers.53.block_sparse_moe.experts.148.w3", "model.layers.53.block_sparse_moe.experts.149.w3", "model.layers.53.block_sparse_moe.experts.150.w3", "model.layers.53.block_sparse_moe.experts.151.w3", "model.layers.53.block_sparse_moe.experts.152.w3", "model.layers.53.block_sparse_moe.experts.153.w3", "model.layers.53.block_sparse_moe.experts.154.w3", "model.layers.53.block_sparse_moe.experts.155.w3", "model.layers.53.block_sparse_moe.experts.156.w3", "model.layers.53.block_sparse_moe.experts.157.w3", "model.layers.53.block_sparse_moe.experts.158.w3", "model.layers.53.block_sparse_moe.experts.159.w3", "model.layers.53.block_sparse_moe.experts.160.w3", "model.layers.53.block_sparse_moe.experts.161.w3", "model.layers.53.block_sparse_moe.experts.162.w3", "model.layers.53.block_sparse_moe.experts.163.w3", "model.layers.53.block_sparse_moe.experts.164.w3", "model.layers.53.block_sparse_moe.experts.165.w3", "model.layers.53.block_sparse_moe.experts.166.w3", "model.layers.53.block_sparse_moe.experts.167.w3", "model.layers.53.block_sparse_moe.experts.168.w3", "model.layers.53.block_sparse_moe.experts.169.w3", "model.layers.53.block_sparse_moe.experts.170.w3", "model.layers.53.block_sparse_moe.experts.171.w3", "model.layers.53.block_sparse_moe.experts.172.w3", "model.layers.53.block_sparse_moe.experts.173.w3", "model.layers.53.block_sparse_moe.experts.174.w3", "model.layers.53.block_sparse_moe.experts.175.w3", "model.layers.53.block_sparse_moe.experts.176.w3", "model.layers.53.block_sparse_moe.experts.177.w3", "model.layers.53.block_sparse_moe.experts.178.w3", "model.layers.53.block_sparse_moe.experts.179.w3", "model.layers.53.block_sparse_moe.experts.180.w3", "model.layers.53.block_sparse_moe.experts.181.w3", "model.layers.53.block_sparse_moe.experts.182.w3", "model.layers.53.block_sparse_moe.experts.183.w3", "model.layers.53.block_sparse_moe.experts.184.w3", "model.layers.53.block_sparse_moe.experts.185.w3", "model.layers.53.block_sparse_moe.experts.186.w3", "model.layers.53.block_sparse_moe.experts.187.w3", "model.layers.53.block_sparse_moe.experts.188.w3", "model.layers.53.block_sparse_moe.experts.189.w3", "model.layers.53.block_sparse_moe.experts.190.w3", "model.layers.53.block_sparse_moe.experts.191.w3", "model.layers.53.block_sparse_moe.experts.192.w3", "model.layers.53.block_sparse_moe.experts.193.w3", "model.layers.53.block_sparse_moe.experts.194.w3", "model.layers.53.block_sparse_moe.experts.195.w3", "model.layers.53.block_sparse_moe.experts.196.w3", "model.layers.53.block_sparse_moe.experts.197.w3", "model.layers.53.block_sparse_moe.experts.198.w3", "model.layers.53.block_sparse_moe.experts.199.w3", "model.layers.53.block_sparse_moe.experts.200.w3", "model.layers.53.block_sparse_moe.experts.201.w3", "model.layers.53.block_sparse_moe.experts.202.w3", "model.layers.53.block_sparse_moe.experts.203.w3", "model.layers.53.block_sparse_moe.experts.204.w3", "model.layers.53.block_sparse_moe.experts.205.w3", "model.layers.53.block_sparse_moe.experts.206.w3", "model.layers.53.block_sparse_moe.experts.207.w3", "model.layers.53.block_sparse_moe.experts.208.w3", "model.layers.53.block_sparse_moe.experts.209.w3", "model.layers.53.block_sparse_moe.experts.210.w3", "model.layers.53.block_sparse_moe.experts.211.w3", "model.layers.53.block_sparse_moe.experts.212.w3", "model.layers.53.block_sparse_moe.experts.213.w3", "model.layers.53.block_sparse_moe.experts.214.w3", "model.layers.53.block_sparse_moe.experts.215.w3", "model.layers.53.block_sparse_moe.experts.216.w3", "model.layers.53.block_sparse_moe.experts.217.w3", "model.layers.53.block_sparse_moe.experts.218.w3", "model.layers.53.block_sparse_moe.experts.219.w3", "model.layers.53.block_sparse_moe.experts.220.w3", "model.layers.53.block_sparse_moe.experts.221.w3", "model.layers.53.block_sparse_moe.experts.222.w3", "model.layers.53.block_sparse_moe.experts.223.w3", "model.layers.53.block_sparse_moe.experts.224.w3", "model.layers.53.block_sparse_moe.experts.225.w3", "model.layers.53.block_sparse_moe.experts.226.w3", "model.layers.53.block_sparse_moe.experts.227.w3", "model.layers.53.block_sparse_moe.experts.228.w3", "model.layers.53.block_sparse_moe.experts.229.w3", "model.layers.53.block_sparse_moe.experts.230.w3", "model.layers.53.block_sparse_moe.experts.231.w3", "model.layers.53.block_sparse_moe.experts.232.w3", "model.layers.53.block_sparse_moe.experts.233.w3", "model.layers.53.block_sparse_moe.experts.234.w3", "model.layers.53.block_sparse_moe.experts.235.w3", "model.layers.53.block_sparse_moe.experts.236.w3", "model.layers.53.block_sparse_moe.experts.237.w3", "model.layers.53.block_sparse_moe.experts.238.w3", "model.layers.53.block_sparse_moe.experts.239.w3", "model.layers.53.block_sparse_moe.experts.240.w3", "model.layers.53.block_sparse_moe.experts.241.w3", "model.layers.53.block_sparse_moe.experts.242.w3", "model.layers.53.block_sparse_moe.experts.243.w3", "model.layers.53.block_sparse_moe.experts.244.w3", "model.layers.53.block_sparse_moe.experts.245.w3", "model.layers.53.block_sparse_moe.experts.246.w3", "model.layers.53.block_sparse_moe.experts.247.w3", "model.layers.53.block_sparse_moe.experts.248.w3", "model.layers.53.block_sparse_moe.experts.249.w3", "model.layers.53.block_sparse_moe.experts.250.w3", "model.layers.53.block_sparse_moe.experts.251.w3", "model.layers.53.block_sparse_moe.experts.252.w3", "model.layers.53.block_sparse_moe.experts.253.w3", "model.layers.53.block_sparse_moe.experts.254.w3", "model.layers.53.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0001777712255716657, "dbits": 2415919104 } ] }, { "idx": 269, "layers": [ "model.layers.53.block_sparse_moe.experts.0.w2", "model.layers.53.block_sparse_moe.experts.1.w2", "model.layers.53.block_sparse_moe.experts.2.w2", "model.layers.53.block_sparse_moe.experts.3.w2", "model.layers.53.block_sparse_moe.experts.4.w2", "model.layers.53.block_sparse_moe.experts.5.w2", "model.layers.53.block_sparse_moe.experts.6.w2", "model.layers.53.block_sparse_moe.experts.7.w2", "model.layers.53.block_sparse_moe.experts.8.w2", "model.layers.53.block_sparse_moe.experts.9.w2", "model.layers.53.block_sparse_moe.experts.10.w2", "model.layers.53.block_sparse_moe.experts.11.w2", "model.layers.53.block_sparse_moe.experts.12.w2", "model.layers.53.block_sparse_moe.experts.13.w2", "model.layers.53.block_sparse_moe.experts.14.w2", "model.layers.53.block_sparse_moe.experts.15.w2", "model.layers.53.block_sparse_moe.experts.16.w2", "model.layers.53.block_sparse_moe.experts.17.w2", "model.layers.53.block_sparse_moe.experts.18.w2", "model.layers.53.block_sparse_moe.experts.19.w2", "model.layers.53.block_sparse_moe.experts.20.w2", "model.layers.53.block_sparse_moe.experts.21.w2", "model.layers.53.block_sparse_moe.experts.22.w2", "model.layers.53.block_sparse_moe.experts.23.w2", "model.layers.53.block_sparse_moe.experts.24.w2", "model.layers.53.block_sparse_moe.experts.25.w2", "model.layers.53.block_sparse_moe.experts.26.w2", "model.layers.53.block_sparse_moe.experts.27.w2", "model.layers.53.block_sparse_moe.experts.28.w2", "model.layers.53.block_sparse_moe.experts.29.w2", "model.layers.53.block_sparse_moe.experts.30.w2", "model.layers.53.block_sparse_moe.experts.31.w2", "model.layers.53.block_sparse_moe.experts.32.w2", "model.layers.53.block_sparse_moe.experts.33.w2", "model.layers.53.block_sparse_moe.experts.34.w2", "model.layers.53.block_sparse_moe.experts.35.w2", "model.layers.53.block_sparse_moe.experts.36.w2", "model.layers.53.block_sparse_moe.experts.37.w2", "model.layers.53.block_sparse_moe.experts.38.w2", "model.layers.53.block_sparse_moe.experts.39.w2", "model.layers.53.block_sparse_moe.experts.40.w2", "model.layers.53.block_sparse_moe.experts.41.w2", "model.layers.53.block_sparse_moe.experts.42.w2", "model.layers.53.block_sparse_moe.experts.43.w2", "model.layers.53.block_sparse_moe.experts.44.w2", "model.layers.53.block_sparse_moe.experts.45.w2", "model.layers.53.block_sparse_moe.experts.46.w2", "model.layers.53.block_sparse_moe.experts.47.w2", "model.layers.53.block_sparse_moe.experts.48.w2", "model.layers.53.block_sparse_moe.experts.49.w2", "model.layers.53.block_sparse_moe.experts.50.w2", "model.layers.53.block_sparse_moe.experts.51.w2", "model.layers.53.block_sparse_moe.experts.52.w2", "model.layers.53.block_sparse_moe.experts.53.w2", "model.layers.53.block_sparse_moe.experts.54.w2", "model.layers.53.block_sparse_moe.experts.55.w2", "model.layers.53.block_sparse_moe.experts.56.w2", "model.layers.53.block_sparse_moe.experts.57.w2", "model.layers.53.block_sparse_moe.experts.58.w2", "model.layers.53.block_sparse_moe.experts.59.w2", "model.layers.53.block_sparse_moe.experts.60.w2", "model.layers.53.block_sparse_moe.experts.61.w2", "model.layers.53.block_sparse_moe.experts.62.w2", "model.layers.53.block_sparse_moe.experts.63.w2", "model.layers.53.block_sparse_moe.experts.64.w2", "model.layers.53.block_sparse_moe.experts.65.w2", "model.layers.53.block_sparse_moe.experts.66.w2", "model.layers.53.block_sparse_moe.experts.67.w2", "model.layers.53.block_sparse_moe.experts.68.w2", "model.layers.53.block_sparse_moe.experts.69.w2", "model.layers.53.block_sparse_moe.experts.70.w2", "model.layers.53.block_sparse_moe.experts.71.w2", "model.layers.53.block_sparse_moe.experts.72.w2", "model.layers.53.block_sparse_moe.experts.73.w2", "model.layers.53.block_sparse_moe.experts.74.w2", "model.layers.53.block_sparse_moe.experts.75.w2", "model.layers.53.block_sparse_moe.experts.76.w2", "model.layers.53.block_sparse_moe.experts.77.w2", "model.layers.53.block_sparse_moe.experts.78.w2", "model.layers.53.block_sparse_moe.experts.79.w2", "model.layers.53.block_sparse_moe.experts.80.w2", "model.layers.53.block_sparse_moe.experts.81.w2", "model.layers.53.block_sparse_moe.experts.82.w2", "model.layers.53.block_sparse_moe.experts.83.w2", "model.layers.53.block_sparse_moe.experts.84.w2", "model.layers.53.block_sparse_moe.experts.85.w2", "model.layers.53.block_sparse_moe.experts.86.w2", "model.layers.53.block_sparse_moe.experts.87.w2", "model.layers.53.block_sparse_moe.experts.88.w2", "model.layers.53.block_sparse_moe.experts.89.w2", "model.layers.53.block_sparse_moe.experts.90.w2", "model.layers.53.block_sparse_moe.experts.91.w2", "model.layers.53.block_sparse_moe.experts.92.w2", "model.layers.53.block_sparse_moe.experts.93.w2", "model.layers.53.block_sparse_moe.experts.94.w2", "model.layers.53.block_sparse_moe.experts.95.w2", "model.layers.53.block_sparse_moe.experts.96.w2", "model.layers.53.block_sparse_moe.experts.97.w2", "model.layers.53.block_sparse_moe.experts.98.w2", "model.layers.53.block_sparse_moe.experts.99.w2", "model.layers.53.block_sparse_moe.experts.100.w2", "model.layers.53.block_sparse_moe.experts.101.w2", "model.layers.53.block_sparse_moe.experts.102.w2", "model.layers.53.block_sparse_moe.experts.103.w2", "model.layers.53.block_sparse_moe.experts.104.w2", "model.layers.53.block_sparse_moe.experts.105.w2", "model.layers.53.block_sparse_moe.experts.106.w2", "model.layers.53.block_sparse_moe.experts.107.w2", "model.layers.53.block_sparse_moe.experts.108.w2", "model.layers.53.block_sparse_moe.experts.109.w2", "model.layers.53.block_sparse_moe.experts.110.w2", "model.layers.53.block_sparse_moe.experts.111.w2", "model.layers.53.block_sparse_moe.experts.112.w2", "model.layers.53.block_sparse_moe.experts.113.w2", "model.layers.53.block_sparse_moe.experts.114.w2", "model.layers.53.block_sparse_moe.experts.115.w2", "model.layers.53.block_sparse_moe.experts.116.w2", "model.layers.53.block_sparse_moe.experts.117.w2", "model.layers.53.block_sparse_moe.experts.118.w2", "model.layers.53.block_sparse_moe.experts.119.w2", "model.layers.53.block_sparse_moe.experts.120.w2", "model.layers.53.block_sparse_moe.experts.121.w2", "model.layers.53.block_sparse_moe.experts.122.w2", "model.layers.53.block_sparse_moe.experts.123.w2", "model.layers.53.block_sparse_moe.experts.124.w2", "model.layers.53.block_sparse_moe.experts.125.w2", "model.layers.53.block_sparse_moe.experts.126.w2", "model.layers.53.block_sparse_moe.experts.127.w2", "model.layers.53.block_sparse_moe.experts.128.w2", "model.layers.53.block_sparse_moe.experts.129.w2", "model.layers.53.block_sparse_moe.experts.130.w2", "model.layers.53.block_sparse_moe.experts.131.w2", "model.layers.53.block_sparse_moe.experts.132.w2", "model.layers.53.block_sparse_moe.experts.133.w2", "model.layers.53.block_sparse_moe.experts.134.w2", "model.layers.53.block_sparse_moe.experts.135.w2", "model.layers.53.block_sparse_moe.experts.136.w2", "model.layers.53.block_sparse_moe.experts.137.w2", "model.layers.53.block_sparse_moe.experts.138.w2", "model.layers.53.block_sparse_moe.experts.139.w2", "model.layers.53.block_sparse_moe.experts.140.w2", "model.layers.53.block_sparse_moe.experts.141.w2", "model.layers.53.block_sparse_moe.experts.142.w2", "model.layers.53.block_sparse_moe.experts.143.w2", "model.layers.53.block_sparse_moe.experts.144.w2", "model.layers.53.block_sparse_moe.experts.145.w2", "model.layers.53.block_sparse_moe.experts.146.w2", "model.layers.53.block_sparse_moe.experts.147.w2", "model.layers.53.block_sparse_moe.experts.148.w2", "model.layers.53.block_sparse_moe.experts.149.w2", "model.layers.53.block_sparse_moe.experts.150.w2", "model.layers.53.block_sparse_moe.experts.151.w2", "model.layers.53.block_sparse_moe.experts.152.w2", "model.layers.53.block_sparse_moe.experts.153.w2", "model.layers.53.block_sparse_moe.experts.154.w2", "model.layers.53.block_sparse_moe.experts.155.w2", "model.layers.53.block_sparse_moe.experts.156.w2", "model.layers.53.block_sparse_moe.experts.157.w2", "model.layers.53.block_sparse_moe.experts.158.w2", "model.layers.53.block_sparse_moe.experts.159.w2", "model.layers.53.block_sparse_moe.experts.160.w2", "model.layers.53.block_sparse_moe.experts.161.w2", "model.layers.53.block_sparse_moe.experts.162.w2", "model.layers.53.block_sparse_moe.experts.163.w2", "model.layers.53.block_sparse_moe.experts.164.w2", "model.layers.53.block_sparse_moe.experts.165.w2", "model.layers.53.block_sparse_moe.experts.166.w2", "model.layers.53.block_sparse_moe.experts.167.w2", "model.layers.53.block_sparse_moe.experts.168.w2", "model.layers.53.block_sparse_moe.experts.169.w2", "model.layers.53.block_sparse_moe.experts.170.w2", "model.layers.53.block_sparse_moe.experts.171.w2", "model.layers.53.block_sparse_moe.experts.172.w2", "model.layers.53.block_sparse_moe.experts.173.w2", "model.layers.53.block_sparse_moe.experts.174.w2", "model.layers.53.block_sparse_moe.experts.175.w2", "model.layers.53.block_sparse_moe.experts.176.w2", "model.layers.53.block_sparse_moe.experts.177.w2", "model.layers.53.block_sparse_moe.experts.178.w2", "model.layers.53.block_sparse_moe.experts.179.w2", "model.layers.53.block_sparse_moe.experts.180.w2", "model.layers.53.block_sparse_moe.experts.181.w2", "model.layers.53.block_sparse_moe.experts.182.w2", "model.layers.53.block_sparse_moe.experts.183.w2", "model.layers.53.block_sparse_moe.experts.184.w2", "model.layers.53.block_sparse_moe.experts.185.w2", "model.layers.53.block_sparse_moe.experts.186.w2", "model.layers.53.block_sparse_moe.experts.187.w2", "model.layers.53.block_sparse_moe.experts.188.w2", "model.layers.53.block_sparse_moe.experts.189.w2", "model.layers.53.block_sparse_moe.experts.190.w2", "model.layers.53.block_sparse_moe.experts.191.w2", "model.layers.53.block_sparse_moe.experts.192.w2", "model.layers.53.block_sparse_moe.experts.193.w2", "model.layers.53.block_sparse_moe.experts.194.w2", "model.layers.53.block_sparse_moe.experts.195.w2", "model.layers.53.block_sparse_moe.experts.196.w2", "model.layers.53.block_sparse_moe.experts.197.w2", "model.layers.53.block_sparse_moe.experts.198.w2", "model.layers.53.block_sparse_moe.experts.199.w2", "model.layers.53.block_sparse_moe.experts.200.w2", "model.layers.53.block_sparse_moe.experts.201.w2", "model.layers.53.block_sparse_moe.experts.202.w2", "model.layers.53.block_sparse_moe.experts.203.w2", "model.layers.53.block_sparse_moe.experts.204.w2", "model.layers.53.block_sparse_moe.experts.205.w2", "model.layers.53.block_sparse_moe.experts.206.w2", "model.layers.53.block_sparse_moe.experts.207.w2", "model.layers.53.block_sparse_moe.experts.208.w2", "model.layers.53.block_sparse_moe.experts.209.w2", "model.layers.53.block_sparse_moe.experts.210.w2", "model.layers.53.block_sparse_moe.experts.211.w2", "model.layers.53.block_sparse_moe.experts.212.w2", "model.layers.53.block_sparse_moe.experts.213.w2", "model.layers.53.block_sparse_moe.experts.214.w2", "model.layers.53.block_sparse_moe.experts.215.w2", "model.layers.53.block_sparse_moe.experts.216.w2", "model.layers.53.block_sparse_moe.experts.217.w2", "model.layers.53.block_sparse_moe.experts.218.w2", "model.layers.53.block_sparse_moe.experts.219.w2", "model.layers.53.block_sparse_moe.experts.220.w2", "model.layers.53.block_sparse_moe.experts.221.w2", "model.layers.53.block_sparse_moe.experts.222.w2", "model.layers.53.block_sparse_moe.experts.223.w2", "model.layers.53.block_sparse_moe.experts.224.w2", "model.layers.53.block_sparse_moe.experts.225.w2", "model.layers.53.block_sparse_moe.experts.226.w2", "model.layers.53.block_sparse_moe.experts.227.w2", "model.layers.53.block_sparse_moe.experts.228.w2", "model.layers.53.block_sparse_moe.experts.229.w2", "model.layers.53.block_sparse_moe.experts.230.w2", "model.layers.53.block_sparse_moe.experts.231.w2", "model.layers.53.block_sparse_moe.experts.232.w2", "model.layers.53.block_sparse_moe.experts.233.w2", "model.layers.53.block_sparse_moe.experts.234.w2", "model.layers.53.block_sparse_moe.experts.235.w2", "model.layers.53.block_sparse_moe.experts.236.w2", "model.layers.53.block_sparse_moe.experts.237.w2", "model.layers.53.block_sparse_moe.experts.238.w2", "model.layers.53.block_sparse_moe.experts.239.w2", "model.layers.53.block_sparse_moe.experts.240.w2", "model.layers.53.block_sparse_moe.experts.241.w2", "model.layers.53.block_sparse_moe.experts.242.w2", "model.layers.53.block_sparse_moe.experts.243.w2", "model.layers.53.block_sparse_moe.experts.244.w2", "model.layers.53.block_sparse_moe.experts.245.w2", "model.layers.53.block_sparse_moe.experts.246.w2", "model.layers.53.block_sparse_moe.experts.247.w2", "model.layers.53.block_sparse_moe.experts.248.w2", "model.layers.53.block_sparse_moe.experts.249.w2", "model.layers.53.block_sparse_moe.experts.250.w2", "model.layers.53.block_sparse_moe.experts.251.w2", "model.layers.53.block_sparse_moe.experts.252.w2", "model.layers.53.block_sparse_moe.experts.253.w2", "model.layers.53.block_sparse_moe.experts.254.w2", "model.layers.53.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 7.69779086112754e-05, "dbits": 1207959552 } ] }, { "idx": 270, "layers": [ "model.layers.54.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0003921419382095226, "dbits": 18874368 } ] }, { "idx": 271, "layers": [ "model.layers.54.self_attn.k_proj", "model.layers.54.self_attn.v_proj" ], "candidates": [ { "dkld": 0.004717573896050431, "dbits": 6291456 } ] }, { "idx": 272, "layers": [ "model.layers.54.self_attn.o_proj" ], "candidates": [ { "dkld": -3.550872206686817e-05, "dbits": 18874368 } ] }, { "idx": 273, "layers": [ "model.layers.54.block_sparse_moe.experts.0.w1", "model.layers.54.block_sparse_moe.experts.1.w1", "model.layers.54.block_sparse_moe.experts.2.w1", "model.layers.54.block_sparse_moe.experts.3.w1", "model.layers.54.block_sparse_moe.experts.4.w1", "model.layers.54.block_sparse_moe.experts.5.w1", "model.layers.54.block_sparse_moe.experts.6.w1", "model.layers.54.block_sparse_moe.experts.7.w1", "model.layers.54.block_sparse_moe.experts.8.w1", "model.layers.54.block_sparse_moe.experts.9.w1", "model.layers.54.block_sparse_moe.experts.10.w1", "model.layers.54.block_sparse_moe.experts.11.w1", "model.layers.54.block_sparse_moe.experts.12.w1", "model.layers.54.block_sparse_moe.experts.13.w1", "model.layers.54.block_sparse_moe.experts.14.w1", "model.layers.54.block_sparse_moe.experts.15.w1", "model.layers.54.block_sparse_moe.experts.16.w1", "model.layers.54.block_sparse_moe.experts.17.w1", "model.layers.54.block_sparse_moe.experts.18.w1", "model.layers.54.block_sparse_moe.experts.19.w1", "model.layers.54.block_sparse_moe.experts.20.w1", "model.layers.54.block_sparse_moe.experts.21.w1", "model.layers.54.block_sparse_moe.experts.22.w1", "model.layers.54.block_sparse_moe.experts.23.w1", "model.layers.54.block_sparse_moe.experts.24.w1", "model.layers.54.block_sparse_moe.experts.25.w1", "model.layers.54.block_sparse_moe.experts.26.w1", "model.layers.54.block_sparse_moe.experts.27.w1", "model.layers.54.block_sparse_moe.experts.28.w1", "model.layers.54.block_sparse_moe.experts.29.w1", "model.layers.54.block_sparse_moe.experts.30.w1", "model.layers.54.block_sparse_moe.experts.31.w1", "model.layers.54.block_sparse_moe.experts.32.w1", "model.layers.54.block_sparse_moe.experts.33.w1", "model.layers.54.block_sparse_moe.experts.34.w1", "model.layers.54.block_sparse_moe.experts.35.w1", "model.layers.54.block_sparse_moe.experts.36.w1", "model.layers.54.block_sparse_moe.experts.37.w1", "model.layers.54.block_sparse_moe.experts.38.w1", "model.layers.54.block_sparse_moe.experts.39.w1", "model.layers.54.block_sparse_moe.experts.40.w1", "model.layers.54.block_sparse_moe.experts.41.w1", "model.layers.54.block_sparse_moe.experts.42.w1", "model.layers.54.block_sparse_moe.experts.43.w1", "model.layers.54.block_sparse_moe.experts.44.w1", "model.layers.54.block_sparse_moe.experts.45.w1", "model.layers.54.block_sparse_moe.experts.46.w1", "model.layers.54.block_sparse_moe.experts.47.w1", "model.layers.54.block_sparse_moe.experts.48.w1", "model.layers.54.block_sparse_moe.experts.49.w1", "model.layers.54.block_sparse_moe.experts.50.w1", "model.layers.54.block_sparse_moe.experts.51.w1", "model.layers.54.block_sparse_moe.experts.52.w1", "model.layers.54.block_sparse_moe.experts.53.w1", "model.layers.54.block_sparse_moe.experts.54.w1", "model.layers.54.block_sparse_moe.experts.55.w1", "model.layers.54.block_sparse_moe.experts.56.w1", "model.layers.54.block_sparse_moe.experts.57.w1", "model.layers.54.block_sparse_moe.experts.58.w1", "model.layers.54.block_sparse_moe.experts.59.w1", "model.layers.54.block_sparse_moe.experts.60.w1", "model.layers.54.block_sparse_moe.experts.61.w1", "model.layers.54.block_sparse_moe.experts.62.w1", "model.layers.54.block_sparse_moe.experts.63.w1", "model.layers.54.block_sparse_moe.experts.64.w1", "model.layers.54.block_sparse_moe.experts.65.w1", "model.layers.54.block_sparse_moe.experts.66.w1", "model.layers.54.block_sparse_moe.experts.67.w1", "model.layers.54.block_sparse_moe.experts.68.w1", "model.layers.54.block_sparse_moe.experts.69.w1", "model.layers.54.block_sparse_moe.experts.70.w1", "model.layers.54.block_sparse_moe.experts.71.w1", "model.layers.54.block_sparse_moe.experts.72.w1", "model.layers.54.block_sparse_moe.experts.73.w1", "model.layers.54.block_sparse_moe.experts.74.w1", "model.layers.54.block_sparse_moe.experts.75.w1", "model.layers.54.block_sparse_moe.experts.76.w1", "model.layers.54.block_sparse_moe.experts.77.w1", "model.layers.54.block_sparse_moe.experts.78.w1", "model.layers.54.block_sparse_moe.experts.79.w1", "model.layers.54.block_sparse_moe.experts.80.w1", "model.layers.54.block_sparse_moe.experts.81.w1", "model.layers.54.block_sparse_moe.experts.82.w1", "model.layers.54.block_sparse_moe.experts.83.w1", "model.layers.54.block_sparse_moe.experts.84.w1", "model.layers.54.block_sparse_moe.experts.85.w1", "model.layers.54.block_sparse_moe.experts.86.w1", "model.layers.54.block_sparse_moe.experts.87.w1", "model.layers.54.block_sparse_moe.experts.88.w1", "model.layers.54.block_sparse_moe.experts.89.w1", "model.layers.54.block_sparse_moe.experts.90.w1", "model.layers.54.block_sparse_moe.experts.91.w1", "model.layers.54.block_sparse_moe.experts.92.w1", "model.layers.54.block_sparse_moe.experts.93.w1", "model.layers.54.block_sparse_moe.experts.94.w1", "model.layers.54.block_sparse_moe.experts.95.w1", "model.layers.54.block_sparse_moe.experts.96.w1", "model.layers.54.block_sparse_moe.experts.97.w1", "model.layers.54.block_sparse_moe.experts.98.w1", "model.layers.54.block_sparse_moe.experts.99.w1", "model.layers.54.block_sparse_moe.experts.100.w1", "model.layers.54.block_sparse_moe.experts.101.w1", "model.layers.54.block_sparse_moe.experts.102.w1", "model.layers.54.block_sparse_moe.experts.103.w1", "model.layers.54.block_sparse_moe.experts.104.w1", "model.layers.54.block_sparse_moe.experts.105.w1", "model.layers.54.block_sparse_moe.experts.106.w1", "model.layers.54.block_sparse_moe.experts.107.w1", "model.layers.54.block_sparse_moe.experts.108.w1", "model.layers.54.block_sparse_moe.experts.109.w1", "model.layers.54.block_sparse_moe.experts.110.w1", "model.layers.54.block_sparse_moe.experts.111.w1", "model.layers.54.block_sparse_moe.experts.112.w1", "model.layers.54.block_sparse_moe.experts.113.w1", "model.layers.54.block_sparse_moe.experts.114.w1", "model.layers.54.block_sparse_moe.experts.115.w1", "model.layers.54.block_sparse_moe.experts.116.w1", "model.layers.54.block_sparse_moe.experts.117.w1", "model.layers.54.block_sparse_moe.experts.118.w1", "model.layers.54.block_sparse_moe.experts.119.w1", "model.layers.54.block_sparse_moe.experts.120.w1", "model.layers.54.block_sparse_moe.experts.121.w1", "model.layers.54.block_sparse_moe.experts.122.w1", "model.layers.54.block_sparse_moe.experts.123.w1", "model.layers.54.block_sparse_moe.experts.124.w1", "model.layers.54.block_sparse_moe.experts.125.w1", "model.layers.54.block_sparse_moe.experts.126.w1", "model.layers.54.block_sparse_moe.experts.127.w1", "model.layers.54.block_sparse_moe.experts.128.w1", "model.layers.54.block_sparse_moe.experts.129.w1", "model.layers.54.block_sparse_moe.experts.130.w1", "model.layers.54.block_sparse_moe.experts.131.w1", "model.layers.54.block_sparse_moe.experts.132.w1", "model.layers.54.block_sparse_moe.experts.133.w1", "model.layers.54.block_sparse_moe.experts.134.w1", "model.layers.54.block_sparse_moe.experts.135.w1", "model.layers.54.block_sparse_moe.experts.136.w1", "model.layers.54.block_sparse_moe.experts.137.w1", "model.layers.54.block_sparse_moe.experts.138.w1", "model.layers.54.block_sparse_moe.experts.139.w1", "model.layers.54.block_sparse_moe.experts.140.w1", "model.layers.54.block_sparse_moe.experts.141.w1", "model.layers.54.block_sparse_moe.experts.142.w1", "model.layers.54.block_sparse_moe.experts.143.w1", "model.layers.54.block_sparse_moe.experts.144.w1", "model.layers.54.block_sparse_moe.experts.145.w1", "model.layers.54.block_sparse_moe.experts.146.w1", "model.layers.54.block_sparse_moe.experts.147.w1", "model.layers.54.block_sparse_moe.experts.148.w1", "model.layers.54.block_sparse_moe.experts.149.w1", "model.layers.54.block_sparse_moe.experts.150.w1", "model.layers.54.block_sparse_moe.experts.151.w1", "model.layers.54.block_sparse_moe.experts.152.w1", "model.layers.54.block_sparse_moe.experts.153.w1", "model.layers.54.block_sparse_moe.experts.154.w1", "model.layers.54.block_sparse_moe.experts.155.w1", "model.layers.54.block_sparse_moe.experts.156.w1", "model.layers.54.block_sparse_moe.experts.157.w1", "model.layers.54.block_sparse_moe.experts.158.w1", "model.layers.54.block_sparse_moe.experts.159.w1", "model.layers.54.block_sparse_moe.experts.160.w1", "model.layers.54.block_sparse_moe.experts.161.w1", "model.layers.54.block_sparse_moe.experts.162.w1", "model.layers.54.block_sparse_moe.experts.163.w1", "model.layers.54.block_sparse_moe.experts.164.w1", "model.layers.54.block_sparse_moe.experts.165.w1", "model.layers.54.block_sparse_moe.experts.166.w1", "model.layers.54.block_sparse_moe.experts.167.w1", "model.layers.54.block_sparse_moe.experts.168.w1", "model.layers.54.block_sparse_moe.experts.169.w1", "model.layers.54.block_sparse_moe.experts.170.w1", "model.layers.54.block_sparse_moe.experts.171.w1", "model.layers.54.block_sparse_moe.experts.172.w1", "model.layers.54.block_sparse_moe.experts.173.w1", "model.layers.54.block_sparse_moe.experts.174.w1", "model.layers.54.block_sparse_moe.experts.175.w1", "model.layers.54.block_sparse_moe.experts.176.w1", "model.layers.54.block_sparse_moe.experts.177.w1", "model.layers.54.block_sparse_moe.experts.178.w1", "model.layers.54.block_sparse_moe.experts.179.w1", "model.layers.54.block_sparse_moe.experts.180.w1", "model.layers.54.block_sparse_moe.experts.181.w1", "model.layers.54.block_sparse_moe.experts.182.w1", "model.layers.54.block_sparse_moe.experts.183.w1", "model.layers.54.block_sparse_moe.experts.184.w1", "model.layers.54.block_sparse_moe.experts.185.w1", "model.layers.54.block_sparse_moe.experts.186.w1", "model.layers.54.block_sparse_moe.experts.187.w1", "model.layers.54.block_sparse_moe.experts.188.w1", "model.layers.54.block_sparse_moe.experts.189.w1", "model.layers.54.block_sparse_moe.experts.190.w1", "model.layers.54.block_sparse_moe.experts.191.w1", "model.layers.54.block_sparse_moe.experts.192.w1", "model.layers.54.block_sparse_moe.experts.193.w1", "model.layers.54.block_sparse_moe.experts.194.w1", "model.layers.54.block_sparse_moe.experts.195.w1", "model.layers.54.block_sparse_moe.experts.196.w1", "model.layers.54.block_sparse_moe.experts.197.w1", "model.layers.54.block_sparse_moe.experts.198.w1", "model.layers.54.block_sparse_moe.experts.199.w1", "model.layers.54.block_sparse_moe.experts.200.w1", "model.layers.54.block_sparse_moe.experts.201.w1", "model.layers.54.block_sparse_moe.experts.202.w1", "model.layers.54.block_sparse_moe.experts.203.w1", "model.layers.54.block_sparse_moe.experts.204.w1", "model.layers.54.block_sparse_moe.experts.205.w1", "model.layers.54.block_sparse_moe.experts.206.w1", "model.layers.54.block_sparse_moe.experts.207.w1", "model.layers.54.block_sparse_moe.experts.208.w1", "model.layers.54.block_sparse_moe.experts.209.w1", "model.layers.54.block_sparse_moe.experts.210.w1", "model.layers.54.block_sparse_moe.experts.211.w1", "model.layers.54.block_sparse_moe.experts.212.w1", "model.layers.54.block_sparse_moe.experts.213.w1", "model.layers.54.block_sparse_moe.experts.214.w1", "model.layers.54.block_sparse_moe.experts.215.w1", "model.layers.54.block_sparse_moe.experts.216.w1", "model.layers.54.block_sparse_moe.experts.217.w1", "model.layers.54.block_sparse_moe.experts.218.w1", "model.layers.54.block_sparse_moe.experts.219.w1", "model.layers.54.block_sparse_moe.experts.220.w1", "model.layers.54.block_sparse_moe.experts.221.w1", "model.layers.54.block_sparse_moe.experts.222.w1", "model.layers.54.block_sparse_moe.experts.223.w1", "model.layers.54.block_sparse_moe.experts.224.w1", "model.layers.54.block_sparse_moe.experts.225.w1", "model.layers.54.block_sparse_moe.experts.226.w1", "model.layers.54.block_sparse_moe.experts.227.w1", "model.layers.54.block_sparse_moe.experts.228.w1", "model.layers.54.block_sparse_moe.experts.229.w1", "model.layers.54.block_sparse_moe.experts.230.w1", "model.layers.54.block_sparse_moe.experts.231.w1", "model.layers.54.block_sparse_moe.experts.232.w1", "model.layers.54.block_sparse_moe.experts.233.w1", "model.layers.54.block_sparse_moe.experts.234.w1", "model.layers.54.block_sparse_moe.experts.235.w1", "model.layers.54.block_sparse_moe.experts.236.w1", "model.layers.54.block_sparse_moe.experts.237.w1", "model.layers.54.block_sparse_moe.experts.238.w1", "model.layers.54.block_sparse_moe.experts.239.w1", "model.layers.54.block_sparse_moe.experts.240.w1", "model.layers.54.block_sparse_moe.experts.241.w1", "model.layers.54.block_sparse_moe.experts.242.w1", "model.layers.54.block_sparse_moe.experts.243.w1", "model.layers.54.block_sparse_moe.experts.244.w1", "model.layers.54.block_sparse_moe.experts.245.w1", "model.layers.54.block_sparse_moe.experts.246.w1", "model.layers.54.block_sparse_moe.experts.247.w1", "model.layers.54.block_sparse_moe.experts.248.w1", "model.layers.54.block_sparse_moe.experts.249.w1", "model.layers.54.block_sparse_moe.experts.250.w1", "model.layers.54.block_sparse_moe.experts.251.w1", "model.layers.54.block_sparse_moe.experts.252.w1", "model.layers.54.block_sparse_moe.experts.253.w1", "model.layers.54.block_sparse_moe.experts.254.w1", "model.layers.54.block_sparse_moe.experts.255.w1", "model.layers.54.block_sparse_moe.experts.0.w3", "model.layers.54.block_sparse_moe.experts.1.w3", "model.layers.54.block_sparse_moe.experts.2.w3", "model.layers.54.block_sparse_moe.experts.3.w3", "model.layers.54.block_sparse_moe.experts.4.w3", "model.layers.54.block_sparse_moe.experts.5.w3", "model.layers.54.block_sparse_moe.experts.6.w3", "model.layers.54.block_sparse_moe.experts.7.w3", "model.layers.54.block_sparse_moe.experts.8.w3", "model.layers.54.block_sparse_moe.experts.9.w3", "model.layers.54.block_sparse_moe.experts.10.w3", "model.layers.54.block_sparse_moe.experts.11.w3", "model.layers.54.block_sparse_moe.experts.12.w3", "model.layers.54.block_sparse_moe.experts.13.w3", "model.layers.54.block_sparse_moe.experts.14.w3", "model.layers.54.block_sparse_moe.experts.15.w3", "model.layers.54.block_sparse_moe.experts.16.w3", "model.layers.54.block_sparse_moe.experts.17.w3", "model.layers.54.block_sparse_moe.experts.18.w3", "model.layers.54.block_sparse_moe.experts.19.w3", "model.layers.54.block_sparse_moe.experts.20.w3", "model.layers.54.block_sparse_moe.experts.21.w3", "model.layers.54.block_sparse_moe.experts.22.w3", "model.layers.54.block_sparse_moe.experts.23.w3", "model.layers.54.block_sparse_moe.experts.24.w3", "model.layers.54.block_sparse_moe.experts.25.w3", "model.layers.54.block_sparse_moe.experts.26.w3", "model.layers.54.block_sparse_moe.experts.27.w3", "model.layers.54.block_sparse_moe.experts.28.w3", "model.layers.54.block_sparse_moe.experts.29.w3", "model.layers.54.block_sparse_moe.experts.30.w3", "model.layers.54.block_sparse_moe.experts.31.w3", "model.layers.54.block_sparse_moe.experts.32.w3", "model.layers.54.block_sparse_moe.experts.33.w3", "model.layers.54.block_sparse_moe.experts.34.w3", "model.layers.54.block_sparse_moe.experts.35.w3", "model.layers.54.block_sparse_moe.experts.36.w3", "model.layers.54.block_sparse_moe.experts.37.w3", "model.layers.54.block_sparse_moe.experts.38.w3", "model.layers.54.block_sparse_moe.experts.39.w3", "model.layers.54.block_sparse_moe.experts.40.w3", "model.layers.54.block_sparse_moe.experts.41.w3", "model.layers.54.block_sparse_moe.experts.42.w3", "model.layers.54.block_sparse_moe.experts.43.w3", "model.layers.54.block_sparse_moe.experts.44.w3", "model.layers.54.block_sparse_moe.experts.45.w3", "model.layers.54.block_sparse_moe.experts.46.w3", "model.layers.54.block_sparse_moe.experts.47.w3", "model.layers.54.block_sparse_moe.experts.48.w3", "model.layers.54.block_sparse_moe.experts.49.w3", "model.layers.54.block_sparse_moe.experts.50.w3", "model.layers.54.block_sparse_moe.experts.51.w3", "model.layers.54.block_sparse_moe.experts.52.w3", "model.layers.54.block_sparse_moe.experts.53.w3", "model.layers.54.block_sparse_moe.experts.54.w3", "model.layers.54.block_sparse_moe.experts.55.w3", "model.layers.54.block_sparse_moe.experts.56.w3", "model.layers.54.block_sparse_moe.experts.57.w3", "model.layers.54.block_sparse_moe.experts.58.w3", "model.layers.54.block_sparse_moe.experts.59.w3", "model.layers.54.block_sparse_moe.experts.60.w3", "model.layers.54.block_sparse_moe.experts.61.w3", "model.layers.54.block_sparse_moe.experts.62.w3", "model.layers.54.block_sparse_moe.experts.63.w3", "model.layers.54.block_sparse_moe.experts.64.w3", "model.layers.54.block_sparse_moe.experts.65.w3", "model.layers.54.block_sparse_moe.experts.66.w3", "model.layers.54.block_sparse_moe.experts.67.w3", "model.layers.54.block_sparse_moe.experts.68.w3", "model.layers.54.block_sparse_moe.experts.69.w3", "model.layers.54.block_sparse_moe.experts.70.w3", "model.layers.54.block_sparse_moe.experts.71.w3", "model.layers.54.block_sparse_moe.experts.72.w3", "model.layers.54.block_sparse_moe.experts.73.w3", "model.layers.54.block_sparse_moe.experts.74.w3", "model.layers.54.block_sparse_moe.experts.75.w3", "model.layers.54.block_sparse_moe.experts.76.w3", "model.layers.54.block_sparse_moe.experts.77.w3", "model.layers.54.block_sparse_moe.experts.78.w3", "model.layers.54.block_sparse_moe.experts.79.w3", "model.layers.54.block_sparse_moe.experts.80.w3", "model.layers.54.block_sparse_moe.experts.81.w3", "model.layers.54.block_sparse_moe.experts.82.w3", "model.layers.54.block_sparse_moe.experts.83.w3", "model.layers.54.block_sparse_moe.experts.84.w3", "model.layers.54.block_sparse_moe.experts.85.w3", "model.layers.54.block_sparse_moe.experts.86.w3", "model.layers.54.block_sparse_moe.experts.87.w3", "model.layers.54.block_sparse_moe.experts.88.w3", "model.layers.54.block_sparse_moe.experts.89.w3", "model.layers.54.block_sparse_moe.experts.90.w3", "model.layers.54.block_sparse_moe.experts.91.w3", "model.layers.54.block_sparse_moe.experts.92.w3", "model.layers.54.block_sparse_moe.experts.93.w3", "model.layers.54.block_sparse_moe.experts.94.w3", "model.layers.54.block_sparse_moe.experts.95.w3", "model.layers.54.block_sparse_moe.experts.96.w3", "model.layers.54.block_sparse_moe.experts.97.w3", "model.layers.54.block_sparse_moe.experts.98.w3", "model.layers.54.block_sparse_moe.experts.99.w3", "model.layers.54.block_sparse_moe.experts.100.w3", "model.layers.54.block_sparse_moe.experts.101.w3", "model.layers.54.block_sparse_moe.experts.102.w3", "model.layers.54.block_sparse_moe.experts.103.w3", "model.layers.54.block_sparse_moe.experts.104.w3", "model.layers.54.block_sparse_moe.experts.105.w3", "model.layers.54.block_sparse_moe.experts.106.w3", "model.layers.54.block_sparse_moe.experts.107.w3", "model.layers.54.block_sparse_moe.experts.108.w3", "model.layers.54.block_sparse_moe.experts.109.w3", "model.layers.54.block_sparse_moe.experts.110.w3", "model.layers.54.block_sparse_moe.experts.111.w3", "model.layers.54.block_sparse_moe.experts.112.w3", "model.layers.54.block_sparse_moe.experts.113.w3", "model.layers.54.block_sparse_moe.experts.114.w3", "model.layers.54.block_sparse_moe.experts.115.w3", "model.layers.54.block_sparse_moe.experts.116.w3", "model.layers.54.block_sparse_moe.experts.117.w3", "model.layers.54.block_sparse_moe.experts.118.w3", "model.layers.54.block_sparse_moe.experts.119.w3", "model.layers.54.block_sparse_moe.experts.120.w3", "model.layers.54.block_sparse_moe.experts.121.w3", "model.layers.54.block_sparse_moe.experts.122.w3", "model.layers.54.block_sparse_moe.experts.123.w3", "model.layers.54.block_sparse_moe.experts.124.w3", "model.layers.54.block_sparse_moe.experts.125.w3", "model.layers.54.block_sparse_moe.experts.126.w3", "model.layers.54.block_sparse_moe.experts.127.w3", "model.layers.54.block_sparse_moe.experts.128.w3", "model.layers.54.block_sparse_moe.experts.129.w3", "model.layers.54.block_sparse_moe.experts.130.w3", "model.layers.54.block_sparse_moe.experts.131.w3", "model.layers.54.block_sparse_moe.experts.132.w3", "model.layers.54.block_sparse_moe.experts.133.w3", "model.layers.54.block_sparse_moe.experts.134.w3", "model.layers.54.block_sparse_moe.experts.135.w3", "model.layers.54.block_sparse_moe.experts.136.w3", "model.layers.54.block_sparse_moe.experts.137.w3", "model.layers.54.block_sparse_moe.experts.138.w3", "model.layers.54.block_sparse_moe.experts.139.w3", "model.layers.54.block_sparse_moe.experts.140.w3", "model.layers.54.block_sparse_moe.experts.141.w3", "model.layers.54.block_sparse_moe.experts.142.w3", "model.layers.54.block_sparse_moe.experts.143.w3", "model.layers.54.block_sparse_moe.experts.144.w3", "model.layers.54.block_sparse_moe.experts.145.w3", "model.layers.54.block_sparse_moe.experts.146.w3", "model.layers.54.block_sparse_moe.experts.147.w3", "model.layers.54.block_sparse_moe.experts.148.w3", "model.layers.54.block_sparse_moe.experts.149.w3", "model.layers.54.block_sparse_moe.experts.150.w3", "model.layers.54.block_sparse_moe.experts.151.w3", "model.layers.54.block_sparse_moe.experts.152.w3", "model.layers.54.block_sparse_moe.experts.153.w3", "model.layers.54.block_sparse_moe.experts.154.w3", "model.layers.54.block_sparse_moe.experts.155.w3", "model.layers.54.block_sparse_moe.experts.156.w3", "model.layers.54.block_sparse_moe.experts.157.w3", "model.layers.54.block_sparse_moe.experts.158.w3", "model.layers.54.block_sparse_moe.experts.159.w3", "model.layers.54.block_sparse_moe.experts.160.w3", "model.layers.54.block_sparse_moe.experts.161.w3", "model.layers.54.block_sparse_moe.experts.162.w3", "model.layers.54.block_sparse_moe.experts.163.w3", "model.layers.54.block_sparse_moe.experts.164.w3", "model.layers.54.block_sparse_moe.experts.165.w3", "model.layers.54.block_sparse_moe.experts.166.w3", "model.layers.54.block_sparse_moe.experts.167.w3", "model.layers.54.block_sparse_moe.experts.168.w3", "model.layers.54.block_sparse_moe.experts.169.w3", "model.layers.54.block_sparse_moe.experts.170.w3", "model.layers.54.block_sparse_moe.experts.171.w3", "model.layers.54.block_sparse_moe.experts.172.w3", "model.layers.54.block_sparse_moe.experts.173.w3", "model.layers.54.block_sparse_moe.experts.174.w3", "model.layers.54.block_sparse_moe.experts.175.w3", "model.layers.54.block_sparse_moe.experts.176.w3", "model.layers.54.block_sparse_moe.experts.177.w3", "model.layers.54.block_sparse_moe.experts.178.w3", "model.layers.54.block_sparse_moe.experts.179.w3", "model.layers.54.block_sparse_moe.experts.180.w3", "model.layers.54.block_sparse_moe.experts.181.w3", "model.layers.54.block_sparse_moe.experts.182.w3", "model.layers.54.block_sparse_moe.experts.183.w3", "model.layers.54.block_sparse_moe.experts.184.w3", "model.layers.54.block_sparse_moe.experts.185.w3", "model.layers.54.block_sparse_moe.experts.186.w3", "model.layers.54.block_sparse_moe.experts.187.w3", "model.layers.54.block_sparse_moe.experts.188.w3", "model.layers.54.block_sparse_moe.experts.189.w3", "model.layers.54.block_sparse_moe.experts.190.w3", "model.layers.54.block_sparse_moe.experts.191.w3", "model.layers.54.block_sparse_moe.experts.192.w3", "model.layers.54.block_sparse_moe.experts.193.w3", "model.layers.54.block_sparse_moe.experts.194.w3", "model.layers.54.block_sparse_moe.experts.195.w3", "model.layers.54.block_sparse_moe.experts.196.w3", "model.layers.54.block_sparse_moe.experts.197.w3", "model.layers.54.block_sparse_moe.experts.198.w3", "model.layers.54.block_sparse_moe.experts.199.w3", "model.layers.54.block_sparse_moe.experts.200.w3", "model.layers.54.block_sparse_moe.experts.201.w3", "model.layers.54.block_sparse_moe.experts.202.w3", "model.layers.54.block_sparse_moe.experts.203.w3", "model.layers.54.block_sparse_moe.experts.204.w3", "model.layers.54.block_sparse_moe.experts.205.w3", "model.layers.54.block_sparse_moe.experts.206.w3", "model.layers.54.block_sparse_moe.experts.207.w3", "model.layers.54.block_sparse_moe.experts.208.w3", "model.layers.54.block_sparse_moe.experts.209.w3", "model.layers.54.block_sparse_moe.experts.210.w3", "model.layers.54.block_sparse_moe.experts.211.w3", "model.layers.54.block_sparse_moe.experts.212.w3", "model.layers.54.block_sparse_moe.experts.213.w3", "model.layers.54.block_sparse_moe.experts.214.w3", "model.layers.54.block_sparse_moe.experts.215.w3", "model.layers.54.block_sparse_moe.experts.216.w3", "model.layers.54.block_sparse_moe.experts.217.w3", "model.layers.54.block_sparse_moe.experts.218.w3", "model.layers.54.block_sparse_moe.experts.219.w3", "model.layers.54.block_sparse_moe.experts.220.w3", "model.layers.54.block_sparse_moe.experts.221.w3", "model.layers.54.block_sparse_moe.experts.222.w3", "model.layers.54.block_sparse_moe.experts.223.w3", "model.layers.54.block_sparse_moe.experts.224.w3", "model.layers.54.block_sparse_moe.experts.225.w3", "model.layers.54.block_sparse_moe.experts.226.w3", "model.layers.54.block_sparse_moe.experts.227.w3", "model.layers.54.block_sparse_moe.experts.228.w3", "model.layers.54.block_sparse_moe.experts.229.w3", "model.layers.54.block_sparse_moe.experts.230.w3", "model.layers.54.block_sparse_moe.experts.231.w3", "model.layers.54.block_sparse_moe.experts.232.w3", "model.layers.54.block_sparse_moe.experts.233.w3", "model.layers.54.block_sparse_moe.experts.234.w3", "model.layers.54.block_sparse_moe.experts.235.w3", "model.layers.54.block_sparse_moe.experts.236.w3", "model.layers.54.block_sparse_moe.experts.237.w3", "model.layers.54.block_sparse_moe.experts.238.w3", "model.layers.54.block_sparse_moe.experts.239.w3", "model.layers.54.block_sparse_moe.experts.240.w3", "model.layers.54.block_sparse_moe.experts.241.w3", "model.layers.54.block_sparse_moe.experts.242.w3", "model.layers.54.block_sparse_moe.experts.243.w3", "model.layers.54.block_sparse_moe.experts.244.w3", "model.layers.54.block_sparse_moe.experts.245.w3", "model.layers.54.block_sparse_moe.experts.246.w3", "model.layers.54.block_sparse_moe.experts.247.w3", "model.layers.54.block_sparse_moe.experts.248.w3", "model.layers.54.block_sparse_moe.experts.249.w3", "model.layers.54.block_sparse_moe.experts.250.w3", "model.layers.54.block_sparse_moe.experts.251.w3", "model.layers.54.block_sparse_moe.experts.252.w3", "model.layers.54.block_sparse_moe.experts.253.w3", "model.layers.54.block_sparse_moe.experts.254.w3", "model.layers.54.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.00025581084191800274, "dbits": 2415919104 } ] }, { "idx": 274, "layers": [ "model.layers.54.block_sparse_moe.experts.0.w2", "model.layers.54.block_sparse_moe.experts.1.w2", "model.layers.54.block_sparse_moe.experts.2.w2", "model.layers.54.block_sparse_moe.experts.3.w2", "model.layers.54.block_sparse_moe.experts.4.w2", "model.layers.54.block_sparse_moe.experts.5.w2", "model.layers.54.block_sparse_moe.experts.6.w2", "model.layers.54.block_sparse_moe.experts.7.w2", "model.layers.54.block_sparse_moe.experts.8.w2", "model.layers.54.block_sparse_moe.experts.9.w2", "model.layers.54.block_sparse_moe.experts.10.w2", "model.layers.54.block_sparse_moe.experts.11.w2", "model.layers.54.block_sparse_moe.experts.12.w2", "model.layers.54.block_sparse_moe.experts.13.w2", "model.layers.54.block_sparse_moe.experts.14.w2", "model.layers.54.block_sparse_moe.experts.15.w2", "model.layers.54.block_sparse_moe.experts.16.w2", "model.layers.54.block_sparse_moe.experts.17.w2", "model.layers.54.block_sparse_moe.experts.18.w2", "model.layers.54.block_sparse_moe.experts.19.w2", "model.layers.54.block_sparse_moe.experts.20.w2", "model.layers.54.block_sparse_moe.experts.21.w2", "model.layers.54.block_sparse_moe.experts.22.w2", "model.layers.54.block_sparse_moe.experts.23.w2", "model.layers.54.block_sparse_moe.experts.24.w2", "model.layers.54.block_sparse_moe.experts.25.w2", "model.layers.54.block_sparse_moe.experts.26.w2", "model.layers.54.block_sparse_moe.experts.27.w2", "model.layers.54.block_sparse_moe.experts.28.w2", "model.layers.54.block_sparse_moe.experts.29.w2", "model.layers.54.block_sparse_moe.experts.30.w2", "model.layers.54.block_sparse_moe.experts.31.w2", "model.layers.54.block_sparse_moe.experts.32.w2", "model.layers.54.block_sparse_moe.experts.33.w2", "model.layers.54.block_sparse_moe.experts.34.w2", "model.layers.54.block_sparse_moe.experts.35.w2", "model.layers.54.block_sparse_moe.experts.36.w2", "model.layers.54.block_sparse_moe.experts.37.w2", "model.layers.54.block_sparse_moe.experts.38.w2", "model.layers.54.block_sparse_moe.experts.39.w2", "model.layers.54.block_sparse_moe.experts.40.w2", "model.layers.54.block_sparse_moe.experts.41.w2", "model.layers.54.block_sparse_moe.experts.42.w2", "model.layers.54.block_sparse_moe.experts.43.w2", "model.layers.54.block_sparse_moe.experts.44.w2", "model.layers.54.block_sparse_moe.experts.45.w2", "model.layers.54.block_sparse_moe.experts.46.w2", "model.layers.54.block_sparse_moe.experts.47.w2", "model.layers.54.block_sparse_moe.experts.48.w2", "model.layers.54.block_sparse_moe.experts.49.w2", "model.layers.54.block_sparse_moe.experts.50.w2", "model.layers.54.block_sparse_moe.experts.51.w2", "model.layers.54.block_sparse_moe.experts.52.w2", "model.layers.54.block_sparse_moe.experts.53.w2", "model.layers.54.block_sparse_moe.experts.54.w2", "model.layers.54.block_sparse_moe.experts.55.w2", "model.layers.54.block_sparse_moe.experts.56.w2", "model.layers.54.block_sparse_moe.experts.57.w2", "model.layers.54.block_sparse_moe.experts.58.w2", "model.layers.54.block_sparse_moe.experts.59.w2", "model.layers.54.block_sparse_moe.experts.60.w2", "model.layers.54.block_sparse_moe.experts.61.w2", "model.layers.54.block_sparse_moe.experts.62.w2", "model.layers.54.block_sparse_moe.experts.63.w2", "model.layers.54.block_sparse_moe.experts.64.w2", "model.layers.54.block_sparse_moe.experts.65.w2", "model.layers.54.block_sparse_moe.experts.66.w2", "model.layers.54.block_sparse_moe.experts.67.w2", "model.layers.54.block_sparse_moe.experts.68.w2", "model.layers.54.block_sparse_moe.experts.69.w2", "model.layers.54.block_sparse_moe.experts.70.w2", "model.layers.54.block_sparse_moe.experts.71.w2", "model.layers.54.block_sparse_moe.experts.72.w2", "model.layers.54.block_sparse_moe.experts.73.w2", "model.layers.54.block_sparse_moe.experts.74.w2", "model.layers.54.block_sparse_moe.experts.75.w2", "model.layers.54.block_sparse_moe.experts.76.w2", "model.layers.54.block_sparse_moe.experts.77.w2", "model.layers.54.block_sparse_moe.experts.78.w2", "model.layers.54.block_sparse_moe.experts.79.w2", "model.layers.54.block_sparse_moe.experts.80.w2", "model.layers.54.block_sparse_moe.experts.81.w2", "model.layers.54.block_sparse_moe.experts.82.w2", "model.layers.54.block_sparse_moe.experts.83.w2", "model.layers.54.block_sparse_moe.experts.84.w2", "model.layers.54.block_sparse_moe.experts.85.w2", "model.layers.54.block_sparse_moe.experts.86.w2", "model.layers.54.block_sparse_moe.experts.87.w2", "model.layers.54.block_sparse_moe.experts.88.w2", "model.layers.54.block_sparse_moe.experts.89.w2", "model.layers.54.block_sparse_moe.experts.90.w2", "model.layers.54.block_sparse_moe.experts.91.w2", "model.layers.54.block_sparse_moe.experts.92.w2", "model.layers.54.block_sparse_moe.experts.93.w2", "model.layers.54.block_sparse_moe.experts.94.w2", "model.layers.54.block_sparse_moe.experts.95.w2", "model.layers.54.block_sparse_moe.experts.96.w2", "model.layers.54.block_sparse_moe.experts.97.w2", "model.layers.54.block_sparse_moe.experts.98.w2", "model.layers.54.block_sparse_moe.experts.99.w2", "model.layers.54.block_sparse_moe.experts.100.w2", "model.layers.54.block_sparse_moe.experts.101.w2", "model.layers.54.block_sparse_moe.experts.102.w2", "model.layers.54.block_sparse_moe.experts.103.w2", "model.layers.54.block_sparse_moe.experts.104.w2", "model.layers.54.block_sparse_moe.experts.105.w2", "model.layers.54.block_sparse_moe.experts.106.w2", "model.layers.54.block_sparse_moe.experts.107.w2", "model.layers.54.block_sparse_moe.experts.108.w2", "model.layers.54.block_sparse_moe.experts.109.w2", "model.layers.54.block_sparse_moe.experts.110.w2", "model.layers.54.block_sparse_moe.experts.111.w2", "model.layers.54.block_sparse_moe.experts.112.w2", "model.layers.54.block_sparse_moe.experts.113.w2", "model.layers.54.block_sparse_moe.experts.114.w2", "model.layers.54.block_sparse_moe.experts.115.w2", "model.layers.54.block_sparse_moe.experts.116.w2", "model.layers.54.block_sparse_moe.experts.117.w2", "model.layers.54.block_sparse_moe.experts.118.w2", "model.layers.54.block_sparse_moe.experts.119.w2", "model.layers.54.block_sparse_moe.experts.120.w2", "model.layers.54.block_sparse_moe.experts.121.w2", "model.layers.54.block_sparse_moe.experts.122.w2", "model.layers.54.block_sparse_moe.experts.123.w2", "model.layers.54.block_sparse_moe.experts.124.w2", "model.layers.54.block_sparse_moe.experts.125.w2", "model.layers.54.block_sparse_moe.experts.126.w2", "model.layers.54.block_sparse_moe.experts.127.w2", "model.layers.54.block_sparse_moe.experts.128.w2", "model.layers.54.block_sparse_moe.experts.129.w2", "model.layers.54.block_sparse_moe.experts.130.w2", "model.layers.54.block_sparse_moe.experts.131.w2", "model.layers.54.block_sparse_moe.experts.132.w2", "model.layers.54.block_sparse_moe.experts.133.w2", "model.layers.54.block_sparse_moe.experts.134.w2", "model.layers.54.block_sparse_moe.experts.135.w2", "model.layers.54.block_sparse_moe.experts.136.w2", "model.layers.54.block_sparse_moe.experts.137.w2", "model.layers.54.block_sparse_moe.experts.138.w2", "model.layers.54.block_sparse_moe.experts.139.w2", "model.layers.54.block_sparse_moe.experts.140.w2", "model.layers.54.block_sparse_moe.experts.141.w2", "model.layers.54.block_sparse_moe.experts.142.w2", "model.layers.54.block_sparse_moe.experts.143.w2", "model.layers.54.block_sparse_moe.experts.144.w2", "model.layers.54.block_sparse_moe.experts.145.w2", "model.layers.54.block_sparse_moe.experts.146.w2", "model.layers.54.block_sparse_moe.experts.147.w2", "model.layers.54.block_sparse_moe.experts.148.w2", "model.layers.54.block_sparse_moe.experts.149.w2", "model.layers.54.block_sparse_moe.experts.150.w2", "model.layers.54.block_sparse_moe.experts.151.w2", "model.layers.54.block_sparse_moe.experts.152.w2", "model.layers.54.block_sparse_moe.experts.153.w2", "model.layers.54.block_sparse_moe.experts.154.w2", "model.layers.54.block_sparse_moe.experts.155.w2", "model.layers.54.block_sparse_moe.experts.156.w2", "model.layers.54.block_sparse_moe.experts.157.w2", "model.layers.54.block_sparse_moe.experts.158.w2", "model.layers.54.block_sparse_moe.experts.159.w2", "model.layers.54.block_sparse_moe.experts.160.w2", "model.layers.54.block_sparse_moe.experts.161.w2", "model.layers.54.block_sparse_moe.experts.162.w2", "model.layers.54.block_sparse_moe.experts.163.w2", "model.layers.54.block_sparse_moe.experts.164.w2", "model.layers.54.block_sparse_moe.experts.165.w2", "model.layers.54.block_sparse_moe.experts.166.w2", "model.layers.54.block_sparse_moe.experts.167.w2", "model.layers.54.block_sparse_moe.experts.168.w2", "model.layers.54.block_sparse_moe.experts.169.w2", "model.layers.54.block_sparse_moe.experts.170.w2", "model.layers.54.block_sparse_moe.experts.171.w2", "model.layers.54.block_sparse_moe.experts.172.w2", "model.layers.54.block_sparse_moe.experts.173.w2", "model.layers.54.block_sparse_moe.experts.174.w2", "model.layers.54.block_sparse_moe.experts.175.w2", "model.layers.54.block_sparse_moe.experts.176.w2", "model.layers.54.block_sparse_moe.experts.177.w2", "model.layers.54.block_sparse_moe.experts.178.w2", "model.layers.54.block_sparse_moe.experts.179.w2", "model.layers.54.block_sparse_moe.experts.180.w2", "model.layers.54.block_sparse_moe.experts.181.w2", "model.layers.54.block_sparse_moe.experts.182.w2", "model.layers.54.block_sparse_moe.experts.183.w2", "model.layers.54.block_sparse_moe.experts.184.w2", "model.layers.54.block_sparse_moe.experts.185.w2", "model.layers.54.block_sparse_moe.experts.186.w2", "model.layers.54.block_sparse_moe.experts.187.w2", "model.layers.54.block_sparse_moe.experts.188.w2", "model.layers.54.block_sparse_moe.experts.189.w2", "model.layers.54.block_sparse_moe.experts.190.w2", "model.layers.54.block_sparse_moe.experts.191.w2", "model.layers.54.block_sparse_moe.experts.192.w2", "model.layers.54.block_sparse_moe.experts.193.w2", "model.layers.54.block_sparse_moe.experts.194.w2", "model.layers.54.block_sparse_moe.experts.195.w2", "model.layers.54.block_sparse_moe.experts.196.w2", "model.layers.54.block_sparse_moe.experts.197.w2", "model.layers.54.block_sparse_moe.experts.198.w2", "model.layers.54.block_sparse_moe.experts.199.w2", "model.layers.54.block_sparse_moe.experts.200.w2", "model.layers.54.block_sparse_moe.experts.201.w2", "model.layers.54.block_sparse_moe.experts.202.w2", "model.layers.54.block_sparse_moe.experts.203.w2", "model.layers.54.block_sparse_moe.experts.204.w2", "model.layers.54.block_sparse_moe.experts.205.w2", "model.layers.54.block_sparse_moe.experts.206.w2", "model.layers.54.block_sparse_moe.experts.207.w2", "model.layers.54.block_sparse_moe.experts.208.w2", "model.layers.54.block_sparse_moe.experts.209.w2", "model.layers.54.block_sparse_moe.experts.210.w2", "model.layers.54.block_sparse_moe.experts.211.w2", "model.layers.54.block_sparse_moe.experts.212.w2", "model.layers.54.block_sparse_moe.experts.213.w2", "model.layers.54.block_sparse_moe.experts.214.w2", "model.layers.54.block_sparse_moe.experts.215.w2", "model.layers.54.block_sparse_moe.experts.216.w2", "model.layers.54.block_sparse_moe.experts.217.w2", "model.layers.54.block_sparse_moe.experts.218.w2", "model.layers.54.block_sparse_moe.experts.219.w2", "model.layers.54.block_sparse_moe.experts.220.w2", "model.layers.54.block_sparse_moe.experts.221.w2", "model.layers.54.block_sparse_moe.experts.222.w2", "model.layers.54.block_sparse_moe.experts.223.w2", "model.layers.54.block_sparse_moe.experts.224.w2", "model.layers.54.block_sparse_moe.experts.225.w2", "model.layers.54.block_sparse_moe.experts.226.w2", "model.layers.54.block_sparse_moe.experts.227.w2", "model.layers.54.block_sparse_moe.experts.228.w2", "model.layers.54.block_sparse_moe.experts.229.w2", "model.layers.54.block_sparse_moe.experts.230.w2", "model.layers.54.block_sparse_moe.experts.231.w2", "model.layers.54.block_sparse_moe.experts.232.w2", "model.layers.54.block_sparse_moe.experts.233.w2", "model.layers.54.block_sparse_moe.experts.234.w2", "model.layers.54.block_sparse_moe.experts.235.w2", "model.layers.54.block_sparse_moe.experts.236.w2", "model.layers.54.block_sparse_moe.experts.237.w2", "model.layers.54.block_sparse_moe.experts.238.w2", "model.layers.54.block_sparse_moe.experts.239.w2", "model.layers.54.block_sparse_moe.experts.240.w2", "model.layers.54.block_sparse_moe.experts.241.w2", "model.layers.54.block_sparse_moe.experts.242.w2", "model.layers.54.block_sparse_moe.experts.243.w2", "model.layers.54.block_sparse_moe.experts.244.w2", "model.layers.54.block_sparse_moe.experts.245.w2", "model.layers.54.block_sparse_moe.experts.246.w2", "model.layers.54.block_sparse_moe.experts.247.w2", "model.layers.54.block_sparse_moe.experts.248.w2", "model.layers.54.block_sparse_moe.experts.249.w2", "model.layers.54.block_sparse_moe.experts.250.w2", "model.layers.54.block_sparse_moe.experts.251.w2", "model.layers.54.block_sparse_moe.experts.252.w2", "model.layers.54.block_sparse_moe.experts.253.w2", "model.layers.54.block_sparse_moe.experts.254.w2", "model.layers.54.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 4.618242383014337e-06, "dbits": 1207959552 } ] }, { "idx": 275, "layers": [ "model.layers.55.self_attn.q_proj" ], "candidates": [ { "dkld": 9.290464222427852e-05, "dbits": 18874368 } ] }, { "idx": 276, "layers": [ "model.layers.55.self_attn.k_proj", "model.layers.55.self_attn.v_proj" ], "candidates": [ { "dkld": -0.00016076676547527313, "dbits": 6291456 } ] }, { "idx": 277, "layers": [ "model.layers.55.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0019528936594724322, "dbits": 18874368 } ] }, { "idx": 278, "layers": [ "model.layers.55.block_sparse_moe.experts.0.w1", "model.layers.55.block_sparse_moe.experts.1.w1", "model.layers.55.block_sparse_moe.experts.2.w1", "model.layers.55.block_sparse_moe.experts.3.w1", "model.layers.55.block_sparse_moe.experts.4.w1", "model.layers.55.block_sparse_moe.experts.5.w1", "model.layers.55.block_sparse_moe.experts.6.w1", "model.layers.55.block_sparse_moe.experts.7.w1", "model.layers.55.block_sparse_moe.experts.8.w1", "model.layers.55.block_sparse_moe.experts.9.w1", "model.layers.55.block_sparse_moe.experts.10.w1", "model.layers.55.block_sparse_moe.experts.11.w1", "model.layers.55.block_sparse_moe.experts.12.w1", "model.layers.55.block_sparse_moe.experts.13.w1", "model.layers.55.block_sparse_moe.experts.14.w1", "model.layers.55.block_sparse_moe.experts.15.w1", "model.layers.55.block_sparse_moe.experts.16.w1", "model.layers.55.block_sparse_moe.experts.17.w1", "model.layers.55.block_sparse_moe.experts.18.w1", "model.layers.55.block_sparse_moe.experts.19.w1", "model.layers.55.block_sparse_moe.experts.20.w1", "model.layers.55.block_sparse_moe.experts.21.w1", "model.layers.55.block_sparse_moe.experts.22.w1", "model.layers.55.block_sparse_moe.experts.23.w1", "model.layers.55.block_sparse_moe.experts.24.w1", "model.layers.55.block_sparse_moe.experts.25.w1", "model.layers.55.block_sparse_moe.experts.26.w1", "model.layers.55.block_sparse_moe.experts.27.w1", "model.layers.55.block_sparse_moe.experts.28.w1", "model.layers.55.block_sparse_moe.experts.29.w1", "model.layers.55.block_sparse_moe.experts.30.w1", "model.layers.55.block_sparse_moe.experts.31.w1", "model.layers.55.block_sparse_moe.experts.32.w1", "model.layers.55.block_sparse_moe.experts.33.w1", "model.layers.55.block_sparse_moe.experts.34.w1", "model.layers.55.block_sparse_moe.experts.35.w1", "model.layers.55.block_sparse_moe.experts.36.w1", "model.layers.55.block_sparse_moe.experts.37.w1", "model.layers.55.block_sparse_moe.experts.38.w1", "model.layers.55.block_sparse_moe.experts.39.w1", "model.layers.55.block_sparse_moe.experts.40.w1", "model.layers.55.block_sparse_moe.experts.41.w1", "model.layers.55.block_sparse_moe.experts.42.w1", "model.layers.55.block_sparse_moe.experts.43.w1", "model.layers.55.block_sparse_moe.experts.44.w1", "model.layers.55.block_sparse_moe.experts.45.w1", "model.layers.55.block_sparse_moe.experts.46.w1", "model.layers.55.block_sparse_moe.experts.47.w1", "model.layers.55.block_sparse_moe.experts.48.w1", "model.layers.55.block_sparse_moe.experts.49.w1", "model.layers.55.block_sparse_moe.experts.50.w1", "model.layers.55.block_sparse_moe.experts.51.w1", "model.layers.55.block_sparse_moe.experts.52.w1", "model.layers.55.block_sparse_moe.experts.53.w1", "model.layers.55.block_sparse_moe.experts.54.w1", "model.layers.55.block_sparse_moe.experts.55.w1", "model.layers.55.block_sparse_moe.experts.56.w1", "model.layers.55.block_sparse_moe.experts.57.w1", "model.layers.55.block_sparse_moe.experts.58.w1", "model.layers.55.block_sparse_moe.experts.59.w1", "model.layers.55.block_sparse_moe.experts.60.w1", "model.layers.55.block_sparse_moe.experts.61.w1", "model.layers.55.block_sparse_moe.experts.62.w1", "model.layers.55.block_sparse_moe.experts.63.w1", "model.layers.55.block_sparse_moe.experts.64.w1", "model.layers.55.block_sparse_moe.experts.65.w1", "model.layers.55.block_sparse_moe.experts.66.w1", "model.layers.55.block_sparse_moe.experts.67.w1", "model.layers.55.block_sparse_moe.experts.68.w1", "model.layers.55.block_sparse_moe.experts.69.w1", "model.layers.55.block_sparse_moe.experts.70.w1", "model.layers.55.block_sparse_moe.experts.71.w1", "model.layers.55.block_sparse_moe.experts.72.w1", "model.layers.55.block_sparse_moe.experts.73.w1", "model.layers.55.block_sparse_moe.experts.74.w1", "model.layers.55.block_sparse_moe.experts.75.w1", "model.layers.55.block_sparse_moe.experts.76.w1", "model.layers.55.block_sparse_moe.experts.77.w1", "model.layers.55.block_sparse_moe.experts.78.w1", "model.layers.55.block_sparse_moe.experts.79.w1", "model.layers.55.block_sparse_moe.experts.80.w1", "model.layers.55.block_sparse_moe.experts.81.w1", "model.layers.55.block_sparse_moe.experts.82.w1", "model.layers.55.block_sparse_moe.experts.83.w1", "model.layers.55.block_sparse_moe.experts.84.w1", "model.layers.55.block_sparse_moe.experts.85.w1", "model.layers.55.block_sparse_moe.experts.86.w1", "model.layers.55.block_sparse_moe.experts.87.w1", "model.layers.55.block_sparse_moe.experts.88.w1", "model.layers.55.block_sparse_moe.experts.89.w1", "model.layers.55.block_sparse_moe.experts.90.w1", "model.layers.55.block_sparse_moe.experts.91.w1", "model.layers.55.block_sparse_moe.experts.92.w1", "model.layers.55.block_sparse_moe.experts.93.w1", "model.layers.55.block_sparse_moe.experts.94.w1", "model.layers.55.block_sparse_moe.experts.95.w1", "model.layers.55.block_sparse_moe.experts.96.w1", "model.layers.55.block_sparse_moe.experts.97.w1", "model.layers.55.block_sparse_moe.experts.98.w1", "model.layers.55.block_sparse_moe.experts.99.w1", "model.layers.55.block_sparse_moe.experts.100.w1", "model.layers.55.block_sparse_moe.experts.101.w1", "model.layers.55.block_sparse_moe.experts.102.w1", "model.layers.55.block_sparse_moe.experts.103.w1", "model.layers.55.block_sparse_moe.experts.104.w1", "model.layers.55.block_sparse_moe.experts.105.w1", "model.layers.55.block_sparse_moe.experts.106.w1", "model.layers.55.block_sparse_moe.experts.107.w1", "model.layers.55.block_sparse_moe.experts.108.w1", "model.layers.55.block_sparse_moe.experts.109.w1", "model.layers.55.block_sparse_moe.experts.110.w1", "model.layers.55.block_sparse_moe.experts.111.w1", "model.layers.55.block_sparse_moe.experts.112.w1", "model.layers.55.block_sparse_moe.experts.113.w1", "model.layers.55.block_sparse_moe.experts.114.w1", "model.layers.55.block_sparse_moe.experts.115.w1", "model.layers.55.block_sparse_moe.experts.116.w1", "model.layers.55.block_sparse_moe.experts.117.w1", "model.layers.55.block_sparse_moe.experts.118.w1", "model.layers.55.block_sparse_moe.experts.119.w1", "model.layers.55.block_sparse_moe.experts.120.w1", "model.layers.55.block_sparse_moe.experts.121.w1", "model.layers.55.block_sparse_moe.experts.122.w1", "model.layers.55.block_sparse_moe.experts.123.w1", "model.layers.55.block_sparse_moe.experts.124.w1", "model.layers.55.block_sparse_moe.experts.125.w1", "model.layers.55.block_sparse_moe.experts.126.w1", "model.layers.55.block_sparse_moe.experts.127.w1", "model.layers.55.block_sparse_moe.experts.128.w1", "model.layers.55.block_sparse_moe.experts.129.w1", "model.layers.55.block_sparse_moe.experts.130.w1", "model.layers.55.block_sparse_moe.experts.131.w1", "model.layers.55.block_sparse_moe.experts.132.w1", "model.layers.55.block_sparse_moe.experts.133.w1", "model.layers.55.block_sparse_moe.experts.134.w1", "model.layers.55.block_sparse_moe.experts.135.w1", "model.layers.55.block_sparse_moe.experts.136.w1", "model.layers.55.block_sparse_moe.experts.137.w1", "model.layers.55.block_sparse_moe.experts.138.w1", "model.layers.55.block_sparse_moe.experts.139.w1", "model.layers.55.block_sparse_moe.experts.140.w1", "model.layers.55.block_sparse_moe.experts.141.w1", "model.layers.55.block_sparse_moe.experts.142.w1", "model.layers.55.block_sparse_moe.experts.143.w1", "model.layers.55.block_sparse_moe.experts.144.w1", "model.layers.55.block_sparse_moe.experts.145.w1", "model.layers.55.block_sparse_moe.experts.146.w1", "model.layers.55.block_sparse_moe.experts.147.w1", "model.layers.55.block_sparse_moe.experts.148.w1", "model.layers.55.block_sparse_moe.experts.149.w1", "model.layers.55.block_sparse_moe.experts.150.w1", "model.layers.55.block_sparse_moe.experts.151.w1", "model.layers.55.block_sparse_moe.experts.152.w1", "model.layers.55.block_sparse_moe.experts.153.w1", "model.layers.55.block_sparse_moe.experts.154.w1", "model.layers.55.block_sparse_moe.experts.155.w1", "model.layers.55.block_sparse_moe.experts.156.w1", "model.layers.55.block_sparse_moe.experts.157.w1", "model.layers.55.block_sparse_moe.experts.158.w1", "model.layers.55.block_sparse_moe.experts.159.w1", "model.layers.55.block_sparse_moe.experts.160.w1", "model.layers.55.block_sparse_moe.experts.161.w1", "model.layers.55.block_sparse_moe.experts.162.w1", "model.layers.55.block_sparse_moe.experts.163.w1", "model.layers.55.block_sparse_moe.experts.164.w1", "model.layers.55.block_sparse_moe.experts.165.w1", "model.layers.55.block_sparse_moe.experts.166.w1", "model.layers.55.block_sparse_moe.experts.167.w1", "model.layers.55.block_sparse_moe.experts.168.w1", "model.layers.55.block_sparse_moe.experts.169.w1", "model.layers.55.block_sparse_moe.experts.170.w1", "model.layers.55.block_sparse_moe.experts.171.w1", "model.layers.55.block_sparse_moe.experts.172.w1", "model.layers.55.block_sparse_moe.experts.173.w1", "model.layers.55.block_sparse_moe.experts.174.w1", "model.layers.55.block_sparse_moe.experts.175.w1", "model.layers.55.block_sparse_moe.experts.176.w1", "model.layers.55.block_sparse_moe.experts.177.w1", "model.layers.55.block_sparse_moe.experts.178.w1", "model.layers.55.block_sparse_moe.experts.179.w1", "model.layers.55.block_sparse_moe.experts.180.w1", "model.layers.55.block_sparse_moe.experts.181.w1", "model.layers.55.block_sparse_moe.experts.182.w1", "model.layers.55.block_sparse_moe.experts.183.w1", "model.layers.55.block_sparse_moe.experts.184.w1", "model.layers.55.block_sparse_moe.experts.185.w1", "model.layers.55.block_sparse_moe.experts.186.w1", "model.layers.55.block_sparse_moe.experts.187.w1", "model.layers.55.block_sparse_moe.experts.188.w1", "model.layers.55.block_sparse_moe.experts.189.w1", "model.layers.55.block_sparse_moe.experts.190.w1", "model.layers.55.block_sparse_moe.experts.191.w1", "model.layers.55.block_sparse_moe.experts.192.w1", "model.layers.55.block_sparse_moe.experts.193.w1", "model.layers.55.block_sparse_moe.experts.194.w1", "model.layers.55.block_sparse_moe.experts.195.w1", "model.layers.55.block_sparse_moe.experts.196.w1", "model.layers.55.block_sparse_moe.experts.197.w1", "model.layers.55.block_sparse_moe.experts.198.w1", "model.layers.55.block_sparse_moe.experts.199.w1", "model.layers.55.block_sparse_moe.experts.200.w1", "model.layers.55.block_sparse_moe.experts.201.w1", "model.layers.55.block_sparse_moe.experts.202.w1", "model.layers.55.block_sparse_moe.experts.203.w1", "model.layers.55.block_sparse_moe.experts.204.w1", "model.layers.55.block_sparse_moe.experts.205.w1", "model.layers.55.block_sparse_moe.experts.206.w1", "model.layers.55.block_sparse_moe.experts.207.w1", "model.layers.55.block_sparse_moe.experts.208.w1", "model.layers.55.block_sparse_moe.experts.209.w1", "model.layers.55.block_sparse_moe.experts.210.w1", "model.layers.55.block_sparse_moe.experts.211.w1", "model.layers.55.block_sparse_moe.experts.212.w1", "model.layers.55.block_sparse_moe.experts.213.w1", "model.layers.55.block_sparse_moe.experts.214.w1", "model.layers.55.block_sparse_moe.experts.215.w1", "model.layers.55.block_sparse_moe.experts.216.w1", "model.layers.55.block_sparse_moe.experts.217.w1", "model.layers.55.block_sparse_moe.experts.218.w1", "model.layers.55.block_sparse_moe.experts.219.w1", "model.layers.55.block_sparse_moe.experts.220.w1", "model.layers.55.block_sparse_moe.experts.221.w1", "model.layers.55.block_sparse_moe.experts.222.w1", "model.layers.55.block_sparse_moe.experts.223.w1", "model.layers.55.block_sparse_moe.experts.224.w1", "model.layers.55.block_sparse_moe.experts.225.w1", "model.layers.55.block_sparse_moe.experts.226.w1", "model.layers.55.block_sparse_moe.experts.227.w1", "model.layers.55.block_sparse_moe.experts.228.w1", "model.layers.55.block_sparse_moe.experts.229.w1", "model.layers.55.block_sparse_moe.experts.230.w1", "model.layers.55.block_sparse_moe.experts.231.w1", "model.layers.55.block_sparse_moe.experts.232.w1", "model.layers.55.block_sparse_moe.experts.233.w1", "model.layers.55.block_sparse_moe.experts.234.w1", "model.layers.55.block_sparse_moe.experts.235.w1", "model.layers.55.block_sparse_moe.experts.236.w1", "model.layers.55.block_sparse_moe.experts.237.w1", "model.layers.55.block_sparse_moe.experts.238.w1", "model.layers.55.block_sparse_moe.experts.239.w1", "model.layers.55.block_sparse_moe.experts.240.w1", "model.layers.55.block_sparse_moe.experts.241.w1", "model.layers.55.block_sparse_moe.experts.242.w1", "model.layers.55.block_sparse_moe.experts.243.w1", "model.layers.55.block_sparse_moe.experts.244.w1", "model.layers.55.block_sparse_moe.experts.245.w1", "model.layers.55.block_sparse_moe.experts.246.w1", "model.layers.55.block_sparse_moe.experts.247.w1", "model.layers.55.block_sparse_moe.experts.248.w1", "model.layers.55.block_sparse_moe.experts.249.w1", "model.layers.55.block_sparse_moe.experts.250.w1", "model.layers.55.block_sparse_moe.experts.251.w1", "model.layers.55.block_sparse_moe.experts.252.w1", "model.layers.55.block_sparse_moe.experts.253.w1", "model.layers.55.block_sparse_moe.experts.254.w1", "model.layers.55.block_sparse_moe.experts.255.w1", "model.layers.55.block_sparse_moe.experts.0.w3", "model.layers.55.block_sparse_moe.experts.1.w3", "model.layers.55.block_sparse_moe.experts.2.w3", "model.layers.55.block_sparse_moe.experts.3.w3", "model.layers.55.block_sparse_moe.experts.4.w3", "model.layers.55.block_sparse_moe.experts.5.w3", "model.layers.55.block_sparse_moe.experts.6.w3", "model.layers.55.block_sparse_moe.experts.7.w3", "model.layers.55.block_sparse_moe.experts.8.w3", "model.layers.55.block_sparse_moe.experts.9.w3", "model.layers.55.block_sparse_moe.experts.10.w3", "model.layers.55.block_sparse_moe.experts.11.w3", "model.layers.55.block_sparse_moe.experts.12.w3", "model.layers.55.block_sparse_moe.experts.13.w3", "model.layers.55.block_sparse_moe.experts.14.w3", "model.layers.55.block_sparse_moe.experts.15.w3", "model.layers.55.block_sparse_moe.experts.16.w3", "model.layers.55.block_sparse_moe.experts.17.w3", "model.layers.55.block_sparse_moe.experts.18.w3", "model.layers.55.block_sparse_moe.experts.19.w3", "model.layers.55.block_sparse_moe.experts.20.w3", "model.layers.55.block_sparse_moe.experts.21.w3", "model.layers.55.block_sparse_moe.experts.22.w3", "model.layers.55.block_sparse_moe.experts.23.w3", "model.layers.55.block_sparse_moe.experts.24.w3", "model.layers.55.block_sparse_moe.experts.25.w3", "model.layers.55.block_sparse_moe.experts.26.w3", "model.layers.55.block_sparse_moe.experts.27.w3", "model.layers.55.block_sparse_moe.experts.28.w3", "model.layers.55.block_sparse_moe.experts.29.w3", "model.layers.55.block_sparse_moe.experts.30.w3", "model.layers.55.block_sparse_moe.experts.31.w3", "model.layers.55.block_sparse_moe.experts.32.w3", "model.layers.55.block_sparse_moe.experts.33.w3", "model.layers.55.block_sparse_moe.experts.34.w3", "model.layers.55.block_sparse_moe.experts.35.w3", "model.layers.55.block_sparse_moe.experts.36.w3", "model.layers.55.block_sparse_moe.experts.37.w3", "model.layers.55.block_sparse_moe.experts.38.w3", "model.layers.55.block_sparse_moe.experts.39.w3", "model.layers.55.block_sparse_moe.experts.40.w3", "model.layers.55.block_sparse_moe.experts.41.w3", "model.layers.55.block_sparse_moe.experts.42.w3", "model.layers.55.block_sparse_moe.experts.43.w3", "model.layers.55.block_sparse_moe.experts.44.w3", "model.layers.55.block_sparse_moe.experts.45.w3", "model.layers.55.block_sparse_moe.experts.46.w3", "model.layers.55.block_sparse_moe.experts.47.w3", "model.layers.55.block_sparse_moe.experts.48.w3", "model.layers.55.block_sparse_moe.experts.49.w3", "model.layers.55.block_sparse_moe.experts.50.w3", "model.layers.55.block_sparse_moe.experts.51.w3", "model.layers.55.block_sparse_moe.experts.52.w3", "model.layers.55.block_sparse_moe.experts.53.w3", "model.layers.55.block_sparse_moe.experts.54.w3", "model.layers.55.block_sparse_moe.experts.55.w3", "model.layers.55.block_sparse_moe.experts.56.w3", "model.layers.55.block_sparse_moe.experts.57.w3", "model.layers.55.block_sparse_moe.experts.58.w3", "model.layers.55.block_sparse_moe.experts.59.w3", "model.layers.55.block_sparse_moe.experts.60.w3", "model.layers.55.block_sparse_moe.experts.61.w3", "model.layers.55.block_sparse_moe.experts.62.w3", "model.layers.55.block_sparse_moe.experts.63.w3", "model.layers.55.block_sparse_moe.experts.64.w3", "model.layers.55.block_sparse_moe.experts.65.w3", "model.layers.55.block_sparse_moe.experts.66.w3", "model.layers.55.block_sparse_moe.experts.67.w3", "model.layers.55.block_sparse_moe.experts.68.w3", "model.layers.55.block_sparse_moe.experts.69.w3", "model.layers.55.block_sparse_moe.experts.70.w3", "model.layers.55.block_sparse_moe.experts.71.w3", "model.layers.55.block_sparse_moe.experts.72.w3", "model.layers.55.block_sparse_moe.experts.73.w3", "model.layers.55.block_sparse_moe.experts.74.w3", "model.layers.55.block_sparse_moe.experts.75.w3", "model.layers.55.block_sparse_moe.experts.76.w3", "model.layers.55.block_sparse_moe.experts.77.w3", "model.layers.55.block_sparse_moe.experts.78.w3", "model.layers.55.block_sparse_moe.experts.79.w3", "model.layers.55.block_sparse_moe.experts.80.w3", "model.layers.55.block_sparse_moe.experts.81.w3", "model.layers.55.block_sparse_moe.experts.82.w3", "model.layers.55.block_sparse_moe.experts.83.w3", "model.layers.55.block_sparse_moe.experts.84.w3", "model.layers.55.block_sparse_moe.experts.85.w3", "model.layers.55.block_sparse_moe.experts.86.w3", "model.layers.55.block_sparse_moe.experts.87.w3", "model.layers.55.block_sparse_moe.experts.88.w3", "model.layers.55.block_sparse_moe.experts.89.w3", "model.layers.55.block_sparse_moe.experts.90.w3", "model.layers.55.block_sparse_moe.experts.91.w3", "model.layers.55.block_sparse_moe.experts.92.w3", "model.layers.55.block_sparse_moe.experts.93.w3", "model.layers.55.block_sparse_moe.experts.94.w3", "model.layers.55.block_sparse_moe.experts.95.w3", "model.layers.55.block_sparse_moe.experts.96.w3", "model.layers.55.block_sparse_moe.experts.97.w3", "model.layers.55.block_sparse_moe.experts.98.w3", "model.layers.55.block_sparse_moe.experts.99.w3", "model.layers.55.block_sparse_moe.experts.100.w3", "model.layers.55.block_sparse_moe.experts.101.w3", "model.layers.55.block_sparse_moe.experts.102.w3", "model.layers.55.block_sparse_moe.experts.103.w3", "model.layers.55.block_sparse_moe.experts.104.w3", "model.layers.55.block_sparse_moe.experts.105.w3", "model.layers.55.block_sparse_moe.experts.106.w3", "model.layers.55.block_sparse_moe.experts.107.w3", "model.layers.55.block_sparse_moe.experts.108.w3", "model.layers.55.block_sparse_moe.experts.109.w3", "model.layers.55.block_sparse_moe.experts.110.w3", "model.layers.55.block_sparse_moe.experts.111.w3", "model.layers.55.block_sparse_moe.experts.112.w3", "model.layers.55.block_sparse_moe.experts.113.w3", "model.layers.55.block_sparse_moe.experts.114.w3", "model.layers.55.block_sparse_moe.experts.115.w3", "model.layers.55.block_sparse_moe.experts.116.w3", "model.layers.55.block_sparse_moe.experts.117.w3", "model.layers.55.block_sparse_moe.experts.118.w3", "model.layers.55.block_sparse_moe.experts.119.w3", "model.layers.55.block_sparse_moe.experts.120.w3", "model.layers.55.block_sparse_moe.experts.121.w3", "model.layers.55.block_sparse_moe.experts.122.w3", "model.layers.55.block_sparse_moe.experts.123.w3", "model.layers.55.block_sparse_moe.experts.124.w3", "model.layers.55.block_sparse_moe.experts.125.w3", "model.layers.55.block_sparse_moe.experts.126.w3", "model.layers.55.block_sparse_moe.experts.127.w3", "model.layers.55.block_sparse_moe.experts.128.w3", "model.layers.55.block_sparse_moe.experts.129.w3", "model.layers.55.block_sparse_moe.experts.130.w3", "model.layers.55.block_sparse_moe.experts.131.w3", "model.layers.55.block_sparse_moe.experts.132.w3", "model.layers.55.block_sparse_moe.experts.133.w3", "model.layers.55.block_sparse_moe.experts.134.w3", "model.layers.55.block_sparse_moe.experts.135.w3", "model.layers.55.block_sparse_moe.experts.136.w3", "model.layers.55.block_sparse_moe.experts.137.w3", "model.layers.55.block_sparse_moe.experts.138.w3", "model.layers.55.block_sparse_moe.experts.139.w3", "model.layers.55.block_sparse_moe.experts.140.w3", "model.layers.55.block_sparse_moe.experts.141.w3", "model.layers.55.block_sparse_moe.experts.142.w3", "model.layers.55.block_sparse_moe.experts.143.w3", "model.layers.55.block_sparse_moe.experts.144.w3", "model.layers.55.block_sparse_moe.experts.145.w3", "model.layers.55.block_sparse_moe.experts.146.w3", "model.layers.55.block_sparse_moe.experts.147.w3", "model.layers.55.block_sparse_moe.experts.148.w3", "model.layers.55.block_sparse_moe.experts.149.w3", "model.layers.55.block_sparse_moe.experts.150.w3", "model.layers.55.block_sparse_moe.experts.151.w3", "model.layers.55.block_sparse_moe.experts.152.w3", "model.layers.55.block_sparse_moe.experts.153.w3", "model.layers.55.block_sparse_moe.experts.154.w3", "model.layers.55.block_sparse_moe.experts.155.w3", "model.layers.55.block_sparse_moe.experts.156.w3", "model.layers.55.block_sparse_moe.experts.157.w3", "model.layers.55.block_sparse_moe.experts.158.w3", "model.layers.55.block_sparse_moe.experts.159.w3", "model.layers.55.block_sparse_moe.experts.160.w3", "model.layers.55.block_sparse_moe.experts.161.w3", "model.layers.55.block_sparse_moe.experts.162.w3", "model.layers.55.block_sparse_moe.experts.163.w3", "model.layers.55.block_sparse_moe.experts.164.w3", "model.layers.55.block_sparse_moe.experts.165.w3", "model.layers.55.block_sparse_moe.experts.166.w3", "model.layers.55.block_sparse_moe.experts.167.w3", "model.layers.55.block_sparse_moe.experts.168.w3", "model.layers.55.block_sparse_moe.experts.169.w3", "model.layers.55.block_sparse_moe.experts.170.w3", "model.layers.55.block_sparse_moe.experts.171.w3", "model.layers.55.block_sparse_moe.experts.172.w3", "model.layers.55.block_sparse_moe.experts.173.w3", "model.layers.55.block_sparse_moe.experts.174.w3", "model.layers.55.block_sparse_moe.experts.175.w3", "model.layers.55.block_sparse_moe.experts.176.w3", "model.layers.55.block_sparse_moe.experts.177.w3", "model.layers.55.block_sparse_moe.experts.178.w3", "model.layers.55.block_sparse_moe.experts.179.w3", "model.layers.55.block_sparse_moe.experts.180.w3", "model.layers.55.block_sparse_moe.experts.181.w3", "model.layers.55.block_sparse_moe.experts.182.w3", "model.layers.55.block_sparse_moe.experts.183.w3", "model.layers.55.block_sparse_moe.experts.184.w3", "model.layers.55.block_sparse_moe.experts.185.w3", "model.layers.55.block_sparse_moe.experts.186.w3", "model.layers.55.block_sparse_moe.experts.187.w3", "model.layers.55.block_sparse_moe.experts.188.w3", "model.layers.55.block_sparse_moe.experts.189.w3", "model.layers.55.block_sparse_moe.experts.190.w3", "model.layers.55.block_sparse_moe.experts.191.w3", "model.layers.55.block_sparse_moe.experts.192.w3", "model.layers.55.block_sparse_moe.experts.193.w3", "model.layers.55.block_sparse_moe.experts.194.w3", "model.layers.55.block_sparse_moe.experts.195.w3", "model.layers.55.block_sparse_moe.experts.196.w3", "model.layers.55.block_sparse_moe.experts.197.w3", "model.layers.55.block_sparse_moe.experts.198.w3", "model.layers.55.block_sparse_moe.experts.199.w3", "model.layers.55.block_sparse_moe.experts.200.w3", "model.layers.55.block_sparse_moe.experts.201.w3", "model.layers.55.block_sparse_moe.experts.202.w3", "model.layers.55.block_sparse_moe.experts.203.w3", "model.layers.55.block_sparse_moe.experts.204.w3", "model.layers.55.block_sparse_moe.experts.205.w3", "model.layers.55.block_sparse_moe.experts.206.w3", "model.layers.55.block_sparse_moe.experts.207.w3", "model.layers.55.block_sparse_moe.experts.208.w3", "model.layers.55.block_sparse_moe.experts.209.w3", "model.layers.55.block_sparse_moe.experts.210.w3", "model.layers.55.block_sparse_moe.experts.211.w3", "model.layers.55.block_sparse_moe.experts.212.w3", "model.layers.55.block_sparse_moe.experts.213.w3", "model.layers.55.block_sparse_moe.experts.214.w3", "model.layers.55.block_sparse_moe.experts.215.w3", "model.layers.55.block_sparse_moe.experts.216.w3", "model.layers.55.block_sparse_moe.experts.217.w3", "model.layers.55.block_sparse_moe.experts.218.w3", "model.layers.55.block_sparse_moe.experts.219.w3", "model.layers.55.block_sparse_moe.experts.220.w3", "model.layers.55.block_sparse_moe.experts.221.w3", "model.layers.55.block_sparse_moe.experts.222.w3", "model.layers.55.block_sparse_moe.experts.223.w3", "model.layers.55.block_sparse_moe.experts.224.w3", "model.layers.55.block_sparse_moe.experts.225.w3", "model.layers.55.block_sparse_moe.experts.226.w3", "model.layers.55.block_sparse_moe.experts.227.w3", "model.layers.55.block_sparse_moe.experts.228.w3", "model.layers.55.block_sparse_moe.experts.229.w3", "model.layers.55.block_sparse_moe.experts.230.w3", "model.layers.55.block_sparse_moe.experts.231.w3", "model.layers.55.block_sparse_moe.experts.232.w3", "model.layers.55.block_sparse_moe.experts.233.w3", "model.layers.55.block_sparse_moe.experts.234.w3", "model.layers.55.block_sparse_moe.experts.235.w3", "model.layers.55.block_sparse_moe.experts.236.w3", "model.layers.55.block_sparse_moe.experts.237.w3", "model.layers.55.block_sparse_moe.experts.238.w3", "model.layers.55.block_sparse_moe.experts.239.w3", "model.layers.55.block_sparse_moe.experts.240.w3", "model.layers.55.block_sparse_moe.experts.241.w3", "model.layers.55.block_sparse_moe.experts.242.w3", "model.layers.55.block_sparse_moe.experts.243.w3", "model.layers.55.block_sparse_moe.experts.244.w3", "model.layers.55.block_sparse_moe.experts.245.w3", "model.layers.55.block_sparse_moe.experts.246.w3", "model.layers.55.block_sparse_moe.experts.247.w3", "model.layers.55.block_sparse_moe.experts.248.w3", "model.layers.55.block_sparse_moe.experts.249.w3", "model.layers.55.block_sparse_moe.experts.250.w3", "model.layers.55.block_sparse_moe.experts.251.w3", "model.layers.55.block_sparse_moe.experts.252.w3", "model.layers.55.block_sparse_moe.experts.253.w3", "model.layers.55.block_sparse_moe.experts.254.w3", "model.layers.55.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 5.446486175059162e-05, "dbits": 2415919104 } ] }, { "idx": 279, "layers": [ "model.layers.55.block_sparse_moe.experts.0.w2", "model.layers.55.block_sparse_moe.experts.1.w2", "model.layers.55.block_sparse_moe.experts.2.w2", "model.layers.55.block_sparse_moe.experts.3.w2", "model.layers.55.block_sparse_moe.experts.4.w2", "model.layers.55.block_sparse_moe.experts.5.w2", "model.layers.55.block_sparse_moe.experts.6.w2", "model.layers.55.block_sparse_moe.experts.7.w2", "model.layers.55.block_sparse_moe.experts.8.w2", "model.layers.55.block_sparse_moe.experts.9.w2", "model.layers.55.block_sparse_moe.experts.10.w2", "model.layers.55.block_sparse_moe.experts.11.w2", "model.layers.55.block_sparse_moe.experts.12.w2", "model.layers.55.block_sparse_moe.experts.13.w2", "model.layers.55.block_sparse_moe.experts.14.w2", "model.layers.55.block_sparse_moe.experts.15.w2", "model.layers.55.block_sparse_moe.experts.16.w2", "model.layers.55.block_sparse_moe.experts.17.w2", "model.layers.55.block_sparse_moe.experts.18.w2", "model.layers.55.block_sparse_moe.experts.19.w2", "model.layers.55.block_sparse_moe.experts.20.w2", "model.layers.55.block_sparse_moe.experts.21.w2", "model.layers.55.block_sparse_moe.experts.22.w2", "model.layers.55.block_sparse_moe.experts.23.w2", "model.layers.55.block_sparse_moe.experts.24.w2", "model.layers.55.block_sparse_moe.experts.25.w2", "model.layers.55.block_sparse_moe.experts.26.w2", "model.layers.55.block_sparse_moe.experts.27.w2", "model.layers.55.block_sparse_moe.experts.28.w2", "model.layers.55.block_sparse_moe.experts.29.w2", "model.layers.55.block_sparse_moe.experts.30.w2", "model.layers.55.block_sparse_moe.experts.31.w2", "model.layers.55.block_sparse_moe.experts.32.w2", "model.layers.55.block_sparse_moe.experts.33.w2", "model.layers.55.block_sparse_moe.experts.34.w2", "model.layers.55.block_sparse_moe.experts.35.w2", "model.layers.55.block_sparse_moe.experts.36.w2", "model.layers.55.block_sparse_moe.experts.37.w2", "model.layers.55.block_sparse_moe.experts.38.w2", "model.layers.55.block_sparse_moe.experts.39.w2", "model.layers.55.block_sparse_moe.experts.40.w2", "model.layers.55.block_sparse_moe.experts.41.w2", "model.layers.55.block_sparse_moe.experts.42.w2", "model.layers.55.block_sparse_moe.experts.43.w2", "model.layers.55.block_sparse_moe.experts.44.w2", "model.layers.55.block_sparse_moe.experts.45.w2", "model.layers.55.block_sparse_moe.experts.46.w2", "model.layers.55.block_sparse_moe.experts.47.w2", "model.layers.55.block_sparse_moe.experts.48.w2", "model.layers.55.block_sparse_moe.experts.49.w2", "model.layers.55.block_sparse_moe.experts.50.w2", "model.layers.55.block_sparse_moe.experts.51.w2", "model.layers.55.block_sparse_moe.experts.52.w2", "model.layers.55.block_sparse_moe.experts.53.w2", "model.layers.55.block_sparse_moe.experts.54.w2", "model.layers.55.block_sparse_moe.experts.55.w2", "model.layers.55.block_sparse_moe.experts.56.w2", "model.layers.55.block_sparse_moe.experts.57.w2", "model.layers.55.block_sparse_moe.experts.58.w2", "model.layers.55.block_sparse_moe.experts.59.w2", "model.layers.55.block_sparse_moe.experts.60.w2", "model.layers.55.block_sparse_moe.experts.61.w2", "model.layers.55.block_sparse_moe.experts.62.w2", "model.layers.55.block_sparse_moe.experts.63.w2", "model.layers.55.block_sparse_moe.experts.64.w2", "model.layers.55.block_sparse_moe.experts.65.w2", "model.layers.55.block_sparse_moe.experts.66.w2", "model.layers.55.block_sparse_moe.experts.67.w2", "model.layers.55.block_sparse_moe.experts.68.w2", "model.layers.55.block_sparse_moe.experts.69.w2", "model.layers.55.block_sparse_moe.experts.70.w2", "model.layers.55.block_sparse_moe.experts.71.w2", "model.layers.55.block_sparse_moe.experts.72.w2", "model.layers.55.block_sparse_moe.experts.73.w2", "model.layers.55.block_sparse_moe.experts.74.w2", "model.layers.55.block_sparse_moe.experts.75.w2", "model.layers.55.block_sparse_moe.experts.76.w2", "model.layers.55.block_sparse_moe.experts.77.w2", "model.layers.55.block_sparse_moe.experts.78.w2", "model.layers.55.block_sparse_moe.experts.79.w2", "model.layers.55.block_sparse_moe.experts.80.w2", "model.layers.55.block_sparse_moe.experts.81.w2", "model.layers.55.block_sparse_moe.experts.82.w2", "model.layers.55.block_sparse_moe.experts.83.w2", "model.layers.55.block_sparse_moe.experts.84.w2", "model.layers.55.block_sparse_moe.experts.85.w2", "model.layers.55.block_sparse_moe.experts.86.w2", "model.layers.55.block_sparse_moe.experts.87.w2", "model.layers.55.block_sparse_moe.experts.88.w2", "model.layers.55.block_sparse_moe.experts.89.w2", "model.layers.55.block_sparse_moe.experts.90.w2", "model.layers.55.block_sparse_moe.experts.91.w2", "model.layers.55.block_sparse_moe.experts.92.w2", "model.layers.55.block_sparse_moe.experts.93.w2", "model.layers.55.block_sparse_moe.experts.94.w2", "model.layers.55.block_sparse_moe.experts.95.w2", "model.layers.55.block_sparse_moe.experts.96.w2", "model.layers.55.block_sparse_moe.experts.97.w2", "model.layers.55.block_sparse_moe.experts.98.w2", "model.layers.55.block_sparse_moe.experts.99.w2", "model.layers.55.block_sparse_moe.experts.100.w2", "model.layers.55.block_sparse_moe.experts.101.w2", "model.layers.55.block_sparse_moe.experts.102.w2", "model.layers.55.block_sparse_moe.experts.103.w2", "model.layers.55.block_sparse_moe.experts.104.w2", "model.layers.55.block_sparse_moe.experts.105.w2", "model.layers.55.block_sparse_moe.experts.106.w2", "model.layers.55.block_sparse_moe.experts.107.w2", "model.layers.55.block_sparse_moe.experts.108.w2", "model.layers.55.block_sparse_moe.experts.109.w2", "model.layers.55.block_sparse_moe.experts.110.w2", "model.layers.55.block_sparse_moe.experts.111.w2", "model.layers.55.block_sparse_moe.experts.112.w2", "model.layers.55.block_sparse_moe.experts.113.w2", "model.layers.55.block_sparse_moe.experts.114.w2", "model.layers.55.block_sparse_moe.experts.115.w2", "model.layers.55.block_sparse_moe.experts.116.w2", "model.layers.55.block_sparse_moe.experts.117.w2", "model.layers.55.block_sparse_moe.experts.118.w2", "model.layers.55.block_sparse_moe.experts.119.w2", "model.layers.55.block_sparse_moe.experts.120.w2", "model.layers.55.block_sparse_moe.experts.121.w2", "model.layers.55.block_sparse_moe.experts.122.w2", "model.layers.55.block_sparse_moe.experts.123.w2", "model.layers.55.block_sparse_moe.experts.124.w2", "model.layers.55.block_sparse_moe.experts.125.w2", "model.layers.55.block_sparse_moe.experts.126.w2", "model.layers.55.block_sparse_moe.experts.127.w2", "model.layers.55.block_sparse_moe.experts.128.w2", "model.layers.55.block_sparse_moe.experts.129.w2", "model.layers.55.block_sparse_moe.experts.130.w2", "model.layers.55.block_sparse_moe.experts.131.w2", "model.layers.55.block_sparse_moe.experts.132.w2", "model.layers.55.block_sparse_moe.experts.133.w2", "model.layers.55.block_sparse_moe.experts.134.w2", "model.layers.55.block_sparse_moe.experts.135.w2", "model.layers.55.block_sparse_moe.experts.136.w2", "model.layers.55.block_sparse_moe.experts.137.w2", "model.layers.55.block_sparse_moe.experts.138.w2", "model.layers.55.block_sparse_moe.experts.139.w2", "model.layers.55.block_sparse_moe.experts.140.w2", "model.layers.55.block_sparse_moe.experts.141.w2", "model.layers.55.block_sparse_moe.experts.142.w2", "model.layers.55.block_sparse_moe.experts.143.w2", "model.layers.55.block_sparse_moe.experts.144.w2", "model.layers.55.block_sparse_moe.experts.145.w2", "model.layers.55.block_sparse_moe.experts.146.w2", "model.layers.55.block_sparse_moe.experts.147.w2", "model.layers.55.block_sparse_moe.experts.148.w2", "model.layers.55.block_sparse_moe.experts.149.w2", "model.layers.55.block_sparse_moe.experts.150.w2", "model.layers.55.block_sparse_moe.experts.151.w2", "model.layers.55.block_sparse_moe.experts.152.w2", "model.layers.55.block_sparse_moe.experts.153.w2", "model.layers.55.block_sparse_moe.experts.154.w2", "model.layers.55.block_sparse_moe.experts.155.w2", "model.layers.55.block_sparse_moe.experts.156.w2", "model.layers.55.block_sparse_moe.experts.157.w2", "model.layers.55.block_sparse_moe.experts.158.w2", "model.layers.55.block_sparse_moe.experts.159.w2", "model.layers.55.block_sparse_moe.experts.160.w2", "model.layers.55.block_sparse_moe.experts.161.w2", "model.layers.55.block_sparse_moe.experts.162.w2", "model.layers.55.block_sparse_moe.experts.163.w2", "model.layers.55.block_sparse_moe.experts.164.w2", "model.layers.55.block_sparse_moe.experts.165.w2", "model.layers.55.block_sparse_moe.experts.166.w2", "model.layers.55.block_sparse_moe.experts.167.w2", "model.layers.55.block_sparse_moe.experts.168.w2", "model.layers.55.block_sparse_moe.experts.169.w2", "model.layers.55.block_sparse_moe.experts.170.w2", "model.layers.55.block_sparse_moe.experts.171.w2", "model.layers.55.block_sparse_moe.experts.172.w2", "model.layers.55.block_sparse_moe.experts.173.w2", "model.layers.55.block_sparse_moe.experts.174.w2", "model.layers.55.block_sparse_moe.experts.175.w2", "model.layers.55.block_sparse_moe.experts.176.w2", "model.layers.55.block_sparse_moe.experts.177.w2", "model.layers.55.block_sparse_moe.experts.178.w2", "model.layers.55.block_sparse_moe.experts.179.w2", "model.layers.55.block_sparse_moe.experts.180.w2", "model.layers.55.block_sparse_moe.experts.181.w2", "model.layers.55.block_sparse_moe.experts.182.w2", "model.layers.55.block_sparse_moe.experts.183.w2", "model.layers.55.block_sparse_moe.experts.184.w2", "model.layers.55.block_sparse_moe.experts.185.w2", "model.layers.55.block_sparse_moe.experts.186.w2", "model.layers.55.block_sparse_moe.experts.187.w2", "model.layers.55.block_sparse_moe.experts.188.w2", "model.layers.55.block_sparse_moe.experts.189.w2", "model.layers.55.block_sparse_moe.experts.190.w2", "model.layers.55.block_sparse_moe.experts.191.w2", "model.layers.55.block_sparse_moe.experts.192.w2", "model.layers.55.block_sparse_moe.experts.193.w2", "model.layers.55.block_sparse_moe.experts.194.w2", "model.layers.55.block_sparse_moe.experts.195.w2", "model.layers.55.block_sparse_moe.experts.196.w2", "model.layers.55.block_sparse_moe.experts.197.w2", "model.layers.55.block_sparse_moe.experts.198.w2", "model.layers.55.block_sparse_moe.experts.199.w2", "model.layers.55.block_sparse_moe.experts.200.w2", "model.layers.55.block_sparse_moe.experts.201.w2", "model.layers.55.block_sparse_moe.experts.202.w2", "model.layers.55.block_sparse_moe.experts.203.w2", "model.layers.55.block_sparse_moe.experts.204.w2", "model.layers.55.block_sparse_moe.experts.205.w2", "model.layers.55.block_sparse_moe.experts.206.w2", "model.layers.55.block_sparse_moe.experts.207.w2", "model.layers.55.block_sparse_moe.experts.208.w2", "model.layers.55.block_sparse_moe.experts.209.w2", "model.layers.55.block_sparse_moe.experts.210.w2", "model.layers.55.block_sparse_moe.experts.211.w2", "model.layers.55.block_sparse_moe.experts.212.w2", "model.layers.55.block_sparse_moe.experts.213.w2", "model.layers.55.block_sparse_moe.experts.214.w2", "model.layers.55.block_sparse_moe.experts.215.w2", "model.layers.55.block_sparse_moe.experts.216.w2", "model.layers.55.block_sparse_moe.experts.217.w2", "model.layers.55.block_sparse_moe.experts.218.w2", "model.layers.55.block_sparse_moe.experts.219.w2", "model.layers.55.block_sparse_moe.experts.220.w2", "model.layers.55.block_sparse_moe.experts.221.w2", "model.layers.55.block_sparse_moe.experts.222.w2", "model.layers.55.block_sparse_moe.experts.223.w2", "model.layers.55.block_sparse_moe.experts.224.w2", "model.layers.55.block_sparse_moe.experts.225.w2", "model.layers.55.block_sparse_moe.experts.226.w2", "model.layers.55.block_sparse_moe.experts.227.w2", "model.layers.55.block_sparse_moe.experts.228.w2", "model.layers.55.block_sparse_moe.experts.229.w2", "model.layers.55.block_sparse_moe.experts.230.w2", "model.layers.55.block_sparse_moe.experts.231.w2", "model.layers.55.block_sparse_moe.experts.232.w2", "model.layers.55.block_sparse_moe.experts.233.w2", "model.layers.55.block_sparse_moe.experts.234.w2", "model.layers.55.block_sparse_moe.experts.235.w2", "model.layers.55.block_sparse_moe.experts.236.w2", "model.layers.55.block_sparse_moe.experts.237.w2", "model.layers.55.block_sparse_moe.experts.238.w2", "model.layers.55.block_sparse_moe.experts.239.w2", "model.layers.55.block_sparse_moe.experts.240.w2", "model.layers.55.block_sparse_moe.experts.241.w2", "model.layers.55.block_sparse_moe.experts.242.w2", "model.layers.55.block_sparse_moe.experts.243.w2", "model.layers.55.block_sparse_moe.experts.244.w2", "model.layers.55.block_sparse_moe.experts.245.w2", "model.layers.55.block_sparse_moe.experts.246.w2", "model.layers.55.block_sparse_moe.experts.247.w2", "model.layers.55.block_sparse_moe.experts.248.w2", "model.layers.55.block_sparse_moe.experts.249.w2", "model.layers.55.block_sparse_moe.experts.250.w2", "model.layers.55.block_sparse_moe.experts.251.w2", "model.layers.55.block_sparse_moe.experts.252.w2", "model.layers.55.block_sparse_moe.experts.253.w2", "model.layers.55.block_sparse_moe.experts.254.w2", "model.layers.55.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -3.1756237149238586e-05, "dbits": 1207959552 } ] }, { "idx": 280, "layers": [ "model.layers.56.self_attn.q_proj" ], "candidates": [ { "dkld": -0.00015759244561197594, "dbits": 18874368 } ] }, { "idx": 281, "layers": [ "model.layers.56.self_attn.k_proj", "model.layers.56.self_attn.v_proj" ], "candidates": [ { "dkld": -0.002001548185944557, "dbits": 6291456 } ] }, { "idx": 282, "layers": [ "model.layers.56.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0007587078958749549, "dbits": 18874368 } ] }, { "idx": 283, "layers": [ "model.layers.56.block_sparse_moe.experts.0.w1", "model.layers.56.block_sparse_moe.experts.1.w1", "model.layers.56.block_sparse_moe.experts.2.w1", "model.layers.56.block_sparse_moe.experts.3.w1", "model.layers.56.block_sparse_moe.experts.4.w1", "model.layers.56.block_sparse_moe.experts.5.w1", "model.layers.56.block_sparse_moe.experts.6.w1", "model.layers.56.block_sparse_moe.experts.7.w1", "model.layers.56.block_sparse_moe.experts.8.w1", "model.layers.56.block_sparse_moe.experts.9.w1", "model.layers.56.block_sparse_moe.experts.10.w1", "model.layers.56.block_sparse_moe.experts.11.w1", "model.layers.56.block_sparse_moe.experts.12.w1", "model.layers.56.block_sparse_moe.experts.13.w1", "model.layers.56.block_sparse_moe.experts.14.w1", "model.layers.56.block_sparse_moe.experts.15.w1", "model.layers.56.block_sparse_moe.experts.16.w1", "model.layers.56.block_sparse_moe.experts.17.w1", "model.layers.56.block_sparse_moe.experts.18.w1", "model.layers.56.block_sparse_moe.experts.19.w1", "model.layers.56.block_sparse_moe.experts.20.w1", "model.layers.56.block_sparse_moe.experts.21.w1", "model.layers.56.block_sparse_moe.experts.22.w1", "model.layers.56.block_sparse_moe.experts.23.w1", "model.layers.56.block_sparse_moe.experts.24.w1", "model.layers.56.block_sparse_moe.experts.25.w1", "model.layers.56.block_sparse_moe.experts.26.w1", "model.layers.56.block_sparse_moe.experts.27.w1", "model.layers.56.block_sparse_moe.experts.28.w1", "model.layers.56.block_sparse_moe.experts.29.w1", "model.layers.56.block_sparse_moe.experts.30.w1", "model.layers.56.block_sparse_moe.experts.31.w1", "model.layers.56.block_sparse_moe.experts.32.w1", "model.layers.56.block_sparse_moe.experts.33.w1", "model.layers.56.block_sparse_moe.experts.34.w1", "model.layers.56.block_sparse_moe.experts.35.w1", "model.layers.56.block_sparse_moe.experts.36.w1", "model.layers.56.block_sparse_moe.experts.37.w1", "model.layers.56.block_sparse_moe.experts.38.w1", "model.layers.56.block_sparse_moe.experts.39.w1", "model.layers.56.block_sparse_moe.experts.40.w1", "model.layers.56.block_sparse_moe.experts.41.w1", "model.layers.56.block_sparse_moe.experts.42.w1", "model.layers.56.block_sparse_moe.experts.43.w1", "model.layers.56.block_sparse_moe.experts.44.w1", "model.layers.56.block_sparse_moe.experts.45.w1", "model.layers.56.block_sparse_moe.experts.46.w1", "model.layers.56.block_sparse_moe.experts.47.w1", "model.layers.56.block_sparse_moe.experts.48.w1", "model.layers.56.block_sparse_moe.experts.49.w1", "model.layers.56.block_sparse_moe.experts.50.w1", "model.layers.56.block_sparse_moe.experts.51.w1", "model.layers.56.block_sparse_moe.experts.52.w1", "model.layers.56.block_sparse_moe.experts.53.w1", "model.layers.56.block_sparse_moe.experts.54.w1", "model.layers.56.block_sparse_moe.experts.55.w1", "model.layers.56.block_sparse_moe.experts.56.w1", "model.layers.56.block_sparse_moe.experts.57.w1", "model.layers.56.block_sparse_moe.experts.58.w1", "model.layers.56.block_sparse_moe.experts.59.w1", "model.layers.56.block_sparse_moe.experts.60.w1", "model.layers.56.block_sparse_moe.experts.61.w1", "model.layers.56.block_sparse_moe.experts.62.w1", "model.layers.56.block_sparse_moe.experts.63.w1", "model.layers.56.block_sparse_moe.experts.64.w1", "model.layers.56.block_sparse_moe.experts.65.w1", "model.layers.56.block_sparse_moe.experts.66.w1", "model.layers.56.block_sparse_moe.experts.67.w1", "model.layers.56.block_sparse_moe.experts.68.w1", "model.layers.56.block_sparse_moe.experts.69.w1", "model.layers.56.block_sparse_moe.experts.70.w1", "model.layers.56.block_sparse_moe.experts.71.w1", "model.layers.56.block_sparse_moe.experts.72.w1", "model.layers.56.block_sparse_moe.experts.73.w1", "model.layers.56.block_sparse_moe.experts.74.w1", "model.layers.56.block_sparse_moe.experts.75.w1", "model.layers.56.block_sparse_moe.experts.76.w1", "model.layers.56.block_sparse_moe.experts.77.w1", "model.layers.56.block_sparse_moe.experts.78.w1", "model.layers.56.block_sparse_moe.experts.79.w1", "model.layers.56.block_sparse_moe.experts.80.w1", "model.layers.56.block_sparse_moe.experts.81.w1", "model.layers.56.block_sparse_moe.experts.82.w1", "model.layers.56.block_sparse_moe.experts.83.w1", "model.layers.56.block_sparse_moe.experts.84.w1", "model.layers.56.block_sparse_moe.experts.85.w1", "model.layers.56.block_sparse_moe.experts.86.w1", "model.layers.56.block_sparse_moe.experts.87.w1", "model.layers.56.block_sparse_moe.experts.88.w1", "model.layers.56.block_sparse_moe.experts.89.w1", "model.layers.56.block_sparse_moe.experts.90.w1", "model.layers.56.block_sparse_moe.experts.91.w1", "model.layers.56.block_sparse_moe.experts.92.w1", "model.layers.56.block_sparse_moe.experts.93.w1", "model.layers.56.block_sparse_moe.experts.94.w1", "model.layers.56.block_sparse_moe.experts.95.w1", "model.layers.56.block_sparse_moe.experts.96.w1", "model.layers.56.block_sparse_moe.experts.97.w1", "model.layers.56.block_sparse_moe.experts.98.w1", "model.layers.56.block_sparse_moe.experts.99.w1", "model.layers.56.block_sparse_moe.experts.100.w1", "model.layers.56.block_sparse_moe.experts.101.w1", "model.layers.56.block_sparse_moe.experts.102.w1", "model.layers.56.block_sparse_moe.experts.103.w1", "model.layers.56.block_sparse_moe.experts.104.w1", "model.layers.56.block_sparse_moe.experts.105.w1", "model.layers.56.block_sparse_moe.experts.106.w1", "model.layers.56.block_sparse_moe.experts.107.w1", "model.layers.56.block_sparse_moe.experts.108.w1", "model.layers.56.block_sparse_moe.experts.109.w1", "model.layers.56.block_sparse_moe.experts.110.w1", "model.layers.56.block_sparse_moe.experts.111.w1", "model.layers.56.block_sparse_moe.experts.112.w1", "model.layers.56.block_sparse_moe.experts.113.w1", "model.layers.56.block_sparse_moe.experts.114.w1", "model.layers.56.block_sparse_moe.experts.115.w1", "model.layers.56.block_sparse_moe.experts.116.w1", "model.layers.56.block_sparse_moe.experts.117.w1", "model.layers.56.block_sparse_moe.experts.118.w1", "model.layers.56.block_sparse_moe.experts.119.w1", "model.layers.56.block_sparse_moe.experts.120.w1", "model.layers.56.block_sparse_moe.experts.121.w1", "model.layers.56.block_sparse_moe.experts.122.w1", "model.layers.56.block_sparse_moe.experts.123.w1", "model.layers.56.block_sparse_moe.experts.124.w1", "model.layers.56.block_sparse_moe.experts.125.w1", "model.layers.56.block_sparse_moe.experts.126.w1", "model.layers.56.block_sparse_moe.experts.127.w1", "model.layers.56.block_sparse_moe.experts.128.w1", "model.layers.56.block_sparse_moe.experts.129.w1", "model.layers.56.block_sparse_moe.experts.130.w1", "model.layers.56.block_sparse_moe.experts.131.w1", "model.layers.56.block_sparse_moe.experts.132.w1", "model.layers.56.block_sparse_moe.experts.133.w1", "model.layers.56.block_sparse_moe.experts.134.w1", "model.layers.56.block_sparse_moe.experts.135.w1", "model.layers.56.block_sparse_moe.experts.136.w1", "model.layers.56.block_sparse_moe.experts.137.w1", "model.layers.56.block_sparse_moe.experts.138.w1", "model.layers.56.block_sparse_moe.experts.139.w1", "model.layers.56.block_sparse_moe.experts.140.w1", "model.layers.56.block_sparse_moe.experts.141.w1", "model.layers.56.block_sparse_moe.experts.142.w1", "model.layers.56.block_sparse_moe.experts.143.w1", "model.layers.56.block_sparse_moe.experts.144.w1", "model.layers.56.block_sparse_moe.experts.145.w1", "model.layers.56.block_sparse_moe.experts.146.w1", "model.layers.56.block_sparse_moe.experts.147.w1", "model.layers.56.block_sparse_moe.experts.148.w1", "model.layers.56.block_sparse_moe.experts.149.w1", "model.layers.56.block_sparse_moe.experts.150.w1", "model.layers.56.block_sparse_moe.experts.151.w1", "model.layers.56.block_sparse_moe.experts.152.w1", "model.layers.56.block_sparse_moe.experts.153.w1", "model.layers.56.block_sparse_moe.experts.154.w1", "model.layers.56.block_sparse_moe.experts.155.w1", "model.layers.56.block_sparse_moe.experts.156.w1", "model.layers.56.block_sparse_moe.experts.157.w1", "model.layers.56.block_sparse_moe.experts.158.w1", "model.layers.56.block_sparse_moe.experts.159.w1", "model.layers.56.block_sparse_moe.experts.160.w1", "model.layers.56.block_sparse_moe.experts.161.w1", "model.layers.56.block_sparse_moe.experts.162.w1", "model.layers.56.block_sparse_moe.experts.163.w1", "model.layers.56.block_sparse_moe.experts.164.w1", "model.layers.56.block_sparse_moe.experts.165.w1", "model.layers.56.block_sparse_moe.experts.166.w1", "model.layers.56.block_sparse_moe.experts.167.w1", "model.layers.56.block_sparse_moe.experts.168.w1", "model.layers.56.block_sparse_moe.experts.169.w1", "model.layers.56.block_sparse_moe.experts.170.w1", "model.layers.56.block_sparse_moe.experts.171.w1", "model.layers.56.block_sparse_moe.experts.172.w1", "model.layers.56.block_sparse_moe.experts.173.w1", "model.layers.56.block_sparse_moe.experts.174.w1", "model.layers.56.block_sparse_moe.experts.175.w1", "model.layers.56.block_sparse_moe.experts.176.w1", "model.layers.56.block_sparse_moe.experts.177.w1", "model.layers.56.block_sparse_moe.experts.178.w1", "model.layers.56.block_sparse_moe.experts.179.w1", "model.layers.56.block_sparse_moe.experts.180.w1", "model.layers.56.block_sparse_moe.experts.181.w1", "model.layers.56.block_sparse_moe.experts.182.w1", "model.layers.56.block_sparse_moe.experts.183.w1", "model.layers.56.block_sparse_moe.experts.184.w1", "model.layers.56.block_sparse_moe.experts.185.w1", "model.layers.56.block_sparse_moe.experts.186.w1", "model.layers.56.block_sparse_moe.experts.187.w1", "model.layers.56.block_sparse_moe.experts.188.w1", "model.layers.56.block_sparse_moe.experts.189.w1", "model.layers.56.block_sparse_moe.experts.190.w1", "model.layers.56.block_sparse_moe.experts.191.w1", "model.layers.56.block_sparse_moe.experts.192.w1", "model.layers.56.block_sparse_moe.experts.193.w1", "model.layers.56.block_sparse_moe.experts.194.w1", "model.layers.56.block_sparse_moe.experts.195.w1", "model.layers.56.block_sparse_moe.experts.196.w1", "model.layers.56.block_sparse_moe.experts.197.w1", "model.layers.56.block_sparse_moe.experts.198.w1", "model.layers.56.block_sparse_moe.experts.199.w1", "model.layers.56.block_sparse_moe.experts.200.w1", "model.layers.56.block_sparse_moe.experts.201.w1", "model.layers.56.block_sparse_moe.experts.202.w1", "model.layers.56.block_sparse_moe.experts.203.w1", "model.layers.56.block_sparse_moe.experts.204.w1", "model.layers.56.block_sparse_moe.experts.205.w1", "model.layers.56.block_sparse_moe.experts.206.w1", "model.layers.56.block_sparse_moe.experts.207.w1", "model.layers.56.block_sparse_moe.experts.208.w1", "model.layers.56.block_sparse_moe.experts.209.w1", "model.layers.56.block_sparse_moe.experts.210.w1", "model.layers.56.block_sparse_moe.experts.211.w1", "model.layers.56.block_sparse_moe.experts.212.w1", "model.layers.56.block_sparse_moe.experts.213.w1", "model.layers.56.block_sparse_moe.experts.214.w1", "model.layers.56.block_sparse_moe.experts.215.w1", "model.layers.56.block_sparse_moe.experts.216.w1", "model.layers.56.block_sparse_moe.experts.217.w1", "model.layers.56.block_sparse_moe.experts.218.w1", "model.layers.56.block_sparse_moe.experts.219.w1", "model.layers.56.block_sparse_moe.experts.220.w1", "model.layers.56.block_sparse_moe.experts.221.w1", "model.layers.56.block_sparse_moe.experts.222.w1", "model.layers.56.block_sparse_moe.experts.223.w1", "model.layers.56.block_sparse_moe.experts.224.w1", "model.layers.56.block_sparse_moe.experts.225.w1", "model.layers.56.block_sparse_moe.experts.226.w1", "model.layers.56.block_sparse_moe.experts.227.w1", "model.layers.56.block_sparse_moe.experts.228.w1", "model.layers.56.block_sparse_moe.experts.229.w1", "model.layers.56.block_sparse_moe.experts.230.w1", "model.layers.56.block_sparse_moe.experts.231.w1", "model.layers.56.block_sparse_moe.experts.232.w1", "model.layers.56.block_sparse_moe.experts.233.w1", "model.layers.56.block_sparse_moe.experts.234.w1", "model.layers.56.block_sparse_moe.experts.235.w1", "model.layers.56.block_sparse_moe.experts.236.w1", "model.layers.56.block_sparse_moe.experts.237.w1", "model.layers.56.block_sparse_moe.experts.238.w1", "model.layers.56.block_sparse_moe.experts.239.w1", "model.layers.56.block_sparse_moe.experts.240.w1", "model.layers.56.block_sparse_moe.experts.241.w1", "model.layers.56.block_sparse_moe.experts.242.w1", "model.layers.56.block_sparse_moe.experts.243.w1", "model.layers.56.block_sparse_moe.experts.244.w1", "model.layers.56.block_sparse_moe.experts.245.w1", "model.layers.56.block_sparse_moe.experts.246.w1", "model.layers.56.block_sparse_moe.experts.247.w1", "model.layers.56.block_sparse_moe.experts.248.w1", "model.layers.56.block_sparse_moe.experts.249.w1", "model.layers.56.block_sparse_moe.experts.250.w1", "model.layers.56.block_sparse_moe.experts.251.w1", "model.layers.56.block_sparse_moe.experts.252.w1", "model.layers.56.block_sparse_moe.experts.253.w1", "model.layers.56.block_sparse_moe.experts.254.w1", "model.layers.56.block_sparse_moe.experts.255.w1", "model.layers.56.block_sparse_moe.experts.0.w3", "model.layers.56.block_sparse_moe.experts.1.w3", "model.layers.56.block_sparse_moe.experts.2.w3", "model.layers.56.block_sparse_moe.experts.3.w3", "model.layers.56.block_sparse_moe.experts.4.w3", "model.layers.56.block_sparse_moe.experts.5.w3", "model.layers.56.block_sparse_moe.experts.6.w3", "model.layers.56.block_sparse_moe.experts.7.w3", "model.layers.56.block_sparse_moe.experts.8.w3", "model.layers.56.block_sparse_moe.experts.9.w3", "model.layers.56.block_sparse_moe.experts.10.w3", "model.layers.56.block_sparse_moe.experts.11.w3", "model.layers.56.block_sparse_moe.experts.12.w3", "model.layers.56.block_sparse_moe.experts.13.w3", "model.layers.56.block_sparse_moe.experts.14.w3", "model.layers.56.block_sparse_moe.experts.15.w3", "model.layers.56.block_sparse_moe.experts.16.w3", "model.layers.56.block_sparse_moe.experts.17.w3", "model.layers.56.block_sparse_moe.experts.18.w3", "model.layers.56.block_sparse_moe.experts.19.w3", "model.layers.56.block_sparse_moe.experts.20.w3", "model.layers.56.block_sparse_moe.experts.21.w3", "model.layers.56.block_sparse_moe.experts.22.w3", "model.layers.56.block_sparse_moe.experts.23.w3", "model.layers.56.block_sparse_moe.experts.24.w3", "model.layers.56.block_sparse_moe.experts.25.w3", "model.layers.56.block_sparse_moe.experts.26.w3", "model.layers.56.block_sparse_moe.experts.27.w3", "model.layers.56.block_sparse_moe.experts.28.w3", "model.layers.56.block_sparse_moe.experts.29.w3", "model.layers.56.block_sparse_moe.experts.30.w3", "model.layers.56.block_sparse_moe.experts.31.w3", "model.layers.56.block_sparse_moe.experts.32.w3", "model.layers.56.block_sparse_moe.experts.33.w3", "model.layers.56.block_sparse_moe.experts.34.w3", "model.layers.56.block_sparse_moe.experts.35.w3", "model.layers.56.block_sparse_moe.experts.36.w3", "model.layers.56.block_sparse_moe.experts.37.w3", "model.layers.56.block_sparse_moe.experts.38.w3", "model.layers.56.block_sparse_moe.experts.39.w3", "model.layers.56.block_sparse_moe.experts.40.w3", "model.layers.56.block_sparse_moe.experts.41.w3", "model.layers.56.block_sparse_moe.experts.42.w3", "model.layers.56.block_sparse_moe.experts.43.w3", "model.layers.56.block_sparse_moe.experts.44.w3", "model.layers.56.block_sparse_moe.experts.45.w3", "model.layers.56.block_sparse_moe.experts.46.w3", "model.layers.56.block_sparse_moe.experts.47.w3", "model.layers.56.block_sparse_moe.experts.48.w3", "model.layers.56.block_sparse_moe.experts.49.w3", "model.layers.56.block_sparse_moe.experts.50.w3", "model.layers.56.block_sparse_moe.experts.51.w3", "model.layers.56.block_sparse_moe.experts.52.w3", "model.layers.56.block_sparse_moe.experts.53.w3", "model.layers.56.block_sparse_moe.experts.54.w3", "model.layers.56.block_sparse_moe.experts.55.w3", "model.layers.56.block_sparse_moe.experts.56.w3", "model.layers.56.block_sparse_moe.experts.57.w3", "model.layers.56.block_sparse_moe.experts.58.w3", "model.layers.56.block_sparse_moe.experts.59.w3", "model.layers.56.block_sparse_moe.experts.60.w3", "model.layers.56.block_sparse_moe.experts.61.w3", "model.layers.56.block_sparse_moe.experts.62.w3", "model.layers.56.block_sparse_moe.experts.63.w3", "model.layers.56.block_sparse_moe.experts.64.w3", "model.layers.56.block_sparse_moe.experts.65.w3", "model.layers.56.block_sparse_moe.experts.66.w3", "model.layers.56.block_sparse_moe.experts.67.w3", "model.layers.56.block_sparse_moe.experts.68.w3", "model.layers.56.block_sparse_moe.experts.69.w3", "model.layers.56.block_sparse_moe.experts.70.w3", "model.layers.56.block_sparse_moe.experts.71.w3", "model.layers.56.block_sparse_moe.experts.72.w3", "model.layers.56.block_sparse_moe.experts.73.w3", "model.layers.56.block_sparse_moe.experts.74.w3", "model.layers.56.block_sparse_moe.experts.75.w3", "model.layers.56.block_sparse_moe.experts.76.w3", "model.layers.56.block_sparse_moe.experts.77.w3", "model.layers.56.block_sparse_moe.experts.78.w3", "model.layers.56.block_sparse_moe.experts.79.w3", "model.layers.56.block_sparse_moe.experts.80.w3", "model.layers.56.block_sparse_moe.experts.81.w3", "model.layers.56.block_sparse_moe.experts.82.w3", "model.layers.56.block_sparse_moe.experts.83.w3", "model.layers.56.block_sparse_moe.experts.84.w3", "model.layers.56.block_sparse_moe.experts.85.w3", "model.layers.56.block_sparse_moe.experts.86.w3", "model.layers.56.block_sparse_moe.experts.87.w3", "model.layers.56.block_sparse_moe.experts.88.w3", "model.layers.56.block_sparse_moe.experts.89.w3", "model.layers.56.block_sparse_moe.experts.90.w3", "model.layers.56.block_sparse_moe.experts.91.w3", "model.layers.56.block_sparse_moe.experts.92.w3", "model.layers.56.block_sparse_moe.experts.93.w3", "model.layers.56.block_sparse_moe.experts.94.w3", "model.layers.56.block_sparse_moe.experts.95.w3", "model.layers.56.block_sparse_moe.experts.96.w3", "model.layers.56.block_sparse_moe.experts.97.w3", "model.layers.56.block_sparse_moe.experts.98.w3", "model.layers.56.block_sparse_moe.experts.99.w3", "model.layers.56.block_sparse_moe.experts.100.w3", "model.layers.56.block_sparse_moe.experts.101.w3", "model.layers.56.block_sparse_moe.experts.102.w3", "model.layers.56.block_sparse_moe.experts.103.w3", "model.layers.56.block_sparse_moe.experts.104.w3", "model.layers.56.block_sparse_moe.experts.105.w3", "model.layers.56.block_sparse_moe.experts.106.w3", "model.layers.56.block_sparse_moe.experts.107.w3", "model.layers.56.block_sparse_moe.experts.108.w3", "model.layers.56.block_sparse_moe.experts.109.w3", "model.layers.56.block_sparse_moe.experts.110.w3", "model.layers.56.block_sparse_moe.experts.111.w3", "model.layers.56.block_sparse_moe.experts.112.w3", "model.layers.56.block_sparse_moe.experts.113.w3", "model.layers.56.block_sparse_moe.experts.114.w3", "model.layers.56.block_sparse_moe.experts.115.w3", "model.layers.56.block_sparse_moe.experts.116.w3", "model.layers.56.block_sparse_moe.experts.117.w3", "model.layers.56.block_sparse_moe.experts.118.w3", "model.layers.56.block_sparse_moe.experts.119.w3", "model.layers.56.block_sparse_moe.experts.120.w3", "model.layers.56.block_sparse_moe.experts.121.w3", "model.layers.56.block_sparse_moe.experts.122.w3", "model.layers.56.block_sparse_moe.experts.123.w3", "model.layers.56.block_sparse_moe.experts.124.w3", "model.layers.56.block_sparse_moe.experts.125.w3", "model.layers.56.block_sparse_moe.experts.126.w3", "model.layers.56.block_sparse_moe.experts.127.w3", "model.layers.56.block_sparse_moe.experts.128.w3", "model.layers.56.block_sparse_moe.experts.129.w3", "model.layers.56.block_sparse_moe.experts.130.w3", "model.layers.56.block_sparse_moe.experts.131.w3", "model.layers.56.block_sparse_moe.experts.132.w3", "model.layers.56.block_sparse_moe.experts.133.w3", "model.layers.56.block_sparse_moe.experts.134.w3", "model.layers.56.block_sparse_moe.experts.135.w3", "model.layers.56.block_sparse_moe.experts.136.w3", "model.layers.56.block_sparse_moe.experts.137.w3", "model.layers.56.block_sparse_moe.experts.138.w3", "model.layers.56.block_sparse_moe.experts.139.w3", "model.layers.56.block_sparse_moe.experts.140.w3", "model.layers.56.block_sparse_moe.experts.141.w3", "model.layers.56.block_sparse_moe.experts.142.w3", "model.layers.56.block_sparse_moe.experts.143.w3", "model.layers.56.block_sparse_moe.experts.144.w3", "model.layers.56.block_sparse_moe.experts.145.w3", "model.layers.56.block_sparse_moe.experts.146.w3", "model.layers.56.block_sparse_moe.experts.147.w3", "model.layers.56.block_sparse_moe.experts.148.w3", "model.layers.56.block_sparse_moe.experts.149.w3", "model.layers.56.block_sparse_moe.experts.150.w3", "model.layers.56.block_sparse_moe.experts.151.w3", "model.layers.56.block_sparse_moe.experts.152.w3", "model.layers.56.block_sparse_moe.experts.153.w3", "model.layers.56.block_sparse_moe.experts.154.w3", "model.layers.56.block_sparse_moe.experts.155.w3", "model.layers.56.block_sparse_moe.experts.156.w3", "model.layers.56.block_sparse_moe.experts.157.w3", "model.layers.56.block_sparse_moe.experts.158.w3", "model.layers.56.block_sparse_moe.experts.159.w3", "model.layers.56.block_sparse_moe.experts.160.w3", "model.layers.56.block_sparse_moe.experts.161.w3", "model.layers.56.block_sparse_moe.experts.162.w3", "model.layers.56.block_sparse_moe.experts.163.w3", "model.layers.56.block_sparse_moe.experts.164.w3", "model.layers.56.block_sparse_moe.experts.165.w3", "model.layers.56.block_sparse_moe.experts.166.w3", "model.layers.56.block_sparse_moe.experts.167.w3", "model.layers.56.block_sparse_moe.experts.168.w3", "model.layers.56.block_sparse_moe.experts.169.w3", "model.layers.56.block_sparse_moe.experts.170.w3", "model.layers.56.block_sparse_moe.experts.171.w3", "model.layers.56.block_sparse_moe.experts.172.w3", "model.layers.56.block_sparse_moe.experts.173.w3", "model.layers.56.block_sparse_moe.experts.174.w3", "model.layers.56.block_sparse_moe.experts.175.w3", "model.layers.56.block_sparse_moe.experts.176.w3", "model.layers.56.block_sparse_moe.experts.177.w3", "model.layers.56.block_sparse_moe.experts.178.w3", "model.layers.56.block_sparse_moe.experts.179.w3", "model.layers.56.block_sparse_moe.experts.180.w3", "model.layers.56.block_sparse_moe.experts.181.w3", "model.layers.56.block_sparse_moe.experts.182.w3", "model.layers.56.block_sparse_moe.experts.183.w3", "model.layers.56.block_sparse_moe.experts.184.w3", "model.layers.56.block_sparse_moe.experts.185.w3", "model.layers.56.block_sparse_moe.experts.186.w3", "model.layers.56.block_sparse_moe.experts.187.w3", "model.layers.56.block_sparse_moe.experts.188.w3", "model.layers.56.block_sparse_moe.experts.189.w3", "model.layers.56.block_sparse_moe.experts.190.w3", "model.layers.56.block_sparse_moe.experts.191.w3", "model.layers.56.block_sparse_moe.experts.192.w3", "model.layers.56.block_sparse_moe.experts.193.w3", "model.layers.56.block_sparse_moe.experts.194.w3", "model.layers.56.block_sparse_moe.experts.195.w3", "model.layers.56.block_sparse_moe.experts.196.w3", "model.layers.56.block_sparse_moe.experts.197.w3", "model.layers.56.block_sparse_moe.experts.198.w3", "model.layers.56.block_sparse_moe.experts.199.w3", "model.layers.56.block_sparse_moe.experts.200.w3", "model.layers.56.block_sparse_moe.experts.201.w3", "model.layers.56.block_sparse_moe.experts.202.w3", "model.layers.56.block_sparse_moe.experts.203.w3", "model.layers.56.block_sparse_moe.experts.204.w3", "model.layers.56.block_sparse_moe.experts.205.w3", "model.layers.56.block_sparse_moe.experts.206.w3", "model.layers.56.block_sparse_moe.experts.207.w3", "model.layers.56.block_sparse_moe.experts.208.w3", "model.layers.56.block_sparse_moe.experts.209.w3", "model.layers.56.block_sparse_moe.experts.210.w3", "model.layers.56.block_sparse_moe.experts.211.w3", "model.layers.56.block_sparse_moe.experts.212.w3", "model.layers.56.block_sparse_moe.experts.213.w3", "model.layers.56.block_sparse_moe.experts.214.w3", "model.layers.56.block_sparse_moe.experts.215.w3", "model.layers.56.block_sparse_moe.experts.216.w3", "model.layers.56.block_sparse_moe.experts.217.w3", "model.layers.56.block_sparse_moe.experts.218.w3", "model.layers.56.block_sparse_moe.experts.219.w3", "model.layers.56.block_sparse_moe.experts.220.w3", "model.layers.56.block_sparse_moe.experts.221.w3", "model.layers.56.block_sparse_moe.experts.222.w3", "model.layers.56.block_sparse_moe.experts.223.w3", "model.layers.56.block_sparse_moe.experts.224.w3", "model.layers.56.block_sparse_moe.experts.225.w3", "model.layers.56.block_sparse_moe.experts.226.w3", "model.layers.56.block_sparse_moe.experts.227.w3", "model.layers.56.block_sparse_moe.experts.228.w3", "model.layers.56.block_sparse_moe.experts.229.w3", "model.layers.56.block_sparse_moe.experts.230.w3", "model.layers.56.block_sparse_moe.experts.231.w3", "model.layers.56.block_sparse_moe.experts.232.w3", "model.layers.56.block_sparse_moe.experts.233.w3", "model.layers.56.block_sparse_moe.experts.234.w3", "model.layers.56.block_sparse_moe.experts.235.w3", "model.layers.56.block_sparse_moe.experts.236.w3", "model.layers.56.block_sparse_moe.experts.237.w3", "model.layers.56.block_sparse_moe.experts.238.w3", "model.layers.56.block_sparse_moe.experts.239.w3", "model.layers.56.block_sparse_moe.experts.240.w3", "model.layers.56.block_sparse_moe.experts.241.w3", "model.layers.56.block_sparse_moe.experts.242.w3", "model.layers.56.block_sparse_moe.experts.243.w3", "model.layers.56.block_sparse_moe.experts.244.w3", "model.layers.56.block_sparse_moe.experts.245.w3", "model.layers.56.block_sparse_moe.experts.246.w3", "model.layers.56.block_sparse_moe.experts.247.w3", "model.layers.56.block_sparse_moe.experts.248.w3", "model.layers.56.block_sparse_moe.experts.249.w3", "model.layers.56.block_sparse_moe.experts.250.w3", "model.layers.56.block_sparse_moe.experts.251.w3", "model.layers.56.block_sparse_moe.experts.252.w3", "model.layers.56.block_sparse_moe.experts.253.w3", "model.layers.56.block_sparse_moe.experts.254.w3", "model.layers.56.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -3.393962979320042e-05, "dbits": 2415919104 } ] }, { "idx": 284, "layers": [ "model.layers.56.block_sparse_moe.experts.0.w2", "model.layers.56.block_sparse_moe.experts.1.w2", "model.layers.56.block_sparse_moe.experts.2.w2", "model.layers.56.block_sparse_moe.experts.3.w2", "model.layers.56.block_sparse_moe.experts.4.w2", "model.layers.56.block_sparse_moe.experts.5.w2", "model.layers.56.block_sparse_moe.experts.6.w2", "model.layers.56.block_sparse_moe.experts.7.w2", "model.layers.56.block_sparse_moe.experts.8.w2", "model.layers.56.block_sparse_moe.experts.9.w2", "model.layers.56.block_sparse_moe.experts.10.w2", "model.layers.56.block_sparse_moe.experts.11.w2", "model.layers.56.block_sparse_moe.experts.12.w2", "model.layers.56.block_sparse_moe.experts.13.w2", "model.layers.56.block_sparse_moe.experts.14.w2", "model.layers.56.block_sparse_moe.experts.15.w2", "model.layers.56.block_sparse_moe.experts.16.w2", "model.layers.56.block_sparse_moe.experts.17.w2", "model.layers.56.block_sparse_moe.experts.18.w2", "model.layers.56.block_sparse_moe.experts.19.w2", "model.layers.56.block_sparse_moe.experts.20.w2", "model.layers.56.block_sparse_moe.experts.21.w2", "model.layers.56.block_sparse_moe.experts.22.w2", "model.layers.56.block_sparse_moe.experts.23.w2", "model.layers.56.block_sparse_moe.experts.24.w2", "model.layers.56.block_sparse_moe.experts.25.w2", "model.layers.56.block_sparse_moe.experts.26.w2", "model.layers.56.block_sparse_moe.experts.27.w2", "model.layers.56.block_sparse_moe.experts.28.w2", "model.layers.56.block_sparse_moe.experts.29.w2", "model.layers.56.block_sparse_moe.experts.30.w2", "model.layers.56.block_sparse_moe.experts.31.w2", "model.layers.56.block_sparse_moe.experts.32.w2", "model.layers.56.block_sparse_moe.experts.33.w2", "model.layers.56.block_sparse_moe.experts.34.w2", "model.layers.56.block_sparse_moe.experts.35.w2", "model.layers.56.block_sparse_moe.experts.36.w2", "model.layers.56.block_sparse_moe.experts.37.w2", "model.layers.56.block_sparse_moe.experts.38.w2", "model.layers.56.block_sparse_moe.experts.39.w2", "model.layers.56.block_sparse_moe.experts.40.w2", "model.layers.56.block_sparse_moe.experts.41.w2", "model.layers.56.block_sparse_moe.experts.42.w2", "model.layers.56.block_sparse_moe.experts.43.w2", "model.layers.56.block_sparse_moe.experts.44.w2", "model.layers.56.block_sparse_moe.experts.45.w2", "model.layers.56.block_sparse_moe.experts.46.w2", "model.layers.56.block_sparse_moe.experts.47.w2", "model.layers.56.block_sparse_moe.experts.48.w2", "model.layers.56.block_sparse_moe.experts.49.w2", "model.layers.56.block_sparse_moe.experts.50.w2", "model.layers.56.block_sparse_moe.experts.51.w2", "model.layers.56.block_sparse_moe.experts.52.w2", "model.layers.56.block_sparse_moe.experts.53.w2", "model.layers.56.block_sparse_moe.experts.54.w2", "model.layers.56.block_sparse_moe.experts.55.w2", "model.layers.56.block_sparse_moe.experts.56.w2", "model.layers.56.block_sparse_moe.experts.57.w2", "model.layers.56.block_sparse_moe.experts.58.w2", "model.layers.56.block_sparse_moe.experts.59.w2", "model.layers.56.block_sparse_moe.experts.60.w2", "model.layers.56.block_sparse_moe.experts.61.w2", "model.layers.56.block_sparse_moe.experts.62.w2", "model.layers.56.block_sparse_moe.experts.63.w2", "model.layers.56.block_sparse_moe.experts.64.w2", "model.layers.56.block_sparse_moe.experts.65.w2", "model.layers.56.block_sparse_moe.experts.66.w2", "model.layers.56.block_sparse_moe.experts.67.w2", "model.layers.56.block_sparse_moe.experts.68.w2", "model.layers.56.block_sparse_moe.experts.69.w2", "model.layers.56.block_sparse_moe.experts.70.w2", "model.layers.56.block_sparse_moe.experts.71.w2", "model.layers.56.block_sparse_moe.experts.72.w2", "model.layers.56.block_sparse_moe.experts.73.w2", "model.layers.56.block_sparse_moe.experts.74.w2", "model.layers.56.block_sparse_moe.experts.75.w2", "model.layers.56.block_sparse_moe.experts.76.w2", "model.layers.56.block_sparse_moe.experts.77.w2", "model.layers.56.block_sparse_moe.experts.78.w2", "model.layers.56.block_sparse_moe.experts.79.w2", "model.layers.56.block_sparse_moe.experts.80.w2", "model.layers.56.block_sparse_moe.experts.81.w2", "model.layers.56.block_sparse_moe.experts.82.w2", "model.layers.56.block_sparse_moe.experts.83.w2", "model.layers.56.block_sparse_moe.experts.84.w2", "model.layers.56.block_sparse_moe.experts.85.w2", "model.layers.56.block_sparse_moe.experts.86.w2", "model.layers.56.block_sparse_moe.experts.87.w2", "model.layers.56.block_sparse_moe.experts.88.w2", "model.layers.56.block_sparse_moe.experts.89.w2", "model.layers.56.block_sparse_moe.experts.90.w2", "model.layers.56.block_sparse_moe.experts.91.w2", "model.layers.56.block_sparse_moe.experts.92.w2", "model.layers.56.block_sparse_moe.experts.93.w2", "model.layers.56.block_sparse_moe.experts.94.w2", "model.layers.56.block_sparse_moe.experts.95.w2", "model.layers.56.block_sparse_moe.experts.96.w2", "model.layers.56.block_sparse_moe.experts.97.w2", "model.layers.56.block_sparse_moe.experts.98.w2", "model.layers.56.block_sparse_moe.experts.99.w2", "model.layers.56.block_sparse_moe.experts.100.w2", "model.layers.56.block_sparse_moe.experts.101.w2", "model.layers.56.block_sparse_moe.experts.102.w2", "model.layers.56.block_sparse_moe.experts.103.w2", "model.layers.56.block_sparse_moe.experts.104.w2", "model.layers.56.block_sparse_moe.experts.105.w2", "model.layers.56.block_sparse_moe.experts.106.w2", "model.layers.56.block_sparse_moe.experts.107.w2", "model.layers.56.block_sparse_moe.experts.108.w2", "model.layers.56.block_sparse_moe.experts.109.w2", "model.layers.56.block_sparse_moe.experts.110.w2", "model.layers.56.block_sparse_moe.experts.111.w2", "model.layers.56.block_sparse_moe.experts.112.w2", "model.layers.56.block_sparse_moe.experts.113.w2", "model.layers.56.block_sparse_moe.experts.114.w2", "model.layers.56.block_sparse_moe.experts.115.w2", "model.layers.56.block_sparse_moe.experts.116.w2", "model.layers.56.block_sparse_moe.experts.117.w2", "model.layers.56.block_sparse_moe.experts.118.w2", "model.layers.56.block_sparse_moe.experts.119.w2", "model.layers.56.block_sparse_moe.experts.120.w2", "model.layers.56.block_sparse_moe.experts.121.w2", "model.layers.56.block_sparse_moe.experts.122.w2", "model.layers.56.block_sparse_moe.experts.123.w2", "model.layers.56.block_sparse_moe.experts.124.w2", "model.layers.56.block_sparse_moe.experts.125.w2", "model.layers.56.block_sparse_moe.experts.126.w2", "model.layers.56.block_sparse_moe.experts.127.w2", "model.layers.56.block_sparse_moe.experts.128.w2", "model.layers.56.block_sparse_moe.experts.129.w2", "model.layers.56.block_sparse_moe.experts.130.w2", "model.layers.56.block_sparse_moe.experts.131.w2", "model.layers.56.block_sparse_moe.experts.132.w2", "model.layers.56.block_sparse_moe.experts.133.w2", "model.layers.56.block_sparse_moe.experts.134.w2", "model.layers.56.block_sparse_moe.experts.135.w2", "model.layers.56.block_sparse_moe.experts.136.w2", "model.layers.56.block_sparse_moe.experts.137.w2", "model.layers.56.block_sparse_moe.experts.138.w2", "model.layers.56.block_sparse_moe.experts.139.w2", "model.layers.56.block_sparse_moe.experts.140.w2", "model.layers.56.block_sparse_moe.experts.141.w2", "model.layers.56.block_sparse_moe.experts.142.w2", "model.layers.56.block_sparse_moe.experts.143.w2", "model.layers.56.block_sparse_moe.experts.144.w2", "model.layers.56.block_sparse_moe.experts.145.w2", "model.layers.56.block_sparse_moe.experts.146.w2", "model.layers.56.block_sparse_moe.experts.147.w2", "model.layers.56.block_sparse_moe.experts.148.w2", "model.layers.56.block_sparse_moe.experts.149.w2", "model.layers.56.block_sparse_moe.experts.150.w2", "model.layers.56.block_sparse_moe.experts.151.w2", "model.layers.56.block_sparse_moe.experts.152.w2", "model.layers.56.block_sparse_moe.experts.153.w2", "model.layers.56.block_sparse_moe.experts.154.w2", "model.layers.56.block_sparse_moe.experts.155.w2", "model.layers.56.block_sparse_moe.experts.156.w2", "model.layers.56.block_sparse_moe.experts.157.w2", "model.layers.56.block_sparse_moe.experts.158.w2", "model.layers.56.block_sparse_moe.experts.159.w2", "model.layers.56.block_sparse_moe.experts.160.w2", "model.layers.56.block_sparse_moe.experts.161.w2", "model.layers.56.block_sparse_moe.experts.162.w2", "model.layers.56.block_sparse_moe.experts.163.w2", "model.layers.56.block_sparse_moe.experts.164.w2", "model.layers.56.block_sparse_moe.experts.165.w2", "model.layers.56.block_sparse_moe.experts.166.w2", "model.layers.56.block_sparse_moe.experts.167.w2", "model.layers.56.block_sparse_moe.experts.168.w2", "model.layers.56.block_sparse_moe.experts.169.w2", "model.layers.56.block_sparse_moe.experts.170.w2", "model.layers.56.block_sparse_moe.experts.171.w2", "model.layers.56.block_sparse_moe.experts.172.w2", "model.layers.56.block_sparse_moe.experts.173.w2", "model.layers.56.block_sparse_moe.experts.174.w2", "model.layers.56.block_sparse_moe.experts.175.w2", "model.layers.56.block_sparse_moe.experts.176.w2", "model.layers.56.block_sparse_moe.experts.177.w2", "model.layers.56.block_sparse_moe.experts.178.w2", "model.layers.56.block_sparse_moe.experts.179.w2", "model.layers.56.block_sparse_moe.experts.180.w2", "model.layers.56.block_sparse_moe.experts.181.w2", "model.layers.56.block_sparse_moe.experts.182.w2", "model.layers.56.block_sparse_moe.experts.183.w2", "model.layers.56.block_sparse_moe.experts.184.w2", "model.layers.56.block_sparse_moe.experts.185.w2", "model.layers.56.block_sparse_moe.experts.186.w2", "model.layers.56.block_sparse_moe.experts.187.w2", "model.layers.56.block_sparse_moe.experts.188.w2", "model.layers.56.block_sparse_moe.experts.189.w2", "model.layers.56.block_sparse_moe.experts.190.w2", "model.layers.56.block_sparse_moe.experts.191.w2", "model.layers.56.block_sparse_moe.experts.192.w2", "model.layers.56.block_sparse_moe.experts.193.w2", "model.layers.56.block_sparse_moe.experts.194.w2", "model.layers.56.block_sparse_moe.experts.195.w2", "model.layers.56.block_sparse_moe.experts.196.w2", "model.layers.56.block_sparse_moe.experts.197.w2", "model.layers.56.block_sparse_moe.experts.198.w2", "model.layers.56.block_sparse_moe.experts.199.w2", "model.layers.56.block_sparse_moe.experts.200.w2", "model.layers.56.block_sparse_moe.experts.201.w2", "model.layers.56.block_sparse_moe.experts.202.w2", "model.layers.56.block_sparse_moe.experts.203.w2", "model.layers.56.block_sparse_moe.experts.204.w2", "model.layers.56.block_sparse_moe.experts.205.w2", "model.layers.56.block_sparse_moe.experts.206.w2", "model.layers.56.block_sparse_moe.experts.207.w2", "model.layers.56.block_sparse_moe.experts.208.w2", "model.layers.56.block_sparse_moe.experts.209.w2", "model.layers.56.block_sparse_moe.experts.210.w2", "model.layers.56.block_sparse_moe.experts.211.w2", "model.layers.56.block_sparse_moe.experts.212.w2", "model.layers.56.block_sparse_moe.experts.213.w2", "model.layers.56.block_sparse_moe.experts.214.w2", "model.layers.56.block_sparse_moe.experts.215.w2", "model.layers.56.block_sparse_moe.experts.216.w2", "model.layers.56.block_sparse_moe.experts.217.w2", "model.layers.56.block_sparse_moe.experts.218.w2", "model.layers.56.block_sparse_moe.experts.219.w2", "model.layers.56.block_sparse_moe.experts.220.w2", "model.layers.56.block_sparse_moe.experts.221.w2", "model.layers.56.block_sparse_moe.experts.222.w2", "model.layers.56.block_sparse_moe.experts.223.w2", "model.layers.56.block_sparse_moe.experts.224.w2", "model.layers.56.block_sparse_moe.experts.225.w2", "model.layers.56.block_sparse_moe.experts.226.w2", "model.layers.56.block_sparse_moe.experts.227.w2", "model.layers.56.block_sparse_moe.experts.228.w2", "model.layers.56.block_sparse_moe.experts.229.w2", "model.layers.56.block_sparse_moe.experts.230.w2", "model.layers.56.block_sparse_moe.experts.231.w2", "model.layers.56.block_sparse_moe.experts.232.w2", "model.layers.56.block_sparse_moe.experts.233.w2", "model.layers.56.block_sparse_moe.experts.234.w2", "model.layers.56.block_sparse_moe.experts.235.w2", "model.layers.56.block_sparse_moe.experts.236.w2", "model.layers.56.block_sparse_moe.experts.237.w2", "model.layers.56.block_sparse_moe.experts.238.w2", "model.layers.56.block_sparse_moe.experts.239.w2", "model.layers.56.block_sparse_moe.experts.240.w2", "model.layers.56.block_sparse_moe.experts.241.w2", "model.layers.56.block_sparse_moe.experts.242.w2", "model.layers.56.block_sparse_moe.experts.243.w2", "model.layers.56.block_sparse_moe.experts.244.w2", "model.layers.56.block_sparse_moe.experts.245.w2", "model.layers.56.block_sparse_moe.experts.246.w2", "model.layers.56.block_sparse_moe.experts.247.w2", "model.layers.56.block_sparse_moe.experts.248.w2", "model.layers.56.block_sparse_moe.experts.249.w2", "model.layers.56.block_sparse_moe.experts.250.w2", "model.layers.56.block_sparse_moe.experts.251.w2", "model.layers.56.block_sparse_moe.experts.252.w2", "model.layers.56.block_sparse_moe.experts.253.w2", "model.layers.56.block_sparse_moe.experts.254.w2", "model.layers.56.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 7.692761719224617e-05, "dbits": 1207959552 } ] }, { "idx": 285, "layers": [ "model.layers.57.self_attn.q_proj" ], "candidates": [ { "dkld": 1.2796744704224317e-05, "dbits": 18874368 } ] }, { "idx": 286, "layers": [ "model.layers.57.self_attn.k_proj", "model.layers.57.self_attn.v_proj" ], "candidates": [ { "dkld": 0.0021236542612314002, "dbits": 6291456 } ] }, { "idx": 287, "layers": [ "model.layers.57.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0023198079317808262, "dbits": 18874368 } ] }, { "idx": 288, "layers": [ "model.layers.57.block_sparse_moe.experts.0.w1", "model.layers.57.block_sparse_moe.experts.1.w1", "model.layers.57.block_sparse_moe.experts.2.w1", "model.layers.57.block_sparse_moe.experts.3.w1", "model.layers.57.block_sparse_moe.experts.4.w1", "model.layers.57.block_sparse_moe.experts.5.w1", "model.layers.57.block_sparse_moe.experts.6.w1", "model.layers.57.block_sparse_moe.experts.7.w1", "model.layers.57.block_sparse_moe.experts.8.w1", "model.layers.57.block_sparse_moe.experts.9.w1", "model.layers.57.block_sparse_moe.experts.10.w1", "model.layers.57.block_sparse_moe.experts.11.w1", "model.layers.57.block_sparse_moe.experts.12.w1", "model.layers.57.block_sparse_moe.experts.13.w1", "model.layers.57.block_sparse_moe.experts.14.w1", "model.layers.57.block_sparse_moe.experts.15.w1", "model.layers.57.block_sparse_moe.experts.16.w1", "model.layers.57.block_sparse_moe.experts.17.w1", "model.layers.57.block_sparse_moe.experts.18.w1", "model.layers.57.block_sparse_moe.experts.19.w1", "model.layers.57.block_sparse_moe.experts.20.w1", "model.layers.57.block_sparse_moe.experts.21.w1", "model.layers.57.block_sparse_moe.experts.22.w1", "model.layers.57.block_sparse_moe.experts.23.w1", "model.layers.57.block_sparse_moe.experts.24.w1", "model.layers.57.block_sparse_moe.experts.25.w1", "model.layers.57.block_sparse_moe.experts.26.w1", "model.layers.57.block_sparse_moe.experts.27.w1", "model.layers.57.block_sparse_moe.experts.28.w1", "model.layers.57.block_sparse_moe.experts.29.w1", "model.layers.57.block_sparse_moe.experts.30.w1", "model.layers.57.block_sparse_moe.experts.31.w1", "model.layers.57.block_sparse_moe.experts.32.w1", "model.layers.57.block_sparse_moe.experts.33.w1", "model.layers.57.block_sparse_moe.experts.34.w1", "model.layers.57.block_sparse_moe.experts.35.w1", "model.layers.57.block_sparse_moe.experts.36.w1", "model.layers.57.block_sparse_moe.experts.37.w1", "model.layers.57.block_sparse_moe.experts.38.w1", "model.layers.57.block_sparse_moe.experts.39.w1", "model.layers.57.block_sparse_moe.experts.40.w1", "model.layers.57.block_sparse_moe.experts.41.w1", "model.layers.57.block_sparse_moe.experts.42.w1", "model.layers.57.block_sparse_moe.experts.43.w1", "model.layers.57.block_sparse_moe.experts.44.w1", "model.layers.57.block_sparse_moe.experts.45.w1", "model.layers.57.block_sparse_moe.experts.46.w1", "model.layers.57.block_sparse_moe.experts.47.w1", "model.layers.57.block_sparse_moe.experts.48.w1", "model.layers.57.block_sparse_moe.experts.49.w1", "model.layers.57.block_sparse_moe.experts.50.w1", "model.layers.57.block_sparse_moe.experts.51.w1", "model.layers.57.block_sparse_moe.experts.52.w1", "model.layers.57.block_sparse_moe.experts.53.w1", "model.layers.57.block_sparse_moe.experts.54.w1", "model.layers.57.block_sparse_moe.experts.55.w1", "model.layers.57.block_sparse_moe.experts.56.w1", "model.layers.57.block_sparse_moe.experts.57.w1", "model.layers.57.block_sparse_moe.experts.58.w1", "model.layers.57.block_sparse_moe.experts.59.w1", "model.layers.57.block_sparse_moe.experts.60.w1", "model.layers.57.block_sparse_moe.experts.61.w1", "model.layers.57.block_sparse_moe.experts.62.w1", "model.layers.57.block_sparse_moe.experts.63.w1", "model.layers.57.block_sparse_moe.experts.64.w1", "model.layers.57.block_sparse_moe.experts.65.w1", "model.layers.57.block_sparse_moe.experts.66.w1", "model.layers.57.block_sparse_moe.experts.67.w1", "model.layers.57.block_sparse_moe.experts.68.w1", "model.layers.57.block_sparse_moe.experts.69.w1", "model.layers.57.block_sparse_moe.experts.70.w1", "model.layers.57.block_sparse_moe.experts.71.w1", "model.layers.57.block_sparse_moe.experts.72.w1", "model.layers.57.block_sparse_moe.experts.73.w1", "model.layers.57.block_sparse_moe.experts.74.w1", "model.layers.57.block_sparse_moe.experts.75.w1", "model.layers.57.block_sparse_moe.experts.76.w1", "model.layers.57.block_sparse_moe.experts.77.w1", "model.layers.57.block_sparse_moe.experts.78.w1", "model.layers.57.block_sparse_moe.experts.79.w1", "model.layers.57.block_sparse_moe.experts.80.w1", "model.layers.57.block_sparse_moe.experts.81.w1", "model.layers.57.block_sparse_moe.experts.82.w1", "model.layers.57.block_sparse_moe.experts.83.w1", "model.layers.57.block_sparse_moe.experts.84.w1", "model.layers.57.block_sparse_moe.experts.85.w1", "model.layers.57.block_sparse_moe.experts.86.w1", "model.layers.57.block_sparse_moe.experts.87.w1", "model.layers.57.block_sparse_moe.experts.88.w1", "model.layers.57.block_sparse_moe.experts.89.w1", "model.layers.57.block_sparse_moe.experts.90.w1", "model.layers.57.block_sparse_moe.experts.91.w1", "model.layers.57.block_sparse_moe.experts.92.w1", "model.layers.57.block_sparse_moe.experts.93.w1", "model.layers.57.block_sparse_moe.experts.94.w1", "model.layers.57.block_sparse_moe.experts.95.w1", "model.layers.57.block_sparse_moe.experts.96.w1", "model.layers.57.block_sparse_moe.experts.97.w1", "model.layers.57.block_sparse_moe.experts.98.w1", "model.layers.57.block_sparse_moe.experts.99.w1", "model.layers.57.block_sparse_moe.experts.100.w1", "model.layers.57.block_sparse_moe.experts.101.w1", "model.layers.57.block_sparse_moe.experts.102.w1", "model.layers.57.block_sparse_moe.experts.103.w1", "model.layers.57.block_sparse_moe.experts.104.w1", "model.layers.57.block_sparse_moe.experts.105.w1", "model.layers.57.block_sparse_moe.experts.106.w1", "model.layers.57.block_sparse_moe.experts.107.w1", "model.layers.57.block_sparse_moe.experts.108.w1", "model.layers.57.block_sparse_moe.experts.109.w1", "model.layers.57.block_sparse_moe.experts.110.w1", "model.layers.57.block_sparse_moe.experts.111.w1", "model.layers.57.block_sparse_moe.experts.112.w1", "model.layers.57.block_sparse_moe.experts.113.w1", "model.layers.57.block_sparse_moe.experts.114.w1", "model.layers.57.block_sparse_moe.experts.115.w1", "model.layers.57.block_sparse_moe.experts.116.w1", "model.layers.57.block_sparse_moe.experts.117.w1", "model.layers.57.block_sparse_moe.experts.118.w1", "model.layers.57.block_sparse_moe.experts.119.w1", "model.layers.57.block_sparse_moe.experts.120.w1", "model.layers.57.block_sparse_moe.experts.121.w1", "model.layers.57.block_sparse_moe.experts.122.w1", "model.layers.57.block_sparse_moe.experts.123.w1", "model.layers.57.block_sparse_moe.experts.124.w1", "model.layers.57.block_sparse_moe.experts.125.w1", "model.layers.57.block_sparse_moe.experts.126.w1", "model.layers.57.block_sparse_moe.experts.127.w1", "model.layers.57.block_sparse_moe.experts.128.w1", "model.layers.57.block_sparse_moe.experts.129.w1", "model.layers.57.block_sparse_moe.experts.130.w1", "model.layers.57.block_sparse_moe.experts.131.w1", "model.layers.57.block_sparse_moe.experts.132.w1", "model.layers.57.block_sparse_moe.experts.133.w1", "model.layers.57.block_sparse_moe.experts.134.w1", "model.layers.57.block_sparse_moe.experts.135.w1", "model.layers.57.block_sparse_moe.experts.136.w1", "model.layers.57.block_sparse_moe.experts.137.w1", "model.layers.57.block_sparse_moe.experts.138.w1", "model.layers.57.block_sparse_moe.experts.139.w1", "model.layers.57.block_sparse_moe.experts.140.w1", "model.layers.57.block_sparse_moe.experts.141.w1", "model.layers.57.block_sparse_moe.experts.142.w1", "model.layers.57.block_sparse_moe.experts.143.w1", "model.layers.57.block_sparse_moe.experts.144.w1", "model.layers.57.block_sparse_moe.experts.145.w1", "model.layers.57.block_sparse_moe.experts.146.w1", "model.layers.57.block_sparse_moe.experts.147.w1", "model.layers.57.block_sparse_moe.experts.148.w1", "model.layers.57.block_sparse_moe.experts.149.w1", "model.layers.57.block_sparse_moe.experts.150.w1", "model.layers.57.block_sparse_moe.experts.151.w1", "model.layers.57.block_sparse_moe.experts.152.w1", "model.layers.57.block_sparse_moe.experts.153.w1", "model.layers.57.block_sparse_moe.experts.154.w1", "model.layers.57.block_sparse_moe.experts.155.w1", "model.layers.57.block_sparse_moe.experts.156.w1", "model.layers.57.block_sparse_moe.experts.157.w1", "model.layers.57.block_sparse_moe.experts.158.w1", "model.layers.57.block_sparse_moe.experts.159.w1", "model.layers.57.block_sparse_moe.experts.160.w1", "model.layers.57.block_sparse_moe.experts.161.w1", "model.layers.57.block_sparse_moe.experts.162.w1", "model.layers.57.block_sparse_moe.experts.163.w1", "model.layers.57.block_sparse_moe.experts.164.w1", "model.layers.57.block_sparse_moe.experts.165.w1", "model.layers.57.block_sparse_moe.experts.166.w1", "model.layers.57.block_sparse_moe.experts.167.w1", "model.layers.57.block_sparse_moe.experts.168.w1", "model.layers.57.block_sparse_moe.experts.169.w1", "model.layers.57.block_sparse_moe.experts.170.w1", "model.layers.57.block_sparse_moe.experts.171.w1", "model.layers.57.block_sparse_moe.experts.172.w1", "model.layers.57.block_sparse_moe.experts.173.w1", "model.layers.57.block_sparse_moe.experts.174.w1", "model.layers.57.block_sparse_moe.experts.175.w1", "model.layers.57.block_sparse_moe.experts.176.w1", "model.layers.57.block_sparse_moe.experts.177.w1", "model.layers.57.block_sparse_moe.experts.178.w1", "model.layers.57.block_sparse_moe.experts.179.w1", "model.layers.57.block_sparse_moe.experts.180.w1", "model.layers.57.block_sparse_moe.experts.181.w1", "model.layers.57.block_sparse_moe.experts.182.w1", "model.layers.57.block_sparse_moe.experts.183.w1", "model.layers.57.block_sparse_moe.experts.184.w1", "model.layers.57.block_sparse_moe.experts.185.w1", "model.layers.57.block_sparse_moe.experts.186.w1", "model.layers.57.block_sparse_moe.experts.187.w1", "model.layers.57.block_sparse_moe.experts.188.w1", "model.layers.57.block_sparse_moe.experts.189.w1", "model.layers.57.block_sparse_moe.experts.190.w1", "model.layers.57.block_sparse_moe.experts.191.w1", "model.layers.57.block_sparse_moe.experts.192.w1", "model.layers.57.block_sparse_moe.experts.193.w1", "model.layers.57.block_sparse_moe.experts.194.w1", "model.layers.57.block_sparse_moe.experts.195.w1", "model.layers.57.block_sparse_moe.experts.196.w1", "model.layers.57.block_sparse_moe.experts.197.w1", "model.layers.57.block_sparse_moe.experts.198.w1", "model.layers.57.block_sparse_moe.experts.199.w1", "model.layers.57.block_sparse_moe.experts.200.w1", "model.layers.57.block_sparse_moe.experts.201.w1", "model.layers.57.block_sparse_moe.experts.202.w1", "model.layers.57.block_sparse_moe.experts.203.w1", "model.layers.57.block_sparse_moe.experts.204.w1", "model.layers.57.block_sparse_moe.experts.205.w1", "model.layers.57.block_sparse_moe.experts.206.w1", "model.layers.57.block_sparse_moe.experts.207.w1", "model.layers.57.block_sparse_moe.experts.208.w1", "model.layers.57.block_sparse_moe.experts.209.w1", "model.layers.57.block_sparse_moe.experts.210.w1", "model.layers.57.block_sparse_moe.experts.211.w1", "model.layers.57.block_sparse_moe.experts.212.w1", "model.layers.57.block_sparse_moe.experts.213.w1", "model.layers.57.block_sparse_moe.experts.214.w1", "model.layers.57.block_sparse_moe.experts.215.w1", "model.layers.57.block_sparse_moe.experts.216.w1", "model.layers.57.block_sparse_moe.experts.217.w1", "model.layers.57.block_sparse_moe.experts.218.w1", "model.layers.57.block_sparse_moe.experts.219.w1", "model.layers.57.block_sparse_moe.experts.220.w1", "model.layers.57.block_sparse_moe.experts.221.w1", "model.layers.57.block_sparse_moe.experts.222.w1", "model.layers.57.block_sparse_moe.experts.223.w1", "model.layers.57.block_sparse_moe.experts.224.w1", "model.layers.57.block_sparse_moe.experts.225.w1", "model.layers.57.block_sparse_moe.experts.226.w1", "model.layers.57.block_sparse_moe.experts.227.w1", "model.layers.57.block_sparse_moe.experts.228.w1", "model.layers.57.block_sparse_moe.experts.229.w1", "model.layers.57.block_sparse_moe.experts.230.w1", "model.layers.57.block_sparse_moe.experts.231.w1", "model.layers.57.block_sparse_moe.experts.232.w1", "model.layers.57.block_sparse_moe.experts.233.w1", "model.layers.57.block_sparse_moe.experts.234.w1", "model.layers.57.block_sparse_moe.experts.235.w1", "model.layers.57.block_sparse_moe.experts.236.w1", "model.layers.57.block_sparse_moe.experts.237.w1", "model.layers.57.block_sparse_moe.experts.238.w1", "model.layers.57.block_sparse_moe.experts.239.w1", "model.layers.57.block_sparse_moe.experts.240.w1", "model.layers.57.block_sparse_moe.experts.241.w1", "model.layers.57.block_sparse_moe.experts.242.w1", "model.layers.57.block_sparse_moe.experts.243.w1", "model.layers.57.block_sparse_moe.experts.244.w1", "model.layers.57.block_sparse_moe.experts.245.w1", "model.layers.57.block_sparse_moe.experts.246.w1", "model.layers.57.block_sparse_moe.experts.247.w1", "model.layers.57.block_sparse_moe.experts.248.w1", "model.layers.57.block_sparse_moe.experts.249.w1", "model.layers.57.block_sparse_moe.experts.250.w1", "model.layers.57.block_sparse_moe.experts.251.w1", "model.layers.57.block_sparse_moe.experts.252.w1", "model.layers.57.block_sparse_moe.experts.253.w1", "model.layers.57.block_sparse_moe.experts.254.w1", "model.layers.57.block_sparse_moe.experts.255.w1", "model.layers.57.block_sparse_moe.experts.0.w3", "model.layers.57.block_sparse_moe.experts.1.w3", "model.layers.57.block_sparse_moe.experts.2.w3", "model.layers.57.block_sparse_moe.experts.3.w3", "model.layers.57.block_sparse_moe.experts.4.w3", "model.layers.57.block_sparse_moe.experts.5.w3", "model.layers.57.block_sparse_moe.experts.6.w3", "model.layers.57.block_sparse_moe.experts.7.w3", "model.layers.57.block_sparse_moe.experts.8.w3", "model.layers.57.block_sparse_moe.experts.9.w3", "model.layers.57.block_sparse_moe.experts.10.w3", "model.layers.57.block_sparse_moe.experts.11.w3", "model.layers.57.block_sparse_moe.experts.12.w3", "model.layers.57.block_sparse_moe.experts.13.w3", "model.layers.57.block_sparse_moe.experts.14.w3", "model.layers.57.block_sparse_moe.experts.15.w3", "model.layers.57.block_sparse_moe.experts.16.w3", "model.layers.57.block_sparse_moe.experts.17.w3", "model.layers.57.block_sparse_moe.experts.18.w3", "model.layers.57.block_sparse_moe.experts.19.w3", "model.layers.57.block_sparse_moe.experts.20.w3", "model.layers.57.block_sparse_moe.experts.21.w3", "model.layers.57.block_sparse_moe.experts.22.w3", "model.layers.57.block_sparse_moe.experts.23.w3", "model.layers.57.block_sparse_moe.experts.24.w3", "model.layers.57.block_sparse_moe.experts.25.w3", "model.layers.57.block_sparse_moe.experts.26.w3", "model.layers.57.block_sparse_moe.experts.27.w3", "model.layers.57.block_sparse_moe.experts.28.w3", "model.layers.57.block_sparse_moe.experts.29.w3", "model.layers.57.block_sparse_moe.experts.30.w3", "model.layers.57.block_sparse_moe.experts.31.w3", "model.layers.57.block_sparse_moe.experts.32.w3", "model.layers.57.block_sparse_moe.experts.33.w3", "model.layers.57.block_sparse_moe.experts.34.w3", "model.layers.57.block_sparse_moe.experts.35.w3", "model.layers.57.block_sparse_moe.experts.36.w3", "model.layers.57.block_sparse_moe.experts.37.w3", "model.layers.57.block_sparse_moe.experts.38.w3", "model.layers.57.block_sparse_moe.experts.39.w3", "model.layers.57.block_sparse_moe.experts.40.w3", "model.layers.57.block_sparse_moe.experts.41.w3", "model.layers.57.block_sparse_moe.experts.42.w3", "model.layers.57.block_sparse_moe.experts.43.w3", "model.layers.57.block_sparse_moe.experts.44.w3", "model.layers.57.block_sparse_moe.experts.45.w3", "model.layers.57.block_sparse_moe.experts.46.w3", "model.layers.57.block_sparse_moe.experts.47.w3", "model.layers.57.block_sparse_moe.experts.48.w3", "model.layers.57.block_sparse_moe.experts.49.w3", "model.layers.57.block_sparse_moe.experts.50.w3", "model.layers.57.block_sparse_moe.experts.51.w3", "model.layers.57.block_sparse_moe.experts.52.w3", "model.layers.57.block_sparse_moe.experts.53.w3", "model.layers.57.block_sparse_moe.experts.54.w3", "model.layers.57.block_sparse_moe.experts.55.w3", "model.layers.57.block_sparse_moe.experts.56.w3", "model.layers.57.block_sparse_moe.experts.57.w3", "model.layers.57.block_sparse_moe.experts.58.w3", "model.layers.57.block_sparse_moe.experts.59.w3", "model.layers.57.block_sparse_moe.experts.60.w3", "model.layers.57.block_sparse_moe.experts.61.w3", "model.layers.57.block_sparse_moe.experts.62.w3", "model.layers.57.block_sparse_moe.experts.63.w3", "model.layers.57.block_sparse_moe.experts.64.w3", "model.layers.57.block_sparse_moe.experts.65.w3", "model.layers.57.block_sparse_moe.experts.66.w3", "model.layers.57.block_sparse_moe.experts.67.w3", "model.layers.57.block_sparse_moe.experts.68.w3", "model.layers.57.block_sparse_moe.experts.69.w3", "model.layers.57.block_sparse_moe.experts.70.w3", "model.layers.57.block_sparse_moe.experts.71.w3", "model.layers.57.block_sparse_moe.experts.72.w3", "model.layers.57.block_sparse_moe.experts.73.w3", "model.layers.57.block_sparse_moe.experts.74.w3", "model.layers.57.block_sparse_moe.experts.75.w3", "model.layers.57.block_sparse_moe.experts.76.w3", "model.layers.57.block_sparse_moe.experts.77.w3", "model.layers.57.block_sparse_moe.experts.78.w3", "model.layers.57.block_sparse_moe.experts.79.w3", "model.layers.57.block_sparse_moe.experts.80.w3", "model.layers.57.block_sparse_moe.experts.81.w3", "model.layers.57.block_sparse_moe.experts.82.w3", "model.layers.57.block_sparse_moe.experts.83.w3", "model.layers.57.block_sparse_moe.experts.84.w3", "model.layers.57.block_sparse_moe.experts.85.w3", "model.layers.57.block_sparse_moe.experts.86.w3", "model.layers.57.block_sparse_moe.experts.87.w3", "model.layers.57.block_sparse_moe.experts.88.w3", "model.layers.57.block_sparse_moe.experts.89.w3", "model.layers.57.block_sparse_moe.experts.90.w3", "model.layers.57.block_sparse_moe.experts.91.w3", "model.layers.57.block_sparse_moe.experts.92.w3", "model.layers.57.block_sparse_moe.experts.93.w3", "model.layers.57.block_sparse_moe.experts.94.w3", "model.layers.57.block_sparse_moe.experts.95.w3", "model.layers.57.block_sparse_moe.experts.96.w3", "model.layers.57.block_sparse_moe.experts.97.w3", "model.layers.57.block_sparse_moe.experts.98.w3", "model.layers.57.block_sparse_moe.experts.99.w3", "model.layers.57.block_sparse_moe.experts.100.w3", "model.layers.57.block_sparse_moe.experts.101.w3", "model.layers.57.block_sparse_moe.experts.102.w3", "model.layers.57.block_sparse_moe.experts.103.w3", "model.layers.57.block_sparse_moe.experts.104.w3", "model.layers.57.block_sparse_moe.experts.105.w3", "model.layers.57.block_sparse_moe.experts.106.w3", "model.layers.57.block_sparse_moe.experts.107.w3", "model.layers.57.block_sparse_moe.experts.108.w3", "model.layers.57.block_sparse_moe.experts.109.w3", "model.layers.57.block_sparse_moe.experts.110.w3", "model.layers.57.block_sparse_moe.experts.111.w3", "model.layers.57.block_sparse_moe.experts.112.w3", "model.layers.57.block_sparse_moe.experts.113.w3", "model.layers.57.block_sparse_moe.experts.114.w3", "model.layers.57.block_sparse_moe.experts.115.w3", "model.layers.57.block_sparse_moe.experts.116.w3", "model.layers.57.block_sparse_moe.experts.117.w3", "model.layers.57.block_sparse_moe.experts.118.w3", "model.layers.57.block_sparse_moe.experts.119.w3", "model.layers.57.block_sparse_moe.experts.120.w3", "model.layers.57.block_sparse_moe.experts.121.w3", "model.layers.57.block_sparse_moe.experts.122.w3", "model.layers.57.block_sparse_moe.experts.123.w3", "model.layers.57.block_sparse_moe.experts.124.w3", "model.layers.57.block_sparse_moe.experts.125.w3", "model.layers.57.block_sparse_moe.experts.126.w3", "model.layers.57.block_sparse_moe.experts.127.w3", "model.layers.57.block_sparse_moe.experts.128.w3", "model.layers.57.block_sparse_moe.experts.129.w3", "model.layers.57.block_sparse_moe.experts.130.w3", "model.layers.57.block_sparse_moe.experts.131.w3", "model.layers.57.block_sparse_moe.experts.132.w3", "model.layers.57.block_sparse_moe.experts.133.w3", "model.layers.57.block_sparse_moe.experts.134.w3", "model.layers.57.block_sparse_moe.experts.135.w3", "model.layers.57.block_sparse_moe.experts.136.w3", "model.layers.57.block_sparse_moe.experts.137.w3", "model.layers.57.block_sparse_moe.experts.138.w3", "model.layers.57.block_sparse_moe.experts.139.w3", "model.layers.57.block_sparse_moe.experts.140.w3", "model.layers.57.block_sparse_moe.experts.141.w3", "model.layers.57.block_sparse_moe.experts.142.w3", "model.layers.57.block_sparse_moe.experts.143.w3", "model.layers.57.block_sparse_moe.experts.144.w3", "model.layers.57.block_sparse_moe.experts.145.w3", "model.layers.57.block_sparse_moe.experts.146.w3", "model.layers.57.block_sparse_moe.experts.147.w3", "model.layers.57.block_sparse_moe.experts.148.w3", "model.layers.57.block_sparse_moe.experts.149.w3", "model.layers.57.block_sparse_moe.experts.150.w3", "model.layers.57.block_sparse_moe.experts.151.w3", "model.layers.57.block_sparse_moe.experts.152.w3", "model.layers.57.block_sparse_moe.experts.153.w3", "model.layers.57.block_sparse_moe.experts.154.w3", "model.layers.57.block_sparse_moe.experts.155.w3", "model.layers.57.block_sparse_moe.experts.156.w3", "model.layers.57.block_sparse_moe.experts.157.w3", "model.layers.57.block_sparse_moe.experts.158.w3", "model.layers.57.block_sparse_moe.experts.159.w3", "model.layers.57.block_sparse_moe.experts.160.w3", "model.layers.57.block_sparse_moe.experts.161.w3", "model.layers.57.block_sparse_moe.experts.162.w3", "model.layers.57.block_sparse_moe.experts.163.w3", "model.layers.57.block_sparse_moe.experts.164.w3", "model.layers.57.block_sparse_moe.experts.165.w3", "model.layers.57.block_sparse_moe.experts.166.w3", "model.layers.57.block_sparse_moe.experts.167.w3", "model.layers.57.block_sparse_moe.experts.168.w3", "model.layers.57.block_sparse_moe.experts.169.w3", "model.layers.57.block_sparse_moe.experts.170.w3", "model.layers.57.block_sparse_moe.experts.171.w3", "model.layers.57.block_sparse_moe.experts.172.w3", "model.layers.57.block_sparse_moe.experts.173.w3", "model.layers.57.block_sparse_moe.experts.174.w3", "model.layers.57.block_sparse_moe.experts.175.w3", "model.layers.57.block_sparse_moe.experts.176.w3", "model.layers.57.block_sparse_moe.experts.177.w3", "model.layers.57.block_sparse_moe.experts.178.w3", "model.layers.57.block_sparse_moe.experts.179.w3", "model.layers.57.block_sparse_moe.experts.180.w3", "model.layers.57.block_sparse_moe.experts.181.w3", "model.layers.57.block_sparse_moe.experts.182.w3", "model.layers.57.block_sparse_moe.experts.183.w3", "model.layers.57.block_sparse_moe.experts.184.w3", "model.layers.57.block_sparse_moe.experts.185.w3", "model.layers.57.block_sparse_moe.experts.186.w3", "model.layers.57.block_sparse_moe.experts.187.w3", "model.layers.57.block_sparse_moe.experts.188.w3", "model.layers.57.block_sparse_moe.experts.189.w3", "model.layers.57.block_sparse_moe.experts.190.w3", "model.layers.57.block_sparse_moe.experts.191.w3", "model.layers.57.block_sparse_moe.experts.192.w3", "model.layers.57.block_sparse_moe.experts.193.w3", "model.layers.57.block_sparse_moe.experts.194.w3", "model.layers.57.block_sparse_moe.experts.195.w3", "model.layers.57.block_sparse_moe.experts.196.w3", "model.layers.57.block_sparse_moe.experts.197.w3", "model.layers.57.block_sparse_moe.experts.198.w3", "model.layers.57.block_sparse_moe.experts.199.w3", "model.layers.57.block_sparse_moe.experts.200.w3", "model.layers.57.block_sparse_moe.experts.201.w3", "model.layers.57.block_sparse_moe.experts.202.w3", "model.layers.57.block_sparse_moe.experts.203.w3", "model.layers.57.block_sparse_moe.experts.204.w3", "model.layers.57.block_sparse_moe.experts.205.w3", "model.layers.57.block_sparse_moe.experts.206.w3", "model.layers.57.block_sparse_moe.experts.207.w3", "model.layers.57.block_sparse_moe.experts.208.w3", "model.layers.57.block_sparse_moe.experts.209.w3", "model.layers.57.block_sparse_moe.experts.210.w3", "model.layers.57.block_sparse_moe.experts.211.w3", "model.layers.57.block_sparse_moe.experts.212.w3", "model.layers.57.block_sparse_moe.experts.213.w3", "model.layers.57.block_sparse_moe.experts.214.w3", "model.layers.57.block_sparse_moe.experts.215.w3", "model.layers.57.block_sparse_moe.experts.216.w3", "model.layers.57.block_sparse_moe.experts.217.w3", "model.layers.57.block_sparse_moe.experts.218.w3", "model.layers.57.block_sparse_moe.experts.219.w3", "model.layers.57.block_sparse_moe.experts.220.w3", "model.layers.57.block_sparse_moe.experts.221.w3", "model.layers.57.block_sparse_moe.experts.222.w3", "model.layers.57.block_sparse_moe.experts.223.w3", "model.layers.57.block_sparse_moe.experts.224.w3", "model.layers.57.block_sparse_moe.experts.225.w3", "model.layers.57.block_sparse_moe.experts.226.w3", "model.layers.57.block_sparse_moe.experts.227.w3", "model.layers.57.block_sparse_moe.experts.228.w3", "model.layers.57.block_sparse_moe.experts.229.w3", "model.layers.57.block_sparse_moe.experts.230.w3", "model.layers.57.block_sparse_moe.experts.231.w3", "model.layers.57.block_sparse_moe.experts.232.w3", "model.layers.57.block_sparse_moe.experts.233.w3", "model.layers.57.block_sparse_moe.experts.234.w3", "model.layers.57.block_sparse_moe.experts.235.w3", "model.layers.57.block_sparse_moe.experts.236.w3", "model.layers.57.block_sparse_moe.experts.237.w3", "model.layers.57.block_sparse_moe.experts.238.w3", "model.layers.57.block_sparse_moe.experts.239.w3", "model.layers.57.block_sparse_moe.experts.240.w3", "model.layers.57.block_sparse_moe.experts.241.w3", "model.layers.57.block_sparse_moe.experts.242.w3", "model.layers.57.block_sparse_moe.experts.243.w3", "model.layers.57.block_sparse_moe.experts.244.w3", "model.layers.57.block_sparse_moe.experts.245.w3", "model.layers.57.block_sparse_moe.experts.246.w3", "model.layers.57.block_sparse_moe.experts.247.w3", "model.layers.57.block_sparse_moe.experts.248.w3", "model.layers.57.block_sparse_moe.experts.249.w3", "model.layers.57.block_sparse_moe.experts.250.w3", "model.layers.57.block_sparse_moe.experts.251.w3", "model.layers.57.block_sparse_moe.experts.252.w3", "model.layers.57.block_sparse_moe.experts.253.w3", "model.layers.57.block_sparse_moe.experts.254.w3", "model.layers.57.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -7.372163236141205e-05, "dbits": 2415919104 } ] }, { "idx": 289, "layers": [ "model.layers.57.block_sparse_moe.experts.0.w2", "model.layers.57.block_sparse_moe.experts.1.w2", "model.layers.57.block_sparse_moe.experts.2.w2", "model.layers.57.block_sparse_moe.experts.3.w2", "model.layers.57.block_sparse_moe.experts.4.w2", "model.layers.57.block_sparse_moe.experts.5.w2", "model.layers.57.block_sparse_moe.experts.6.w2", "model.layers.57.block_sparse_moe.experts.7.w2", "model.layers.57.block_sparse_moe.experts.8.w2", "model.layers.57.block_sparse_moe.experts.9.w2", "model.layers.57.block_sparse_moe.experts.10.w2", "model.layers.57.block_sparse_moe.experts.11.w2", "model.layers.57.block_sparse_moe.experts.12.w2", "model.layers.57.block_sparse_moe.experts.13.w2", "model.layers.57.block_sparse_moe.experts.14.w2", "model.layers.57.block_sparse_moe.experts.15.w2", "model.layers.57.block_sparse_moe.experts.16.w2", "model.layers.57.block_sparse_moe.experts.17.w2", "model.layers.57.block_sparse_moe.experts.18.w2", "model.layers.57.block_sparse_moe.experts.19.w2", "model.layers.57.block_sparse_moe.experts.20.w2", "model.layers.57.block_sparse_moe.experts.21.w2", "model.layers.57.block_sparse_moe.experts.22.w2", "model.layers.57.block_sparse_moe.experts.23.w2", "model.layers.57.block_sparse_moe.experts.24.w2", "model.layers.57.block_sparse_moe.experts.25.w2", "model.layers.57.block_sparse_moe.experts.26.w2", "model.layers.57.block_sparse_moe.experts.27.w2", "model.layers.57.block_sparse_moe.experts.28.w2", "model.layers.57.block_sparse_moe.experts.29.w2", "model.layers.57.block_sparse_moe.experts.30.w2", "model.layers.57.block_sparse_moe.experts.31.w2", "model.layers.57.block_sparse_moe.experts.32.w2", "model.layers.57.block_sparse_moe.experts.33.w2", "model.layers.57.block_sparse_moe.experts.34.w2", "model.layers.57.block_sparse_moe.experts.35.w2", "model.layers.57.block_sparse_moe.experts.36.w2", "model.layers.57.block_sparse_moe.experts.37.w2", "model.layers.57.block_sparse_moe.experts.38.w2", "model.layers.57.block_sparse_moe.experts.39.w2", "model.layers.57.block_sparse_moe.experts.40.w2", "model.layers.57.block_sparse_moe.experts.41.w2", "model.layers.57.block_sparse_moe.experts.42.w2", "model.layers.57.block_sparse_moe.experts.43.w2", "model.layers.57.block_sparse_moe.experts.44.w2", "model.layers.57.block_sparse_moe.experts.45.w2", "model.layers.57.block_sparse_moe.experts.46.w2", "model.layers.57.block_sparse_moe.experts.47.w2", "model.layers.57.block_sparse_moe.experts.48.w2", "model.layers.57.block_sparse_moe.experts.49.w2", "model.layers.57.block_sparse_moe.experts.50.w2", "model.layers.57.block_sparse_moe.experts.51.w2", "model.layers.57.block_sparse_moe.experts.52.w2", "model.layers.57.block_sparse_moe.experts.53.w2", "model.layers.57.block_sparse_moe.experts.54.w2", "model.layers.57.block_sparse_moe.experts.55.w2", "model.layers.57.block_sparse_moe.experts.56.w2", "model.layers.57.block_sparse_moe.experts.57.w2", "model.layers.57.block_sparse_moe.experts.58.w2", "model.layers.57.block_sparse_moe.experts.59.w2", "model.layers.57.block_sparse_moe.experts.60.w2", "model.layers.57.block_sparse_moe.experts.61.w2", "model.layers.57.block_sparse_moe.experts.62.w2", "model.layers.57.block_sparse_moe.experts.63.w2", "model.layers.57.block_sparse_moe.experts.64.w2", "model.layers.57.block_sparse_moe.experts.65.w2", "model.layers.57.block_sparse_moe.experts.66.w2", "model.layers.57.block_sparse_moe.experts.67.w2", "model.layers.57.block_sparse_moe.experts.68.w2", "model.layers.57.block_sparse_moe.experts.69.w2", "model.layers.57.block_sparse_moe.experts.70.w2", "model.layers.57.block_sparse_moe.experts.71.w2", "model.layers.57.block_sparse_moe.experts.72.w2", "model.layers.57.block_sparse_moe.experts.73.w2", "model.layers.57.block_sparse_moe.experts.74.w2", "model.layers.57.block_sparse_moe.experts.75.w2", "model.layers.57.block_sparse_moe.experts.76.w2", "model.layers.57.block_sparse_moe.experts.77.w2", "model.layers.57.block_sparse_moe.experts.78.w2", "model.layers.57.block_sparse_moe.experts.79.w2", "model.layers.57.block_sparse_moe.experts.80.w2", "model.layers.57.block_sparse_moe.experts.81.w2", "model.layers.57.block_sparse_moe.experts.82.w2", "model.layers.57.block_sparse_moe.experts.83.w2", "model.layers.57.block_sparse_moe.experts.84.w2", "model.layers.57.block_sparse_moe.experts.85.w2", "model.layers.57.block_sparse_moe.experts.86.w2", "model.layers.57.block_sparse_moe.experts.87.w2", "model.layers.57.block_sparse_moe.experts.88.w2", "model.layers.57.block_sparse_moe.experts.89.w2", "model.layers.57.block_sparse_moe.experts.90.w2", "model.layers.57.block_sparse_moe.experts.91.w2", "model.layers.57.block_sparse_moe.experts.92.w2", "model.layers.57.block_sparse_moe.experts.93.w2", "model.layers.57.block_sparse_moe.experts.94.w2", "model.layers.57.block_sparse_moe.experts.95.w2", "model.layers.57.block_sparse_moe.experts.96.w2", "model.layers.57.block_sparse_moe.experts.97.w2", "model.layers.57.block_sparse_moe.experts.98.w2", "model.layers.57.block_sparse_moe.experts.99.w2", "model.layers.57.block_sparse_moe.experts.100.w2", "model.layers.57.block_sparse_moe.experts.101.w2", "model.layers.57.block_sparse_moe.experts.102.w2", "model.layers.57.block_sparse_moe.experts.103.w2", "model.layers.57.block_sparse_moe.experts.104.w2", "model.layers.57.block_sparse_moe.experts.105.w2", "model.layers.57.block_sparse_moe.experts.106.w2", "model.layers.57.block_sparse_moe.experts.107.w2", "model.layers.57.block_sparse_moe.experts.108.w2", "model.layers.57.block_sparse_moe.experts.109.w2", "model.layers.57.block_sparse_moe.experts.110.w2", "model.layers.57.block_sparse_moe.experts.111.w2", "model.layers.57.block_sparse_moe.experts.112.w2", "model.layers.57.block_sparse_moe.experts.113.w2", "model.layers.57.block_sparse_moe.experts.114.w2", "model.layers.57.block_sparse_moe.experts.115.w2", "model.layers.57.block_sparse_moe.experts.116.w2", "model.layers.57.block_sparse_moe.experts.117.w2", "model.layers.57.block_sparse_moe.experts.118.w2", "model.layers.57.block_sparse_moe.experts.119.w2", "model.layers.57.block_sparse_moe.experts.120.w2", "model.layers.57.block_sparse_moe.experts.121.w2", "model.layers.57.block_sparse_moe.experts.122.w2", "model.layers.57.block_sparse_moe.experts.123.w2", "model.layers.57.block_sparse_moe.experts.124.w2", "model.layers.57.block_sparse_moe.experts.125.w2", "model.layers.57.block_sparse_moe.experts.126.w2", "model.layers.57.block_sparse_moe.experts.127.w2", "model.layers.57.block_sparse_moe.experts.128.w2", "model.layers.57.block_sparse_moe.experts.129.w2", "model.layers.57.block_sparse_moe.experts.130.w2", "model.layers.57.block_sparse_moe.experts.131.w2", "model.layers.57.block_sparse_moe.experts.132.w2", "model.layers.57.block_sparse_moe.experts.133.w2", "model.layers.57.block_sparse_moe.experts.134.w2", "model.layers.57.block_sparse_moe.experts.135.w2", "model.layers.57.block_sparse_moe.experts.136.w2", "model.layers.57.block_sparse_moe.experts.137.w2", "model.layers.57.block_sparse_moe.experts.138.w2", "model.layers.57.block_sparse_moe.experts.139.w2", "model.layers.57.block_sparse_moe.experts.140.w2", "model.layers.57.block_sparse_moe.experts.141.w2", "model.layers.57.block_sparse_moe.experts.142.w2", "model.layers.57.block_sparse_moe.experts.143.w2", "model.layers.57.block_sparse_moe.experts.144.w2", "model.layers.57.block_sparse_moe.experts.145.w2", "model.layers.57.block_sparse_moe.experts.146.w2", "model.layers.57.block_sparse_moe.experts.147.w2", "model.layers.57.block_sparse_moe.experts.148.w2", "model.layers.57.block_sparse_moe.experts.149.w2", "model.layers.57.block_sparse_moe.experts.150.w2", "model.layers.57.block_sparse_moe.experts.151.w2", "model.layers.57.block_sparse_moe.experts.152.w2", "model.layers.57.block_sparse_moe.experts.153.w2", "model.layers.57.block_sparse_moe.experts.154.w2", "model.layers.57.block_sparse_moe.experts.155.w2", "model.layers.57.block_sparse_moe.experts.156.w2", "model.layers.57.block_sparse_moe.experts.157.w2", "model.layers.57.block_sparse_moe.experts.158.w2", "model.layers.57.block_sparse_moe.experts.159.w2", "model.layers.57.block_sparse_moe.experts.160.w2", "model.layers.57.block_sparse_moe.experts.161.w2", "model.layers.57.block_sparse_moe.experts.162.w2", "model.layers.57.block_sparse_moe.experts.163.w2", "model.layers.57.block_sparse_moe.experts.164.w2", "model.layers.57.block_sparse_moe.experts.165.w2", "model.layers.57.block_sparse_moe.experts.166.w2", "model.layers.57.block_sparse_moe.experts.167.w2", "model.layers.57.block_sparse_moe.experts.168.w2", "model.layers.57.block_sparse_moe.experts.169.w2", "model.layers.57.block_sparse_moe.experts.170.w2", "model.layers.57.block_sparse_moe.experts.171.w2", "model.layers.57.block_sparse_moe.experts.172.w2", "model.layers.57.block_sparse_moe.experts.173.w2", "model.layers.57.block_sparse_moe.experts.174.w2", "model.layers.57.block_sparse_moe.experts.175.w2", "model.layers.57.block_sparse_moe.experts.176.w2", "model.layers.57.block_sparse_moe.experts.177.w2", "model.layers.57.block_sparse_moe.experts.178.w2", "model.layers.57.block_sparse_moe.experts.179.w2", "model.layers.57.block_sparse_moe.experts.180.w2", "model.layers.57.block_sparse_moe.experts.181.w2", "model.layers.57.block_sparse_moe.experts.182.w2", "model.layers.57.block_sparse_moe.experts.183.w2", "model.layers.57.block_sparse_moe.experts.184.w2", "model.layers.57.block_sparse_moe.experts.185.w2", "model.layers.57.block_sparse_moe.experts.186.w2", "model.layers.57.block_sparse_moe.experts.187.w2", "model.layers.57.block_sparse_moe.experts.188.w2", "model.layers.57.block_sparse_moe.experts.189.w2", "model.layers.57.block_sparse_moe.experts.190.w2", "model.layers.57.block_sparse_moe.experts.191.w2", "model.layers.57.block_sparse_moe.experts.192.w2", "model.layers.57.block_sparse_moe.experts.193.w2", "model.layers.57.block_sparse_moe.experts.194.w2", "model.layers.57.block_sparse_moe.experts.195.w2", "model.layers.57.block_sparse_moe.experts.196.w2", "model.layers.57.block_sparse_moe.experts.197.w2", "model.layers.57.block_sparse_moe.experts.198.w2", "model.layers.57.block_sparse_moe.experts.199.w2", "model.layers.57.block_sparse_moe.experts.200.w2", "model.layers.57.block_sparse_moe.experts.201.w2", "model.layers.57.block_sparse_moe.experts.202.w2", "model.layers.57.block_sparse_moe.experts.203.w2", "model.layers.57.block_sparse_moe.experts.204.w2", "model.layers.57.block_sparse_moe.experts.205.w2", "model.layers.57.block_sparse_moe.experts.206.w2", "model.layers.57.block_sparse_moe.experts.207.w2", "model.layers.57.block_sparse_moe.experts.208.w2", "model.layers.57.block_sparse_moe.experts.209.w2", "model.layers.57.block_sparse_moe.experts.210.w2", "model.layers.57.block_sparse_moe.experts.211.w2", "model.layers.57.block_sparse_moe.experts.212.w2", "model.layers.57.block_sparse_moe.experts.213.w2", "model.layers.57.block_sparse_moe.experts.214.w2", "model.layers.57.block_sparse_moe.experts.215.w2", "model.layers.57.block_sparse_moe.experts.216.w2", "model.layers.57.block_sparse_moe.experts.217.w2", "model.layers.57.block_sparse_moe.experts.218.w2", "model.layers.57.block_sparse_moe.experts.219.w2", "model.layers.57.block_sparse_moe.experts.220.w2", "model.layers.57.block_sparse_moe.experts.221.w2", "model.layers.57.block_sparse_moe.experts.222.w2", "model.layers.57.block_sparse_moe.experts.223.w2", "model.layers.57.block_sparse_moe.experts.224.w2", "model.layers.57.block_sparse_moe.experts.225.w2", "model.layers.57.block_sparse_moe.experts.226.w2", "model.layers.57.block_sparse_moe.experts.227.w2", "model.layers.57.block_sparse_moe.experts.228.w2", "model.layers.57.block_sparse_moe.experts.229.w2", "model.layers.57.block_sparse_moe.experts.230.w2", "model.layers.57.block_sparse_moe.experts.231.w2", "model.layers.57.block_sparse_moe.experts.232.w2", "model.layers.57.block_sparse_moe.experts.233.w2", "model.layers.57.block_sparse_moe.experts.234.w2", "model.layers.57.block_sparse_moe.experts.235.w2", "model.layers.57.block_sparse_moe.experts.236.w2", "model.layers.57.block_sparse_moe.experts.237.w2", "model.layers.57.block_sparse_moe.experts.238.w2", "model.layers.57.block_sparse_moe.experts.239.w2", "model.layers.57.block_sparse_moe.experts.240.w2", "model.layers.57.block_sparse_moe.experts.241.w2", "model.layers.57.block_sparse_moe.experts.242.w2", "model.layers.57.block_sparse_moe.experts.243.w2", "model.layers.57.block_sparse_moe.experts.244.w2", "model.layers.57.block_sparse_moe.experts.245.w2", "model.layers.57.block_sparse_moe.experts.246.w2", "model.layers.57.block_sparse_moe.experts.247.w2", "model.layers.57.block_sparse_moe.experts.248.w2", "model.layers.57.block_sparse_moe.experts.249.w2", "model.layers.57.block_sparse_moe.experts.250.w2", "model.layers.57.block_sparse_moe.experts.251.w2", "model.layers.57.block_sparse_moe.experts.252.w2", "model.layers.57.block_sparse_moe.experts.253.w2", "model.layers.57.block_sparse_moe.experts.254.w2", "model.layers.57.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -8.733943104743958e-05, "dbits": 1207959552 } ] }, { "idx": 290, "layers": [ "model.layers.58.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0001601438969373592, "dbits": 18874368 } ] }, { "idx": 291, "layers": [ "model.layers.58.self_attn.k_proj", "model.layers.58.self_attn.v_proj" ], "candidates": [ { "dkld": -0.0019059114158153645, "dbits": 6291456 } ] }, { "idx": 292, "layers": [ "model.layers.58.self_attn.o_proj" ], "candidates": [ { "dkld": 0.0004890445619821215, "dbits": 18874368 } ] }, { "idx": 293, "layers": [ "model.layers.58.block_sparse_moe.experts.0.w1", "model.layers.58.block_sparse_moe.experts.1.w1", "model.layers.58.block_sparse_moe.experts.2.w1", "model.layers.58.block_sparse_moe.experts.3.w1", "model.layers.58.block_sparse_moe.experts.4.w1", "model.layers.58.block_sparse_moe.experts.5.w1", "model.layers.58.block_sparse_moe.experts.6.w1", "model.layers.58.block_sparse_moe.experts.7.w1", "model.layers.58.block_sparse_moe.experts.8.w1", "model.layers.58.block_sparse_moe.experts.9.w1", "model.layers.58.block_sparse_moe.experts.10.w1", "model.layers.58.block_sparse_moe.experts.11.w1", "model.layers.58.block_sparse_moe.experts.12.w1", "model.layers.58.block_sparse_moe.experts.13.w1", "model.layers.58.block_sparse_moe.experts.14.w1", "model.layers.58.block_sparse_moe.experts.15.w1", "model.layers.58.block_sparse_moe.experts.16.w1", "model.layers.58.block_sparse_moe.experts.17.w1", "model.layers.58.block_sparse_moe.experts.18.w1", "model.layers.58.block_sparse_moe.experts.19.w1", "model.layers.58.block_sparse_moe.experts.20.w1", "model.layers.58.block_sparse_moe.experts.21.w1", "model.layers.58.block_sparse_moe.experts.22.w1", "model.layers.58.block_sparse_moe.experts.23.w1", "model.layers.58.block_sparse_moe.experts.24.w1", "model.layers.58.block_sparse_moe.experts.25.w1", "model.layers.58.block_sparse_moe.experts.26.w1", "model.layers.58.block_sparse_moe.experts.27.w1", "model.layers.58.block_sparse_moe.experts.28.w1", "model.layers.58.block_sparse_moe.experts.29.w1", "model.layers.58.block_sparse_moe.experts.30.w1", "model.layers.58.block_sparse_moe.experts.31.w1", "model.layers.58.block_sparse_moe.experts.32.w1", "model.layers.58.block_sparse_moe.experts.33.w1", "model.layers.58.block_sparse_moe.experts.34.w1", "model.layers.58.block_sparse_moe.experts.35.w1", "model.layers.58.block_sparse_moe.experts.36.w1", "model.layers.58.block_sparse_moe.experts.37.w1", "model.layers.58.block_sparse_moe.experts.38.w1", "model.layers.58.block_sparse_moe.experts.39.w1", "model.layers.58.block_sparse_moe.experts.40.w1", "model.layers.58.block_sparse_moe.experts.41.w1", "model.layers.58.block_sparse_moe.experts.42.w1", "model.layers.58.block_sparse_moe.experts.43.w1", "model.layers.58.block_sparse_moe.experts.44.w1", "model.layers.58.block_sparse_moe.experts.45.w1", "model.layers.58.block_sparse_moe.experts.46.w1", "model.layers.58.block_sparse_moe.experts.47.w1", "model.layers.58.block_sparse_moe.experts.48.w1", "model.layers.58.block_sparse_moe.experts.49.w1", "model.layers.58.block_sparse_moe.experts.50.w1", "model.layers.58.block_sparse_moe.experts.51.w1", "model.layers.58.block_sparse_moe.experts.52.w1", "model.layers.58.block_sparse_moe.experts.53.w1", "model.layers.58.block_sparse_moe.experts.54.w1", "model.layers.58.block_sparse_moe.experts.55.w1", "model.layers.58.block_sparse_moe.experts.56.w1", "model.layers.58.block_sparse_moe.experts.57.w1", "model.layers.58.block_sparse_moe.experts.58.w1", "model.layers.58.block_sparse_moe.experts.59.w1", "model.layers.58.block_sparse_moe.experts.60.w1", "model.layers.58.block_sparse_moe.experts.61.w1", "model.layers.58.block_sparse_moe.experts.62.w1", "model.layers.58.block_sparse_moe.experts.63.w1", "model.layers.58.block_sparse_moe.experts.64.w1", "model.layers.58.block_sparse_moe.experts.65.w1", "model.layers.58.block_sparse_moe.experts.66.w1", "model.layers.58.block_sparse_moe.experts.67.w1", "model.layers.58.block_sparse_moe.experts.68.w1", "model.layers.58.block_sparse_moe.experts.69.w1", "model.layers.58.block_sparse_moe.experts.70.w1", "model.layers.58.block_sparse_moe.experts.71.w1", "model.layers.58.block_sparse_moe.experts.72.w1", "model.layers.58.block_sparse_moe.experts.73.w1", "model.layers.58.block_sparse_moe.experts.74.w1", "model.layers.58.block_sparse_moe.experts.75.w1", "model.layers.58.block_sparse_moe.experts.76.w1", "model.layers.58.block_sparse_moe.experts.77.w1", "model.layers.58.block_sparse_moe.experts.78.w1", "model.layers.58.block_sparse_moe.experts.79.w1", "model.layers.58.block_sparse_moe.experts.80.w1", "model.layers.58.block_sparse_moe.experts.81.w1", "model.layers.58.block_sparse_moe.experts.82.w1", "model.layers.58.block_sparse_moe.experts.83.w1", "model.layers.58.block_sparse_moe.experts.84.w1", "model.layers.58.block_sparse_moe.experts.85.w1", "model.layers.58.block_sparse_moe.experts.86.w1", "model.layers.58.block_sparse_moe.experts.87.w1", "model.layers.58.block_sparse_moe.experts.88.w1", "model.layers.58.block_sparse_moe.experts.89.w1", "model.layers.58.block_sparse_moe.experts.90.w1", "model.layers.58.block_sparse_moe.experts.91.w1", "model.layers.58.block_sparse_moe.experts.92.w1", "model.layers.58.block_sparse_moe.experts.93.w1", "model.layers.58.block_sparse_moe.experts.94.w1", "model.layers.58.block_sparse_moe.experts.95.w1", "model.layers.58.block_sparse_moe.experts.96.w1", "model.layers.58.block_sparse_moe.experts.97.w1", "model.layers.58.block_sparse_moe.experts.98.w1", "model.layers.58.block_sparse_moe.experts.99.w1", "model.layers.58.block_sparse_moe.experts.100.w1", "model.layers.58.block_sparse_moe.experts.101.w1", "model.layers.58.block_sparse_moe.experts.102.w1", "model.layers.58.block_sparse_moe.experts.103.w1", "model.layers.58.block_sparse_moe.experts.104.w1", "model.layers.58.block_sparse_moe.experts.105.w1", "model.layers.58.block_sparse_moe.experts.106.w1", "model.layers.58.block_sparse_moe.experts.107.w1", "model.layers.58.block_sparse_moe.experts.108.w1", "model.layers.58.block_sparse_moe.experts.109.w1", "model.layers.58.block_sparse_moe.experts.110.w1", "model.layers.58.block_sparse_moe.experts.111.w1", "model.layers.58.block_sparse_moe.experts.112.w1", "model.layers.58.block_sparse_moe.experts.113.w1", "model.layers.58.block_sparse_moe.experts.114.w1", "model.layers.58.block_sparse_moe.experts.115.w1", "model.layers.58.block_sparse_moe.experts.116.w1", "model.layers.58.block_sparse_moe.experts.117.w1", "model.layers.58.block_sparse_moe.experts.118.w1", "model.layers.58.block_sparse_moe.experts.119.w1", "model.layers.58.block_sparse_moe.experts.120.w1", "model.layers.58.block_sparse_moe.experts.121.w1", "model.layers.58.block_sparse_moe.experts.122.w1", "model.layers.58.block_sparse_moe.experts.123.w1", "model.layers.58.block_sparse_moe.experts.124.w1", "model.layers.58.block_sparse_moe.experts.125.w1", "model.layers.58.block_sparse_moe.experts.126.w1", "model.layers.58.block_sparse_moe.experts.127.w1", "model.layers.58.block_sparse_moe.experts.128.w1", "model.layers.58.block_sparse_moe.experts.129.w1", "model.layers.58.block_sparse_moe.experts.130.w1", "model.layers.58.block_sparse_moe.experts.131.w1", "model.layers.58.block_sparse_moe.experts.132.w1", "model.layers.58.block_sparse_moe.experts.133.w1", "model.layers.58.block_sparse_moe.experts.134.w1", "model.layers.58.block_sparse_moe.experts.135.w1", "model.layers.58.block_sparse_moe.experts.136.w1", "model.layers.58.block_sparse_moe.experts.137.w1", "model.layers.58.block_sparse_moe.experts.138.w1", "model.layers.58.block_sparse_moe.experts.139.w1", "model.layers.58.block_sparse_moe.experts.140.w1", "model.layers.58.block_sparse_moe.experts.141.w1", "model.layers.58.block_sparse_moe.experts.142.w1", "model.layers.58.block_sparse_moe.experts.143.w1", "model.layers.58.block_sparse_moe.experts.144.w1", "model.layers.58.block_sparse_moe.experts.145.w1", "model.layers.58.block_sparse_moe.experts.146.w1", "model.layers.58.block_sparse_moe.experts.147.w1", "model.layers.58.block_sparse_moe.experts.148.w1", "model.layers.58.block_sparse_moe.experts.149.w1", "model.layers.58.block_sparse_moe.experts.150.w1", "model.layers.58.block_sparse_moe.experts.151.w1", "model.layers.58.block_sparse_moe.experts.152.w1", "model.layers.58.block_sparse_moe.experts.153.w1", "model.layers.58.block_sparse_moe.experts.154.w1", "model.layers.58.block_sparse_moe.experts.155.w1", "model.layers.58.block_sparse_moe.experts.156.w1", "model.layers.58.block_sparse_moe.experts.157.w1", "model.layers.58.block_sparse_moe.experts.158.w1", "model.layers.58.block_sparse_moe.experts.159.w1", "model.layers.58.block_sparse_moe.experts.160.w1", "model.layers.58.block_sparse_moe.experts.161.w1", "model.layers.58.block_sparse_moe.experts.162.w1", "model.layers.58.block_sparse_moe.experts.163.w1", "model.layers.58.block_sparse_moe.experts.164.w1", "model.layers.58.block_sparse_moe.experts.165.w1", "model.layers.58.block_sparse_moe.experts.166.w1", "model.layers.58.block_sparse_moe.experts.167.w1", "model.layers.58.block_sparse_moe.experts.168.w1", "model.layers.58.block_sparse_moe.experts.169.w1", "model.layers.58.block_sparse_moe.experts.170.w1", "model.layers.58.block_sparse_moe.experts.171.w1", "model.layers.58.block_sparse_moe.experts.172.w1", "model.layers.58.block_sparse_moe.experts.173.w1", "model.layers.58.block_sparse_moe.experts.174.w1", "model.layers.58.block_sparse_moe.experts.175.w1", "model.layers.58.block_sparse_moe.experts.176.w1", "model.layers.58.block_sparse_moe.experts.177.w1", "model.layers.58.block_sparse_moe.experts.178.w1", "model.layers.58.block_sparse_moe.experts.179.w1", "model.layers.58.block_sparse_moe.experts.180.w1", "model.layers.58.block_sparse_moe.experts.181.w1", "model.layers.58.block_sparse_moe.experts.182.w1", "model.layers.58.block_sparse_moe.experts.183.w1", "model.layers.58.block_sparse_moe.experts.184.w1", "model.layers.58.block_sparse_moe.experts.185.w1", "model.layers.58.block_sparse_moe.experts.186.w1", "model.layers.58.block_sparse_moe.experts.187.w1", "model.layers.58.block_sparse_moe.experts.188.w1", "model.layers.58.block_sparse_moe.experts.189.w1", "model.layers.58.block_sparse_moe.experts.190.w1", "model.layers.58.block_sparse_moe.experts.191.w1", "model.layers.58.block_sparse_moe.experts.192.w1", "model.layers.58.block_sparse_moe.experts.193.w1", "model.layers.58.block_sparse_moe.experts.194.w1", "model.layers.58.block_sparse_moe.experts.195.w1", "model.layers.58.block_sparse_moe.experts.196.w1", "model.layers.58.block_sparse_moe.experts.197.w1", "model.layers.58.block_sparse_moe.experts.198.w1", "model.layers.58.block_sparse_moe.experts.199.w1", "model.layers.58.block_sparse_moe.experts.200.w1", "model.layers.58.block_sparse_moe.experts.201.w1", "model.layers.58.block_sparse_moe.experts.202.w1", "model.layers.58.block_sparse_moe.experts.203.w1", "model.layers.58.block_sparse_moe.experts.204.w1", "model.layers.58.block_sparse_moe.experts.205.w1", "model.layers.58.block_sparse_moe.experts.206.w1", "model.layers.58.block_sparse_moe.experts.207.w1", "model.layers.58.block_sparse_moe.experts.208.w1", "model.layers.58.block_sparse_moe.experts.209.w1", "model.layers.58.block_sparse_moe.experts.210.w1", "model.layers.58.block_sparse_moe.experts.211.w1", "model.layers.58.block_sparse_moe.experts.212.w1", "model.layers.58.block_sparse_moe.experts.213.w1", "model.layers.58.block_sparse_moe.experts.214.w1", "model.layers.58.block_sparse_moe.experts.215.w1", "model.layers.58.block_sparse_moe.experts.216.w1", "model.layers.58.block_sparse_moe.experts.217.w1", "model.layers.58.block_sparse_moe.experts.218.w1", "model.layers.58.block_sparse_moe.experts.219.w1", "model.layers.58.block_sparse_moe.experts.220.w1", "model.layers.58.block_sparse_moe.experts.221.w1", "model.layers.58.block_sparse_moe.experts.222.w1", "model.layers.58.block_sparse_moe.experts.223.w1", "model.layers.58.block_sparse_moe.experts.224.w1", "model.layers.58.block_sparse_moe.experts.225.w1", "model.layers.58.block_sparse_moe.experts.226.w1", "model.layers.58.block_sparse_moe.experts.227.w1", "model.layers.58.block_sparse_moe.experts.228.w1", "model.layers.58.block_sparse_moe.experts.229.w1", "model.layers.58.block_sparse_moe.experts.230.w1", "model.layers.58.block_sparse_moe.experts.231.w1", "model.layers.58.block_sparse_moe.experts.232.w1", "model.layers.58.block_sparse_moe.experts.233.w1", "model.layers.58.block_sparse_moe.experts.234.w1", "model.layers.58.block_sparse_moe.experts.235.w1", "model.layers.58.block_sparse_moe.experts.236.w1", "model.layers.58.block_sparse_moe.experts.237.w1", "model.layers.58.block_sparse_moe.experts.238.w1", "model.layers.58.block_sparse_moe.experts.239.w1", "model.layers.58.block_sparse_moe.experts.240.w1", "model.layers.58.block_sparse_moe.experts.241.w1", "model.layers.58.block_sparse_moe.experts.242.w1", "model.layers.58.block_sparse_moe.experts.243.w1", "model.layers.58.block_sparse_moe.experts.244.w1", "model.layers.58.block_sparse_moe.experts.245.w1", "model.layers.58.block_sparse_moe.experts.246.w1", "model.layers.58.block_sparse_moe.experts.247.w1", "model.layers.58.block_sparse_moe.experts.248.w1", "model.layers.58.block_sparse_moe.experts.249.w1", "model.layers.58.block_sparse_moe.experts.250.w1", "model.layers.58.block_sparse_moe.experts.251.w1", "model.layers.58.block_sparse_moe.experts.252.w1", "model.layers.58.block_sparse_moe.experts.253.w1", "model.layers.58.block_sparse_moe.experts.254.w1", "model.layers.58.block_sparse_moe.experts.255.w1", "model.layers.58.block_sparse_moe.experts.0.w3", "model.layers.58.block_sparse_moe.experts.1.w3", "model.layers.58.block_sparse_moe.experts.2.w3", "model.layers.58.block_sparse_moe.experts.3.w3", "model.layers.58.block_sparse_moe.experts.4.w3", "model.layers.58.block_sparse_moe.experts.5.w3", "model.layers.58.block_sparse_moe.experts.6.w3", "model.layers.58.block_sparse_moe.experts.7.w3", "model.layers.58.block_sparse_moe.experts.8.w3", "model.layers.58.block_sparse_moe.experts.9.w3", "model.layers.58.block_sparse_moe.experts.10.w3", "model.layers.58.block_sparse_moe.experts.11.w3", "model.layers.58.block_sparse_moe.experts.12.w3", "model.layers.58.block_sparse_moe.experts.13.w3", "model.layers.58.block_sparse_moe.experts.14.w3", "model.layers.58.block_sparse_moe.experts.15.w3", "model.layers.58.block_sparse_moe.experts.16.w3", "model.layers.58.block_sparse_moe.experts.17.w3", "model.layers.58.block_sparse_moe.experts.18.w3", "model.layers.58.block_sparse_moe.experts.19.w3", "model.layers.58.block_sparse_moe.experts.20.w3", "model.layers.58.block_sparse_moe.experts.21.w3", "model.layers.58.block_sparse_moe.experts.22.w3", "model.layers.58.block_sparse_moe.experts.23.w3", "model.layers.58.block_sparse_moe.experts.24.w3", "model.layers.58.block_sparse_moe.experts.25.w3", "model.layers.58.block_sparse_moe.experts.26.w3", "model.layers.58.block_sparse_moe.experts.27.w3", "model.layers.58.block_sparse_moe.experts.28.w3", "model.layers.58.block_sparse_moe.experts.29.w3", "model.layers.58.block_sparse_moe.experts.30.w3", "model.layers.58.block_sparse_moe.experts.31.w3", "model.layers.58.block_sparse_moe.experts.32.w3", "model.layers.58.block_sparse_moe.experts.33.w3", "model.layers.58.block_sparse_moe.experts.34.w3", "model.layers.58.block_sparse_moe.experts.35.w3", "model.layers.58.block_sparse_moe.experts.36.w3", "model.layers.58.block_sparse_moe.experts.37.w3", "model.layers.58.block_sparse_moe.experts.38.w3", "model.layers.58.block_sparse_moe.experts.39.w3", "model.layers.58.block_sparse_moe.experts.40.w3", "model.layers.58.block_sparse_moe.experts.41.w3", "model.layers.58.block_sparse_moe.experts.42.w3", "model.layers.58.block_sparse_moe.experts.43.w3", "model.layers.58.block_sparse_moe.experts.44.w3", "model.layers.58.block_sparse_moe.experts.45.w3", "model.layers.58.block_sparse_moe.experts.46.w3", "model.layers.58.block_sparse_moe.experts.47.w3", "model.layers.58.block_sparse_moe.experts.48.w3", "model.layers.58.block_sparse_moe.experts.49.w3", "model.layers.58.block_sparse_moe.experts.50.w3", "model.layers.58.block_sparse_moe.experts.51.w3", "model.layers.58.block_sparse_moe.experts.52.w3", "model.layers.58.block_sparse_moe.experts.53.w3", "model.layers.58.block_sparse_moe.experts.54.w3", "model.layers.58.block_sparse_moe.experts.55.w3", "model.layers.58.block_sparse_moe.experts.56.w3", "model.layers.58.block_sparse_moe.experts.57.w3", "model.layers.58.block_sparse_moe.experts.58.w3", "model.layers.58.block_sparse_moe.experts.59.w3", "model.layers.58.block_sparse_moe.experts.60.w3", "model.layers.58.block_sparse_moe.experts.61.w3", "model.layers.58.block_sparse_moe.experts.62.w3", "model.layers.58.block_sparse_moe.experts.63.w3", "model.layers.58.block_sparse_moe.experts.64.w3", "model.layers.58.block_sparse_moe.experts.65.w3", "model.layers.58.block_sparse_moe.experts.66.w3", "model.layers.58.block_sparse_moe.experts.67.w3", "model.layers.58.block_sparse_moe.experts.68.w3", "model.layers.58.block_sparse_moe.experts.69.w3", "model.layers.58.block_sparse_moe.experts.70.w3", "model.layers.58.block_sparse_moe.experts.71.w3", "model.layers.58.block_sparse_moe.experts.72.w3", "model.layers.58.block_sparse_moe.experts.73.w3", "model.layers.58.block_sparse_moe.experts.74.w3", "model.layers.58.block_sparse_moe.experts.75.w3", "model.layers.58.block_sparse_moe.experts.76.w3", "model.layers.58.block_sparse_moe.experts.77.w3", "model.layers.58.block_sparse_moe.experts.78.w3", "model.layers.58.block_sparse_moe.experts.79.w3", "model.layers.58.block_sparse_moe.experts.80.w3", "model.layers.58.block_sparse_moe.experts.81.w3", "model.layers.58.block_sparse_moe.experts.82.w3", "model.layers.58.block_sparse_moe.experts.83.w3", "model.layers.58.block_sparse_moe.experts.84.w3", "model.layers.58.block_sparse_moe.experts.85.w3", "model.layers.58.block_sparse_moe.experts.86.w3", "model.layers.58.block_sparse_moe.experts.87.w3", "model.layers.58.block_sparse_moe.experts.88.w3", "model.layers.58.block_sparse_moe.experts.89.w3", "model.layers.58.block_sparse_moe.experts.90.w3", "model.layers.58.block_sparse_moe.experts.91.w3", "model.layers.58.block_sparse_moe.experts.92.w3", "model.layers.58.block_sparse_moe.experts.93.w3", "model.layers.58.block_sparse_moe.experts.94.w3", "model.layers.58.block_sparse_moe.experts.95.w3", "model.layers.58.block_sparse_moe.experts.96.w3", "model.layers.58.block_sparse_moe.experts.97.w3", "model.layers.58.block_sparse_moe.experts.98.w3", "model.layers.58.block_sparse_moe.experts.99.w3", "model.layers.58.block_sparse_moe.experts.100.w3", "model.layers.58.block_sparse_moe.experts.101.w3", "model.layers.58.block_sparse_moe.experts.102.w3", "model.layers.58.block_sparse_moe.experts.103.w3", "model.layers.58.block_sparse_moe.experts.104.w3", "model.layers.58.block_sparse_moe.experts.105.w3", "model.layers.58.block_sparse_moe.experts.106.w3", "model.layers.58.block_sparse_moe.experts.107.w3", "model.layers.58.block_sparse_moe.experts.108.w3", "model.layers.58.block_sparse_moe.experts.109.w3", "model.layers.58.block_sparse_moe.experts.110.w3", "model.layers.58.block_sparse_moe.experts.111.w3", "model.layers.58.block_sparse_moe.experts.112.w3", "model.layers.58.block_sparse_moe.experts.113.w3", "model.layers.58.block_sparse_moe.experts.114.w3", "model.layers.58.block_sparse_moe.experts.115.w3", "model.layers.58.block_sparse_moe.experts.116.w3", "model.layers.58.block_sparse_moe.experts.117.w3", "model.layers.58.block_sparse_moe.experts.118.w3", "model.layers.58.block_sparse_moe.experts.119.w3", "model.layers.58.block_sparse_moe.experts.120.w3", "model.layers.58.block_sparse_moe.experts.121.w3", "model.layers.58.block_sparse_moe.experts.122.w3", "model.layers.58.block_sparse_moe.experts.123.w3", "model.layers.58.block_sparse_moe.experts.124.w3", "model.layers.58.block_sparse_moe.experts.125.w3", "model.layers.58.block_sparse_moe.experts.126.w3", "model.layers.58.block_sparse_moe.experts.127.w3", "model.layers.58.block_sparse_moe.experts.128.w3", "model.layers.58.block_sparse_moe.experts.129.w3", "model.layers.58.block_sparse_moe.experts.130.w3", "model.layers.58.block_sparse_moe.experts.131.w3", "model.layers.58.block_sparse_moe.experts.132.w3", "model.layers.58.block_sparse_moe.experts.133.w3", "model.layers.58.block_sparse_moe.experts.134.w3", "model.layers.58.block_sparse_moe.experts.135.w3", "model.layers.58.block_sparse_moe.experts.136.w3", "model.layers.58.block_sparse_moe.experts.137.w3", "model.layers.58.block_sparse_moe.experts.138.w3", "model.layers.58.block_sparse_moe.experts.139.w3", "model.layers.58.block_sparse_moe.experts.140.w3", "model.layers.58.block_sparse_moe.experts.141.w3", "model.layers.58.block_sparse_moe.experts.142.w3", "model.layers.58.block_sparse_moe.experts.143.w3", "model.layers.58.block_sparse_moe.experts.144.w3", "model.layers.58.block_sparse_moe.experts.145.w3", "model.layers.58.block_sparse_moe.experts.146.w3", "model.layers.58.block_sparse_moe.experts.147.w3", "model.layers.58.block_sparse_moe.experts.148.w3", "model.layers.58.block_sparse_moe.experts.149.w3", "model.layers.58.block_sparse_moe.experts.150.w3", "model.layers.58.block_sparse_moe.experts.151.w3", "model.layers.58.block_sparse_moe.experts.152.w3", "model.layers.58.block_sparse_moe.experts.153.w3", "model.layers.58.block_sparse_moe.experts.154.w3", "model.layers.58.block_sparse_moe.experts.155.w3", "model.layers.58.block_sparse_moe.experts.156.w3", "model.layers.58.block_sparse_moe.experts.157.w3", "model.layers.58.block_sparse_moe.experts.158.w3", "model.layers.58.block_sparse_moe.experts.159.w3", "model.layers.58.block_sparse_moe.experts.160.w3", "model.layers.58.block_sparse_moe.experts.161.w3", "model.layers.58.block_sparse_moe.experts.162.w3", "model.layers.58.block_sparse_moe.experts.163.w3", "model.layers.58.block_sparse_moe.experts.164.w3", "model.layers.58.block_sparse_moe.experts.165.w3", "model.layers.58.block_sparse_moe.experts.166.w3", "model.layers.58.block_sparse_moe.experts.167.w3", "model.layers.58.block_sparse_moe.experts.168.w3", "model.layers.58.block_sparse_moe.experts.169.w3", "model.layers.58.block_sparse_moe.experts.170.w3", "model.layers.58.block_sparse_moe.experts.171.w3", "model.layers.58.block_sparse_moe.experts.172.w3", "model.layers.58.block_sparse_moe.experts.173.w3", "model.layers.58.block_sparse_moe.experts.174.w3", "model.layers.58.block_sparse_moe.experts.175.w3", "model.layers.58.block_sparse_moe.experts.176.w3", "model.layers.58.block_sparse_moe.experts.177.w3", "model.layers.58.block_sparse_moe.experts.178.w3", "model.layers.58.block_sparse_moe.experts.179.w3", "model.layers.58.block_sparse_moe.experts.180.w3", "model.layers.58.block_sparse_moe.experts.181.w3", "model.layers.58.block_sparse_moe.experts.182.w3", "model.layers.58.block_sparse_moe.experts.183.w3", "model.layers.58.block_sparse_moe.experts.184.w3", "model.layers.58.block_sparse_moe.experts.185.w3", "model.layers.58.block_sparse_moe.experts.186.w3", "model.layers.58.block_sparse_moe.experts.187.w3", "model.layers.58.block_sparse_moe.experts.188.w3", "model.layers.58.block_sparse_moe.experts.189.w3", "model.layers.58.block_sparse_moe.experts.190.w3", "model.layers.58.block_sparse_moe.experts.191.w3", "model.layers.58.block_sparse_moe.experts.192.w3", "model.layers.58.block_sparse_moe.experts.193.w3", "model.layers.58.block_sparse_moe.experts.194.w3", "model.layers.58.block_sparse_moe.experts.195.w3", "model.layers.58.block_sparse_moe.experts.196.w3", "model.layers.58.block_sparse_moe.experts.197.w3", "model.layers.58.block_sparse_moe.experts.198.w3", "model.layers.58.block_sparse_moe.experts.199.w3", "model.layers.58.block_sparse_moe.experts.200.w3", "model.layers.58.block_sparse_moe.experts.201.w3", "model.layers.58.block_sparse_moe.experts.202.w3", "model.layers.58.block_sparse_moe.experts.203.w3", "model.layers.58.block_sparse_moe.experts.204.w3", "model.layers.58.block_sparse_moe.experts.205.w3", "model.layers.58.block_sparse_moe.experts.206.w3", "model.layers.58.block_sparse_moe.experts.207.w3", "model.layers.58.block_sparse_moe.experts.208.w3", "model.layers.58.block_sparse_moe.experts.209.w3", "model.layers.58.block_sparse_moe.experts.210.w3", "model.layers.58.block_sparse_moe.experts.211.w3", "model.layers.58.block_sparse_moe.experts.212.w3", "model.layers.58.block_sparse_moe.experts.213.w3", "model.layers.58.block_sparse_moe.experts.214.w3", "model.layers.58.block_sparse_moe.experts.215.w3", "model.layers.58.block_sparse_moe.experts.216.w3", "model.layers.58.block_sparse_moe.experts.217.w3", "model.layers.58.block_sparse_moe.experts.218.w3", "model.layers.58.block_sparse_moe.experts.219.w3", "model.layers.58.block_sparse_moe.experts.220.w3", "model.layers.58.block_sparse_moe.experts.221.w3", "model.layers.58.block_sparse_moe.experts.222.w3", "model.layers.58.block_sparse_moe.experts.223.w3", "model.layers.58.block_sparse_moe.experts.224.w3", "model.layers.58.block_sparse_moe.experts.225.w3", "model.layers.58.block_sparse_moe.experts.226.w3", "model.layers.58.block_sparse_moe.experts.227.w3", "model.layers.58.block_sparse_moe.experts.228.w3", "model.layers.58.block_sparse_moe.experts.229.w3", "model.layers.58.block_sparse_moe.experts.230.w3", "model.layers.58.block_sparse_moe.experts.231.w3", "model.layers.58.block_sparse_moe.experts.232.w3", "model.layers.58.block_sparse_moe.experts.233.w3", "model.layers.58.block_sparse_moe.experts.234.w3", "model.layers.58.block_sparse_moe.experts.235.w3", "model.layers.58.block_sparse_moe.experts.236.w3", "model.layers.58.block_sparse_moe.experts.237.w3", "model.layers.58.block_sparse_moe.experts.238.w3", "model.layers.58.block_sparse_moe.experts.239.w3", "model.layers.58.block_sparse_moe.experts.240.w3", "model.layers.58.block_sparse_moe.experts.241.w3", "model.layers.58.block_sparse_moe.experts.242.w3", "model.layers.58.block_sparse_moe.experts.243.w3", "model.layers.58.block_sparse_moe.experts.244.w3", "model.layers.58.block_sparse_moe.experts.245.w3", "model.layers.58.block_sparse_moe.experts.246.w3", "model.layers.58.block_sparse_moe.experts.247.w3", "model.layers.58.block_sparse_moe.experts.248.w3", "model.layers.58.block_sparse_moe.experts.249.w3", "model.layers.58.block_sparse_moe.experts.250.w3", "model.layers.58.block_sparse_moe.experts.251.w3", "model.layers.58.block_sparse_moe.experts.252.w3", "model.layers.58.block_sparse_moe.experts.253.w3", "model.layers.58.block_sparse_moe.experts.254.w3", "model.layers.58.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": 0.000268053263425827, "dbits": 2415919104 } ] }, { "idx": 294, "layers": [ "model.layers.58.block_sparse_moe.experts.0.w2", "model.layers.58.block_sparse_moe.experts.1.w2", "model.layers.58.block_sparse_moe.experts.2.w2", "model.layers.58.block_sparse_moe.experts.3.w2", "model.layers.58.block_sparse_moe.experts.4.w2", "model.layers.58.block_sparse_moe.experts.5.w2", "model.layers.58.block_sparse_moe.experts.6.w2", "model.layers.58.block_sparse_moe.experts.7.w2", "model.layers.58.block_sparse_moe.experts.8.w2", "model.layers.58.block_sparse_moe.experts.9.w2", "model.layers.58.block_sparse_moe.experts.10.w2", "model.layers.58.block_sparse_moe.experts.11.w2", "model.layers.58.block_sparse_moe.experts.12.w2", "model.layers.58.block_sparse_moe.experts.13.w2", "model.layers.58.block_sparse_moe.experts.14.w2", "model.layers.58.block_sparse_moe.experts.15.w2", "model.layers.58.block_sparse_moe.experts.16.w2", "model.layers.58.block_sparse_moe.experts.17.w2", "model.layers.58.block_sparse_moe.experts.18.w2", "model.layers.58.block_sparse_moe.experts.19.w2", "model.layers.58.block_sparse_moe.experts.20.w2", "model.layers.58.block_sparse_moe.experts.21.w2", "model.layers.58.block_sparse_moe.experts.22.w2", "model.layers.58.block_sparse_moe.experts.23.w2", "model.layers.58.block_sparse_moe.experts.24.w2", "model.layers.58.block_sparse_moe.experts.25.w2", "model.layers.58.block_sparse_moe.experts.26.w2", "model.layers.58.block_sparse_moe.experts.27.w2", "model.layers.58.block_sparse_moe.experts.28.w2", "model.layers.58.block_sparse_moe.experts.29.w2", "model.layers.58.block_sparse_moe.experts.30.w2", "model.layers.58.block_sparse_moe.experts.31.w2", "model.layers.58.block_sparse_moe.experts.32.w2", "model.layers.58.block_sparse_moe.experts.33.w2", "model.layers.58.block_sparse_moe.experts.34.w2", "model.layers.58.block_sparse_moe.experts.35.w2", "model.layers.58.block_sparse_moe.experts.36.w2", "model.layers.58.block_sparse_moe.experts.37.w2", "model.layers.58.block_sparse_moe.experts.38.w2", "model.layers.58.block_sparse_moe.experts.39.w2", "model.layers.58.block_sparse_moe.experts.40.w2", "model.layers.58.block_sparse_moe.experts.41.w2", "model.layers.58.block_sparse_moe.experts.42.w2", "model.layers.58.block_sparse_moe.experts.43.w2", "model.layers.58.block_sparse_moe.experts.44.w2", "model.layers.58.block_sparse_moe.experts.45.w2", "model.layers.58.block_sparse_moe.experts.46.w2", "model.layers.58.block_sparse_moe.experts.47.w2", "model.layers.58.block_sparse_moe.experts.48.w2", "model.layers.58.block_sparse_moe.experts.49.w2", "model.layers.58.block_sparse_moe.experts.50.w2", "model.layers.58.block_sparse_moe.experts.51.w2", "model.layers.58.block_sparse_moe.experts.52.w2", "model.layers.58.block_sparse_moe.experts.53.w2", "model.layers.58.block_sparse_moe.experts.54.w2", "model.layers.58.block_sparse_moe.experts.55.w2", "model.layers.58.block_sparse_moe.experts.56.w2", "model.layers.58.block_sparse_moe.experts.57.w2", "model.layers.58.block_sparse_moe.experts.58.w2", "model.layers.58.block_sparse_moe.experts.59.w2", "model.layers.58.block_sparse_moe.experts.60.w2", "model.layers.58.block_sparse_moe.experts.61.w2", "model.layers.58.block_sparse_moe.experts.62.w2", "model.layers.58.block_sparse_moe.experts.63.w2", "model.layers.58.block_sparse_moe.experts.64.w2", "model.layers.58.block_sparse_moe.experts.65.w2", "model.layers.58.block_sparse_moe.experts.66.w2", "model.layers.58.block_sparse_moe.experts.67.w2", "model.layers.58.block_sparse_moe.experts.68.w2", "model.layers.58.block_sparse_moe.experts.69.w2", "model.layers.58.block_sparse_moe.experts.70.w2", "model.layers.58.block_sparse_moe.experts.71.w2", "model.layers.58.block_sparse_moe.experts.72.w2", "model.layers.58.block_sparse_moe.experts.73.w2", "model.layers.58.block_sparse_moe.experts.74.w2", "model.layers.58.block_sparse_moe.experts.75.w2", "model.layers.58.block_sparse_moe.experts.76.w2", "model.layers.58.block_sparse_moe.experts.77.w2", "model.layers.58.block_sparse_moe.experts.78.w2", "model.layers.58.block_sparse_moe.experts.79.w2", "model.layers.58.block_sparse_moe.experts.80.w2", "model.layers.58.block_sparse_moe.experts.81.w2", "model.layers.58.block_sparse_moe.experts.82.w2", "model.layers.58.block_sparse_moe.experts.83.w2", "model.layers.58.block_sparse_moe.experts.84.w2", "model.layers.58.block_sparse_moe.experts.85.w2", "model.layers.58.block_sparse_moe.experts.86.w2", "model.layers.58.block_sparse_moe.experts.87.w2", "model.layers.58.block_sparse_moe.experts.88.w2", "model.layers.58.block_sparse_moe.experts.89.w2", "model.layers.58.block_sparse_moe.experts.90.w2", "model.layers.58.block_sparse_moe.experts.91.w2", "model.layers.58.block_sparse_moe.experts.92.w2", "model.layers.58.block_sparse_moe.experts.93.w2", "model.layers.58.block_sparse_moe.experts.94.w2", "model.layers.58.block_sparse_moe.experts.95.w2", "model.layers.58.block_sparse_moe.experts.96.w2", "model.layers.58.block_sparse_moe.experts.97.w2", "model.layers.58.block_sparse_moe.experts.98.w2", "model.layers.58.block_sparse_moe.experts.99.w2", "model.layers.58.block_sparse_moe.experts.100.w2", "model.layers.58.block_sparse_moe.experts.101.w2", "model.layers.58.block_sparse_moe.experts.102.w2", "model.layers.58.block_sparse_moe.experts.103.w2", "model.layers.58.block_sparse_moe.experts.104.w2", "model.layers.58.block_sparse_moe.experts.105.w2", "model.layers.58.block_sparse_moe.experts.106.w2", "model.layers.58.block_sparse_moe.experts.107.w2", "model.layers.58.block_sparse_moe.experts.108.w2", "model.layers.58.block_sparse_moe.experts.109.w2", "model.layers.58.block_sparse_moe.experts.110.w2", "model.layers.58.block_sparse_moe.experts.111.w2", "model.layers.58.block_sparse_moe.experts.112.w2", "model.layers.58.block_sparse_moe.experts.113.w2", "model.layers.58.block_sparse_moe.experts.114.w2", "model.layers.58.block_sparse_moe.experts.115.w2", "model.layers.58.block_sparse_moe.experts.116.w2", "model.layers.58.block_sparse_moe.experts.117.w2", "model.layers.58.block_sparse_moe.experts.118.w2", "model.layers.58.block_sparse_moe.experts.119.w2", "model.layers.58.block_sparse_moe.experts.120.w2", "model.layers.58.block_sparse_moe.experts.121.w2", "model.layers.58.block_sparse_moe.experts.122.w2", "model.layers.58.block_sparse_moe.experts.123.w2", "model.layers.58.block_sparse_moe.experts.124.w2", "model.layers.58.block_sparse_moe.experts.125.w2", "model.layers.58.block_sparse_moe.experts.126.w2", "model.layers.58.block_sparse_moe.experts.127.w2", "model.layers.58.block_sparse_moe.experts.128.w2", "model.layers.58.block_sparse_moe.experts.129.w2", "model.layers.58.block_sparse_moe.experts.130.w2", "model.layers.58.block_sparse_moe.experts.131.w2", "model.layers.58.block_sparse_moe.experts.132.w2", "model.layers.58.block_sparse_moe.experts.133.w2", "model.layers.58.block_sparse_moe.experts.134.w2", "model.layers.58.block_sparse_moe.experts.135.w2", "model.layers.58.block_sparse_moe.experts.136.w2", "model.layers.58.block_sparse_moe.experts.137.w2", "model.layers.58.block_sparse_moe.experts.138.w2", "model.layers.58.block_sparse_moe.experts.139.w2", "model.layers.58.block_sparse_moe.experts.140.w2", "model.layers.58.block_sparse_moe.experts.141.w2", "model.layers.58.block_sparse_moe.experts.142.w2", "model.layers.58.block_sparse_moe.experts.143.w2", "model.layers.58.block_sparse_moe.experts.144.w2", "model.layers.58.block_sparse_moe.experts.145.w2", "model.layers.58.block_sparse_moe.experts.146.w2", "model.layers.58.block_sparse_moe.experts.147.w2", "model.layers.58.block_sparse_moe.experts.148.w2", "model.layers.58.block_sparse_moe.experts.149.w2", "model.layers.58.block_sparse_moe.experts.150.w2", "model.layers.58.block_sparse_moe.experts.151.w2", "model.layers.58.block_sparse_moe.experts.152.w2", "model.layers.58.block_sparse_moe.experts.153.w2", "model.layers.58.block_sparse_moe.experts.154.w2", "model.layers.58.block_sparse_moe.experts.155.w2", "model.layers.58.block_sparse_moe.experts.156.w2", "model.layers.58.block_sparse_moe.experts.157.w2", "model.layers.58.block_sparse_moe.experts.158.w2", "model.layers.58.block_sparse_moe.experts.159.w2", "model.layers.58.block_sparse_moe.experts.160.w2", "model.layers.58.block_sparse_moe.experts.161.w2", "model.layers.58.block_sparse_moe.experts.162.w2", "model.layers.58.block_sparse_moe.experts.163.w2", "model.layers.58.block_sparse_moe.experts.164.w2", "model.layers.58.block_sparse_moe.experts.165.w2", "model.layers.58.block_sparse_moe.experts.166.w2", "model.layers.58.block_sparse_moe.experts.167.w2", "model.layers.58.block_sparse_moe.experts.168.w2", "model.layers.58.block_sparse_moe.experts.169.w2", "model.layers.58.block_sparse_moe.experts.170.w2", "model.layers.58.block_sparse_moe.experts.171.w2", "model.layers.58.block_sparse_moe.experts.172.w2", "model.layers.58.block_sparse_moe.experts.173.w2", "model.layers.58.block_sparse_moe.experts.174.w2", "model.layers.58.block_sparse_moe.experts.175.w2", "model.layers.58.block_sparse_moe.experts.176.w2", "model.layers.58.block_sparse_moe.experts.177.w2", "model.layers.58.block_sparse_moe.experts.178.w2", "model.layers.58.block_sparse_moe.experts.179.w2", "model.layers.58.block_sparse_moe.experts.180.w2", "model.layers.58.block_sparse_moe.experts.181.w2", "model.layers.58.block_sparse_moe.experts.182.w2", "model.layers.58.block_sparse_moe.experts.183.w2", "model.layers.58.block_sparse_moe.experts.184.w2", "model.layers.58.block_sparse_moe.experts.185.w2", "model.layers.58.block_sparse_moe.experts.186.w2", "model.layers.58.block_sparse_moe.experts.187.w2", "model.layers.58.block_sparse_moe.experts.188.w2", "model.layers.58.block_sparse_moe.experts.189.w2", "model.layers.58.block_sparse_moe.experts.190.w2", "model.layers.58.block_sparse_moe.experts.191.w2", "model.layers.58.block_sparse_moe.experts.192.w2", "model.layers.58.block_sparse_moe.experts.193.w2", "model.layers.58.block_sparse_moe.experts.194.w2", "model.layers.58.block_sparse_moe.experts.195.w2", "model.layers.58.block_sparse_moe.experts.196.w2", "model.layers.58.block_sparse_moe.experts.197.w2", "model.layers.58.block_sparse_moe.experts.198.w2", "model.layers.58.block_sparse_moe.experts.199.w2", "model.layers.58.block_sparse_moe.experts.200.w2", "model.layers.58.block_sparse_moe.experts.201.w2", "model.layers.58.block_sparse_moe.experts.202.w2", "model.layers.58.block_sparse_moe.experts.203.w2", "model.layers.58.block_sparse_moe.experts.204.w2", "model.layers.58.block_sparse_moe.experts.205.w2", "model.layers.58.block_sparse_moe.experts.206.w2", "model.layers.58.block_sparse_moe.experts.207.w2", "model.layers.58.block_sparse_moe.experts.208.w2", "model.layers.58.block_sparse_moe.experts.209.w2", "model.layers.58.block_sparse_moe.experts.210.w2", "model.layers.58.block_sparse_moe.experts.211.w2", "model.layers.58.block_sparse_moe.experts.212.w2", "model.layers.58.block_sparse_moe.experts.213.w2", "model.layers.58.block_sparse_moe.experts.214.w2", "model.layers.58.block_sparse_moe.experts.215.w2", "model.layers.58.block_sparse_moe.experts.216.w2", "model.layers.58.block_sparse_moe.experts.217.w2", "model.layers.58.block_sparse_moe.experts.218.w2", "model.layers.58.block_sparse_moe.experts.219.w2", "model.layers.58.block_sparse_moe.experts.220.w2", "model.layers.58.block_sparse_moe.experts.221.w2", "model.layers.58.block_sparse_moe.experts.222.w2", "model.layers.58.block_sparse_moe.experts.223.w2", "model.layers.58.block_sparse_moe.experts.224.w2", "model.layers.58.block_sparse_moe.experts.225.w2", "model.layers.58.block_sparse_moe.experts.226.w2", "model.layers.58.block_sparse_moe.experts.227.w2", "model.layers.58.block_sparse_moe.experts.228.w2", "model.layers.58.block_sparse_moe.experts.229.w2", "model.layers.58.block_sparse_moe.experts.230.w2", "model.layers.58.block_sparse_moe.experts.231.w2", "model.layers.58.block_sparse_moe.experts.232.w2", "model.layers.58.block_sparse_moe.experts.233.w2", "model.layers.58.block_sparse_moe.experts.234.w2", "model.layers.58.block_sparse_moe.experts.235.w2", "model.layers.58.block_sparse_moe.experts.236.w2", "model.layers.58.block_sparse_moe.experts.237.w2", "model.layers.58.block_sparse_moe.experts.238.w2", "model.layers.58.block_sparse_moe.experts.239.w2", "model.layers.58.block_sparse_moe.experts.240.w2", "model.layers.58.block_sparse_moe.experts.241.w2", "model.layers.58.block_sparse_moe.experts.242.w2", "model.layers.58.block_sparse_moe.experts.243.w2", "model.layers.58.block_sparse_moe.experts.244.w2", "model.layers.58.block_sparse_moe.experts.245.w2", "model.layers.58.block_sparse_moe.experts.246.w2", "model.layers.58.block_sparse_moe.experts.247.w2", "model.layers.58.block_sparse_moe.experts.248.w2", "model.layers.58.block_sparse_moe.experts.249.w2", "model.layers.58.block_sparse_moe.experts.250.w2", "model.layers.58.block_sparse_moe.experts.251.w2", "model.layers.58.block_sparse_moe.experts.252.w2", "model.layers.58.block_sparse_moe.experts.253.w2", "model.layers.58.block_sparse_moe.experts.254.w2", "model.layers.58.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 4.064403474329792e-05, "dbits": 1207959552 } ] }, { "idx": 295, "layers": [ "model.layers.59.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0001753501594066731, "dbits": 18874368 } ] }, { "idx": 296, "layers": [ "model.layers.59.self_attn.k_proj", "model.layers.59.self_attn.v_proj" ], "candidates": [ { "dkld": 0.0006388753652572521, "dbits": 6291456 } ] }, { "idx": 297, "layers": [ "model.layers.59.self_attn.o_proj" ], "candidates": [ { "dkld": 0.001108071580529224, "dbits": 18874368 } ] }, { "idx": 298, "layers": [ "model.layers.59.block_sparse_moe.experts.0.w1", "model.layers.59.block_sparse_moe.experts.1.w1", "model.layers.59.block_sparse_moe.experts.2.w1", "model.layers.59.block_sparse_moe.experts.3.w1", "model.layers.59.block_sparse_moe.experts.4.w1", "model.layers.59.block_sparse_moe.experts.5.w1", "model.layers.59.block_sparse_moe.experts.6.w1", "model.layers.59.block_sparse_moe.experts.7.w1", "model.layers.59.block_sparse_moe.experts.8.w1", "model.layers.59.block_sparse_moe.experts.9.w1", "model.layers.59.block_sparse_moe.experts.10.w1", "model.layers.59.block_sparse_moe.experts.11.w1", "model.layers.59.block_sparse_moe.experts.12.w1", "model.layers.59.block_sparse_moe.experts.13.w1", "model.layers.59.block_sparse_moe.experts.14.w1", "model.layers.59.block_sparse_moe.experts.15.w1", "model.layers.59.block_sparse_moe.experts.16.w1", "model.layers.59.block_sparse_moe.experts.17.w1", "model.layers.59.block_sparse_moe.experts.18.w1", "model.layers.59.block_sparse_moe.experts.19.w1", "model.layers.59.block_sparse_moe.experts.20.w1", "model.layers.59.block_sparse_moe.experts.21.w1", "model.layers.59.block_sparse_moe.experts.22.w1", "model.layers.59.block_sparse_moe.experts.23.w1", "model.layers.59.block_sparse_moe.experts.24.w1", "model.layers.59.block_sparse_moe.experts.25.w1", "model.layers.59.block_sparse_moe.experts.26.w1", "model.layers.59.block_sparse_moe.experts.27.w1", "model.layers.59.block_sparse_moe.experts.28.w1", "model.layers.59.block_sparse_moe.experts.29.w1", "model.layers.59.block_sparse_moe.experts.30.w1", "model.layers.59.block_sparse_moe.experts.31.w1", "model.layers.59.block_sparse_moe.experts.32.w1", "model.layers.59.block_sparse_moe.experts.33.w1", "model.layers.59.block_sparse_moe.experts.34.w1", "model.layers.59.block_sparse_moe.experts.35.w1", "model.layers.59.block_sparse_moe.experts.36.w1", "model.layers.59.block_sparse_moe.experts.37.w1", "model.layers.59.block_sparse_moe.experts.38.w1", "model.layers.59.block_sparse_moe.experts.39.w1", "model.layers.59.block_sparse_moe.experts.40.w1", "model.layers.59.block_sparse_moe.experts.41.w1", "model.layers.59.block_sparse_moe.experts.42.w1", "model.layers.59.block_sparse_moe.experts.43.w1", "model.layers.59.block_sparse_moe.experts.44.w1", "model.layers.59.block_sparse_moe.experts.45.w1", "model.layers.59.block_sparse_moe.experts.46.w1", "model.layers.59.block_sparse_moe.experts.47.w1", "model.layers.59.block_sparse_moe.experts.48.w1", "model.layers.59.block_sparse_moe.experts.49.w1", "model.layers.59.block_sparse_moe.experts.50.w1", "model.layers.59.block_sparse_moe.experts.51.w1", "model.layers.59.block_sparse_moe.experts.52.w1", "model.layers.59.block_sparse_moe.experts.53.w1", "model.layers.59.block_sparse_moe.experts.54.w1", "model.layers.59.block_sparse_moe.experts.55.w1", "model.layers.59.block_sparse_moe.experts.56.w1", "model.layers.59.block_sparse_moe.experts.57.w1", "model.layers.59.block_sparse_moe.experts.58.w1", "model.layers.59.block_sparse_moe.experts.59.w1", "model.layers.59.block_sparse_moe.experts.60.w1", "model.layers.59.block_sparse_moe.experts.61.w1", "model.layers.59.block_sparse_moe.experts.62.w1", "model.layers.59.block_sparse_moe.experts.63.w1", "model.layers.59.block_sparse_moe.experts.64.w1", "model.layers.59.block_sparse_moe.experts.65.w1", "model.layers.59.block_sparse_moe.experts.66.w1", "model.layers.59.block_sparse_moe.experts.67.w1", "model.layers.59.block_sparse_moe.experts.68.w1", "model.layers.59.block_sparse_moe.experts.69.w1", "model.layers.59.block_sparse_moe.experts.70.w1", "model.layers.59.block_sparse_moe.experts.71.w1", "model.layers.59.block_sparse_moe.experts.72.w1", "model.layers.59.block_sparse_moe.experts.73.w1", "model.layers.59.block_sparse_moe.experts.74.w1", "model.layers.59.block_sparse_moe.experts.75.w1", "model.layers.59.block_sparse_moe.experts.76.w1", "model.layers.59.block_sparse_moe.experts.77.w1", "model.layers.59.block_sparse_moe.experts.78.w1", "model.layers.59.block_sparse_moe.experts.79.w1", "model.layers.59.block_sparse_moe.experts.80.w1", "model.layers.59.block_sparse_moe.experts.81.w1", "model.layers.59.block_sparse_moe.experts.82.w1", "model.layers.59.block_sparse_moe.experts.83.w1", "model.layers.59.block_sparse_moe.experts.84.w1", "model.layers.59.block_sparse_moe.experts.85.w1", "model.layers.59.block_sparse_moe.experts.86.w1", "model.layers.59.block_sparse_moe.experts.87.w1", "model.layers.59.block_sparse_moe.experts.88.w1", "model.layers.59.block_sparse_moe.experts.89.w1", "model.layers.59.block_sparse_moe.experts.90.w1", "model.layers.59.block_sparse_moe.experts.91.w1", "model.layers.59.block_sparse_moe.experts.92.w1", "model.layers.59.block_sparse_moe.experts.93.w1", "model.layers.59.block_sparse_moe.experts.94.w1", "model.layers.59.block_sparse_moe.experts.95.w1", "model.layers.59.block_sparse_moe.experts.96.w1", "model.layers.59.block_sparse_moe.experts.97.w1", "model.layers.59.block_sparse_moe.experts.98.w1", "model.layers.59.block_sparse_moe.experts.99.w1", "model.layers.59.block_sparse_moe.experts.100.w1", "model.layers.59.block_sparse_moe.experts.101.w1", "model.layers.59.block_sparse_moe.experts.102.w1", "model.layers.59.block_sparse_moe.experts.103.w1", "model.layers.59.block_sparse_moe.experts.104.w1", "model.layers.59.block_sparse_moe.experts.105.w1", "model.layers.59.block_sparse_moe.experts.106.w1", "model.layers.59.block_sparse_moe.experts.107.w1", "model.layers.59.block_sparse_moe.experts.108.w1", "model.layers.59.block_sparse_moe.experts.109.w1", "model.layers.59.block_sparse_moe.experts.110.w1", "model.layers.59.block_sparse_moe.experts.111.w1", "model.layers.59.block_sparse_moe.experts.112.w1", "model.layers.59.block_sparse_moe.experts.113.w1", "model.layers.59.block_sparse_moe.experts.114.w1", "model.layers.59.block_sparse_moe.experts.115.w1", "model.layers.59.block_sparse_moe.experts.116.w1", "model.layers.59.block_sparse_moe.experts.117.w1", "model.layers.59.block_sparse_moe.experts.118.w1", "model.layers.59.block_sparse_moe.experts.119.w1", "model.layers.59.block_sparse_moe.experts.120.w1", "model.layers.59.block_sparse_moe.experts.121.w1", "model.layers.59.block_sparse_moe.experts.122.w1", "model.layers.59.block_sparse_moe.experts.123.w1", "model.layers.59.block_sparse_moe.experts.124.w1", "model.layers.59.block_sparse_moe.experts.125.w1", "model.layers.59.block_sparse_moe.experts.126.w1", "model.layers.59.block_sparse_moe.experts.127.w1", "model.layers.59.block_sparse_moe.experts.128.w1", "model.layers.59.block_sparse_moe.experts.129.w1", "model.layers.59.block_sparse_moe.experts.130.w1", "model.layers.59.block_sparse_moe.experts.131.w1", "model.layers.59.block_sparse_moe.experts.132.w1", "model.layers.59.block_sparse_moe.experts.133.w1", "model.layers.59.block_sparse_moe.experts.134.w1", "model.layers.59.block_sparse_moe.experts.135.w1", "model.layers.59.block_sparse_moe.experts.136.w1", "model.layers.59.block_sparse_moe.experts.137.w1", "model.layers.59.block_sparse_moe.experts.138.w1", "model.layers.59.block_sparse_moe.experts.139.w1", "model.layers.59.block_sparse_moe.experts.140.w1", "model.layers.59.block_sparse_moe.experts.141.w1", "model.layers.59.block_sparse_moe.experts.142.w1", "model.layers.59.block_sparse_moe.experts.143.w1", "model.layers.59.block_sparse_moe.experts.144.w1", "model.layers.59.block_sparse_moe.experts.145.w1", "model.layers.59.block_sparse_moe.experts.146.w1", "model.layers.59.block_sparse_moe.experts.147.w1", "model.layers.59.block_sparse_moe.experts.148.w1", "model.layers.59.block_sparse_moe.experts.149.w1", "model.layers.59.block_sparse_moe.experts.150.w1", "model.layers.59.block_sparse_moe.experts.151.w1", "model.layers.59.block_sparse_moe.experts.152.w1", "model.layers.59.block_sparse_moe.experts.153.w1", "model.layers.59.block_sparse_moe.experts.154.w1", "model.layers.59.block_sparse_moe.experts.155.w1", "model.layers.59.block_sparse_moe.experts.156.w1", "model.layers.59.block_sparse_moe.experts.157.w1", "model.layers.59.block_sparse_moe.experts.158.w1", "model.layers.59.block_sparse_moe.experts.159.w1", "model.layers.59.block_sparse_moe.experts.160.w1", "model.layers.59.block_sparse_moe.experts.161.w1", "model.layers.59.block_sparse_moe.experts.162.w1", "model.layers.59.block_sparse_moe.experts.163.w1", "model.layers.59.block_sparse_moe.experts.164.w1", "model.layers.59.block_sparse_moe.experts.165.w1", "model.layers.59.block_sparse_moe.experts.166.w1", "model.layers.59.block_sparse_moe.experts.167.w1", "model.layers.59.block_sparse_moe.experts.168.w1", "model.layers.59.block_sparse_moe.experts.169.w1", "model.layers.59.block_sparse_moe.experts.170.w1", "model.layers.59.block_sparse_moe.experts.171.w1", "model.layers.59.block_sparse_moe.experts.172.w1", "model.layers.59.block_sparse_moe.experts.173.w1", "model.layers.59.block_sparse_moe.experts.174.w1", "model.layers.59.block_sparse_moe.experts.175.w1", "model.layers.59.block_sparse_moe.experts.176.w1", "model.layers.59.block_sparse_moe.experts.177.w1", "model.layers.59.block_sparse_moe.experts.178.w1", "model.layers.59.block_sparse_moe.experts.179.w1", "model.layers.59.block_sparse_moe.experts.180.w1", "model.layers.59.block_sparse_moe.experts.181.w1", "model.layers.59.block_sparse_moe.experts.182.w1", "model.layers.59.block_sparse_moe.experts.183.w1", "model.layers.59.block_sparse_moe.experts.184.w1", "model.layers.59.block_sparse_moe.experts.185.w1", "model.layers.59.block_sparse_moe.experts.186.w1", "model.layers.59.block_sparse_moe.experts.187.w1", "model.layers.59.block_sparse_moe.experts.188.w1", "model.layers.59.block_sparse_moe.experts.189.w1", "model.layers.59.block_sparse_moe.experts.190.w1", "model.layers.59.block_sparse_moe.experts.191.w1", "model.layers.59.block_sparse_moe.experts.192.w1", "model.layers.59.block_sparse_moe.experts.193.w1", "model.layers.59.block_sparse_moe.experts.194.w1", "model.layers.59.block_sparse_moe.experts.195.w1", "model.layers.59.block_sparse_moe.experts.196.w1", "model.layers.59.block_sparse_moe.experts.197.w1", "model.layers.59.block_sparse_moe.experts.198.w1", "model.layers.59.block_sparse_moe.experts.199.w1", "model.layers.59.block_sparse_moe.experts.200.w1", "model.layers.59.block_sparse_moe.experts.201.w1", "model.layers.59.block_sparse_moe.experts.202.w1", "model.layers.59.block_sparse_moe.experts.203.w1", "model.layers.59.block_sparse_moe.experts.204.w1", "model.layers.59.block_sparse_moe.experts.205.w1", "model.layers.59.block_sparse_moe.experts.206.w1", "model.layers.59.block_sparse_moe.experts.207.w1", "model.layers.59.block_sparse_moe.experts.208.w1", "model.layers.59.block_sparse_moe.experts.209.w1", "model.layers.59.block_sparse_moe.experts.210.w1", "model.layers.59.block_sparse_moe.experts.211.w1", "model.layers.59.block_sparse_moe.experts.212.w1", "model.layers.59.block_sparse_moe.experts.213.w1", "model.layers.59.block_sparse_moe.experts.214.w1", "model.layers.59.block_sparse_moe.experts.215.w1", "model.layers.59.block_sparse_moe.experts.216.w1", "model.layers.59.block_sparse_moe.experts.217.w1", "model.layers.59.block_sparse_moe.experts.218.w1", "model.layers.59.block_sparse_moe.experts.219.w1", "model.layers.59.block_sparse_moe.experts.220.w1", "model.layers.59.block_sparse_moe.experts.221.w1", "model.layers.59.block_sparse_moe.experts.222.w1", "model.layers.59.block_sparse_moe.experts.223.w1", "model.layers.59.block_sparse_moe.experts.224.w1", "model.layers.59.block_sparse_moe.experts.225.w1", "model.layers.59.block_sparse_moe.experts.226.w1", "model.layers.59.block_sparse_moe.experts.227.w1", "model.layers.59.block_sparse_moe.experts.228.w1", "model.layers.59.block_sparse_moe.experts.229.w1", "model.layers.59.block_sparse_moe.experts.230.w1", "model.layers.59.block_sparse_moe.experts.231.w1", "model.layers.59.block_sparse_moe.experts.232.w1", "model.layers.59.block_sparse_moe.experts.233.w1", "model.layers.59.block_sparse_moe.experts.234.w1", "model.layers.59.block_sparse_moe.experts.235.w1", "model.layers.59.block_sparse_moe.experts.236.w1", "model.layers.59.block_sparse_moe.experts.237.w1", "model.layers.59.block_sparse_moe.experts.238.w1", "model.layers.59.block_sparse_moe.experts.239.w1", "model.layers.59.block_sparse_moe.experts.240.w1", "model.layers.59.block_sparse_moe.experts.241.w1", "model.layers.59.block_sparse_moe.experts.242.w1", "model.layers.59.block_sparse_moe.experts.243.w1", "model.layers.59.block_sparse_moe.experts.244.w1", "model.layers.59.block_sparse_moe.experts.245.w1", "model.layers.59.block_sparse_moe.experts.246.w1", "model.layers.59.block_sparse_moe.experts.247.w1", "model.layers.59.block_sparse_moe.experts.248.w1", "model.layers.59.block_sparse_moe.experts.249.w1", "model.layers.59.block_sparse_moe.experts.250.w1", "model.layers.59.block_sparse_moe.experts.251.w1", "model.layers.59.block_sparse_moe.experts.252.w1", "model.layers.59.block_sparse_moe.experts.253.w1", "model.layers.59.block_sparse_moe.experts.254.w1", "model.layers.59.block_sparse_moe.experts.255.w1", "model.layers.59.block_sparse_moe.experts.0.w3", "model.layers.59.block_sparse_moe.experts.1.w3", "model.layers.59.block_sparse_moe.experts.2.w3", "model.layers.59.block_sparse_moe.experts.3.w3", "model.layers.59.block_sparse_moe.experts.4.w3", "model.layers.59.block_sparse_moe.experts.5.w3", "model.layers.59.block_sparse_moe.experts.6.w3", "model.layers.59.block_sparse_moe.experts.7.w3", "model.layers.59.block_sparse_moe.experts.8.w3", "model.layers.59.block_sparse_moe.experts.9.w3", "model.layers.59.block_sparse_moe.experts.10.w3", "model.layers.59.block_sparse_moe.experts.11.w3", "model.layers.59.block_sparse_moe.experts.12.w3", "model.layers.59.block_sparse_moe.experts.13.w3", "model.layers.59.block_sparse_moe.experts.14.w3", "model.layers.59.block_sparse_moe.experts.15.w3", "model.layers.59.block_sparse_moe.experts.16.w3", "model.layers.59.block_sparse_moe.experts.17.w3", "model.layers.59.block_sparse_moe.experts.18.w3", "model.layers.59.block_sparse_moe.experts.19.w3", "model.layers.59.block_sparse_moe.experts.20.w3", "model.layers.59.block_sparse_moe.experts.21.w3", "model.layers.59.block_sparse_moe.experts.22.w3", "model.layers.59.block_sparse_moe.experts.23.w3", "model.layers.59.block_sparse_moe.experts.24.w3", "model.layers.59.block_sparse_moe.experts.25.w3", "model.layers.59.block_sparse_moe.experts.26.w3", "model.layers.59.block_sparse_moe.experts.27.w3", "model.layers.59.block_sparse_moe.experts.28.w3", "model.layers.59.block_sparse_moe.experts.29.w3", "model.layers.59.block_sparse_moe.experts.30.w3", "model.layers.59.block_sparse_moe.experts.31.w3", "model.layers.59.block_sparse_moe.experts.32.w3", "model.layers.59.block_sparse_moe.experts.33.w3", "model.layers.59.block_sparse_moe.experts.34.w3", "model.layers.59.block_sparse_moe.experts.35.w3", "model.layers.59.block_sparse_moe.experts.36.w3", "model.layers.59.block_sparse_moe.experts.37.w3", "model.layers.59.block_sparse_moe.experts.38.w3", "model.layers.59.block_sparse_moe.experts.39.w3", "model.layers.59.block_sparse_moe.experts.40.w3", "model.layers.59.block_sparse_moe.experts.41.w3", "model.layers.59.block_sparse_moe.experts.42.w3", "model.layers.59.block_sparse_moe.experts.43.w3", "model.layers.59.block_sparse_moe.experts.44.w3", "model.layers.59.block_sparse_moe.experts.45.w3", "model.layers.59.block_sparse_moe.experts.46.w3", "model.layers.59.block_sparse_moe.experts.47.w3", "model.layers.59.block_sparse_moe.experts.48.w3", "model.layers.59.block_sparse_moe.experts.49.w3", "model.layers.59.block_sparse_moe.experts.50.w3", "model.layers.59.block_sparse_moe.experts.51.w3", "model.layers.59.block_sparse_moe.experts.52.w3", "model.layers.59.block_sparse_moe.experts.53.w3", "model.layers.59.block_sparse_moe.experts.54.w3", "model.layers.59.block_sparse_moe.experts.55.w3", "model.layers.59.block_sparse_moe.experts.56.w3", "model.layers.59.block_sparse_moe.experts.57.w3", "model.layers.59.block_sparse_moe.experts.58.w3", "model.layers.59.block_sparse_moe.experts.59.w3", "model.layers.59.block_sparse_moe.experts.60.w3", "model.layers.59.block_sparse_moe.experts.61.w3", "model.layers.59.block_sparse_moe.experts.62.w3", "model.layers.59.block_sparse_moe.experts.63.w3", "model.layers.59.block_sparse_moe.experts.64.w3", "model.layers.59.block_sparse_moe.experts.65.w3", "model.layers.59.block_sparse_moe.experts.66.w3", "model.layers.59.block_sparse_moe.experts.67.w3", "model.layers.59.block_sparse_moe.experts.68.w3", "model.layers.59.block_sparse_moe.experts.69.w3", "model.layers.59.block_sparse_moe.experts.70.w3", "model.layers.59.block_sparse_moe.experts.71.w3", "model.layers.59.block_sparse_moe.experts.72.w3", "model.layers.59.block_sparse_moe.experts.73.w3", "model.layers.59.block_sparse_moe.experts.74.w3", "model.layers.59.block_sparse_moe.experts.75.w3", "model.layers.59.block_sparse_moe.experts.76.w3", "model.layers.59.block_sparse_moe.experts.77.w3", "model.layers.59.block_sparse_moe.experts.78.w3", "model.layers.59.block_sparse_moe.experts.79.w3", "model.layers.59.block_sparse_moe.experts.80.w3", "model.layers.59.block_sparse_moe.experts.81.w3", "model.layers.59.block_sparse_moe.experts.82.w3", "model.layers.59.block_sparse_moe.experts.83.w3", "model.layers.59.block_sparse_moe.experts.84.w3", "model.layers.59.block_sparse_moe.experts.85.w3", "model.layers.59.block_sparse_moe.experts.86.w3", "model.layers.59.block_sparse_moe.experts.87.w3", "model.layers.59.block_sparse_moe.experts.88.w3", "model.layers.59.block_sparse_moe.experts.89.w3", "model.layers.59.block_sparse_moe.experts.90.w3", "model.layers.59.block_sparse_moe.experts.91.w3", "model.layers.59.block_sparse_moe.experts.92.w3", "model.layers.59.block_sparse_moe.experts.93.w3", "model.layers.59.block_sparse_moe.experts.94.w3", "model.layers.59.block_sparse_moe.experts.95.w3", "model.layers.59.block_sparse_moe.experts.96.w3", "model.layers.59.block_sparse_moe.experts.97.w3", "model.layers.59.block_sparse_moe.experts.98.w3", "model.layers.59.block_sparse_moe.experts.99.w3", "model.layers.59.block_sparse_moe.experts.100.w3", "model.layers.59.block_sparse_moe.experts.101.w3", "model.layers.59.block_sparse_moe.experts.102.w3", "model.layers.59.block_sparse_moe.experts.103.w3", "model.layers.59.block_sparse_moe.experts.104.w3", "model.layers.59.block_sparse_moe.experts.105.w3", "model.layers.59.block_sparse_moe.experts.106.w3", "model.layers.59.block_sparse_moe.experts.107.w3", "model.layers.59.block_sparse_moe.experts.108.w3", "model.layers.59.block_sparse_moe.experts.109.w3", "model.layers.59.block_sparse_moe.experts.110.w3", "model.layers.59.block_sparse_moe.experts.111.w3", "model.layers.59.block_sparse_moe.experts.112.w3", "model.layers.59.block_sparse_moe.experts.113.w3", "model.layers.59.block_sparse_moe.experts.114.w3", "model.layers.59.block_sparse_moe.experts.115.w3", "model.layers.59.block_sparse_moe.experts.116.w3", "model.layers.59.block_sparse_moe.experts.117.w3", "model.layers.59.block_sparse_moe.experts.118.w3", "model.layers.59.block_sparse_moe.experts.119.w3", "model.layers.59.block_sparse_moe.experts.120.w3", "model.layers.59.block_sparse_moe.experts.121.w3", "model.layers.59.block_sparse_moe.experts.122.w3", "model.layers.59.block_sparse_moe.experts.123.w3", "model.layers.59.block_sparse_moe.experts.124.w3", "model.layers.59.block_sparse_moe.experts.125.w3", "model.layers.59.block_sparse_moe.experts.126.w3", "model.layers.59.block_sparse_moe.experts.127.w3", "model.layers.59.block_sparse_moe.experts.128.w3", "model.layers.59.block_sparse_moe.experts.129.w3", "model.layers.59.block_sparse_moe.experts.130.w3", "model.layers.59.block_sparse_moe.experts.131.w3", "model.layers.59.block_sparse_moe.experts.132.w3", "model.layers.59.block_sparse_moe.experts.133.w3", "model.layers.59.block_sparse_moe.experts.134.w3", "model.layers.59.block_sparse_moe.experts.135.w3", "model.layers.59.block_sparse_moe.experts.136.w3", "model.layers.59.block_sparse_moe.experts.137.w3", "model.layers.59.block_sparse_moe.experts.138.w3", "model.layers.59.block_sparse_moe.experts.139.w3", "model.layers.59.block_sparse_moe.experts.140.w3", "model.layers.59.block_sparse_moe.experts.141.w3", "model.layers.59.block_sparse_moe.experts.142.w3", "model.layers.59.block_sparse_moe.experts.143.w3", "model.layers.59.block_sparse_moe.experts.144.w3", "model.layers.59.block_sparse_moe.experts.145.w3", "model.layers.59.block_sparse_moe.experts.146.w3", "model.layers.59.block_sparse_moe.experts.147.w3", "model.layers.59.block_sparse_moe.experts.148.w3", "model.layers.59.block_sparse_moe.experts.149.w3", "model.layers.59.block_sparse_moe.experts.150.w3", "model.layers.59.block_sparse_moe.experts.151.w3", "model.layers.59.block_sparse_moe.experts.152.w3", "model.layers.59.block_sparse_moe.experts.153.w3", "model.layers.59.block_sparse_moe.experts.154.w3", "model.layers.59.block_sparse_moe.experts.155.w3", "model.layers.59.block_sparse_moe.experts.156.w3", "model.layers.59.block_sparse_moe.experts.157.w3", "model.layers.59.block_sparse_moe.experts.158.w3", "model.layers.59.block_sparse_moe.experts.159.w3", "model.layers.59.block_sparse_moe.experts.160.w3", "model.layers.59.block_sparse_moe.experts.161.w3", "model.layers.59.block_sparse_moe.experts.162.w3", "model.layers.59.block_sparse_moe.experts.163.w3", "model.layers.59.block_sparse_moe.experts.164.w3", "model.layers.59.block_sparse_moe.experts.165.w3", "model.layers.59.block_sparse_moe.experts.166.w3", "model.layers.59.block_sparse_moe.experts.167.w3", "model.layers.59.block_sparse_moe.experts.168.w3", "model.layers.59.block_sparse_moe.experts.169.w3", "model.layers.59.block_sparse_moe.experts.170.w3", "model.layers.59.block_sparse_moe.experts.171.w3", "model.layers.59.block_sparse_moe.experts.172.w3", "model.layers.59.block_sparse_moe.experts.173.w3", "model.layers.59.block_sparse_moe.experts.174.w3", "model.layers.59.block_sparse_moe.experts.175.w3", "model.layers.59.block_sparse_moe.experts.176.w3", "model.layers.59.block_sparse_moe.experts.177.w3", "model.layers.59.block_sparse_moe.experts.178.w3", "model.layers.59.block_sparse_moe.experts.179.w3", "model.layers.59.block_sparse_moe.experts.180.w3", "model.layers.59.block_sparse_moe.experts.181.w3", "model.layers.59.block_sparse_moe.experts.182.w3", "model.layers.59.block_sparse_moe.experts.183.w3", "model.layers.59.block_sparse_moe.experts.184.w3", "model.layers.59.block_sparse_moe.experts.185.w3", "model.layers.59.block_sparse_moe.experts.186.w3", "model.layers.59.block_sparse_moe.experts.187.w3", "model.layers.59.block_sparse_moe.experts.188.w3", "model.layers.59.block_sparse_moe.experts.189.w3", "model.layers.59.block_sparse_moe.experts.190.w3", "model.layers.59.block_sparse_moe.experts.191.w3", "model.layers.59.block_sparse_moe.experts.192.w3", "model.layers.59.block_sparse_moe.experts.193.w3", "model.layers.59.block_sparse_moe.experts.194.w3", "model.layers.59.block_sparse_moe.experts.195.w3", "model.layers.59.block_sparse_moe.experts.196.w3", "model.layers.59.block_sparse_moe.experts.197.w3", "model.layers.59.block_sparse_moe.experts.198.w3", "model.layers.59.block_sparse_moe.experts.199.w3", "model.layers.59.block_sparse_moe.experts.200.w3", "model.layers.59.block_sparse_moe.experts.201.w3", "model.layers.59.block_sparse_moe.experts.202.w3", "model.layers.59.block_sparse_moe.experts.203.w3", "model.layers.59.block_sparse_moe.experts.204.w3", "model.layers.59.block_sparse_moe.experts.205.w3", "model.layers.59.block_sparse_moe.experts.206.w3", "model.layers.59.block_sparse_moe.experts.207.w3", "model.layers.59.block_sparse_moe.experts.208.w3", "model.layers.59.block_sparse_moe.experts.209.w3", "model.layers.59.block_sparse_moe.experts.210.w3", "model.layers.59.block_sparse_moe.experts.211.w3", "model.layers.59.block_sparse_moe.experts.212.w3", "model.layers.59.block_sparse_moe.experts.213.w3", "model.layers.59.block_sparse_moe.experts.214.w3", "model.layers.59.block_sparse_moe.experts.215.w3", "model.layers.59.block_sparse_moe.experts.216.w3", "model.layers.59.block_sparse_moe.experts.217.w3", "model.layers.59.block_sparse_moe.experts.218.w3", "model.layers.59.block_sparse_moe.experts.219.w3", "model.layers.59.block_sparse_moe.experts.220.w3", "model.layers.59.block_sparse_moe.experts.221.w3", "model.layers.59.block_sparse_moe.experts.222.w3", "model.layers.59.block_sparse_moe.experts.223.w3", "model.layers.59.block_sparse_moe.experts.224.w3", "model.layers.59.block_sparse_moe.experts.225.w3", "model.layers.59.block_sparse_moe.experts.226.w3", "model.layers.59.block_sparse_moe.experts.227.w3", "model.layers.59.block_sparse_moe.experts.228.w3", "model.layers.59.block_sparse_moe.experts.229.w3", "model.layers.59.block_sparse_moe.experts.230.w3", "model.layers.59.block_sparse_moe.experts.231.w3", "model.layers.59.block_sparse_moe.experts.232.w3", "model.layers.59.block_sparse_moe.experts.233.w3", "model.layers.59.block_sparse_moe.experts.234.w3", "model.layers.59.block_sparse_moe.experts.235.w3", "model.layers.59.block_sparse_moe.experts.236.w3", "model.layers.59.block_sparse_moe.experts.237.w3", "model.layers.59.block_sparse_moe.experts.238.w3", "model.layers.59.block_sparse_moe.experts.239.w3", "model.layers.59.block_sparse_moe.experts.240.w3", "model.layers.59.block_sparse_moe.experts.241.w3", "model.layers.59.block_sparse_moe.experts.242.w3", "model.layers.59.block_sparse_moe.experts.243.w3", "model.layers.59.block_sparse_moe.experts.244.w3", "model.layers.59.block_sparse_moe.experts.245.w3", "model.layers.59.block_sparse_moe.experts.246.w3", "model.layers.59.block_sparse_moe.experts.247.w3", "model.layers.59.block_sparse_moe.experts.248.w3", "model.layers.59.block_sparse_moe.experts.249.w3", "model.layers.59.block_sparse_moe.experts.250.w3", "model.layers.59.block_sparse_moe.experts.251.w3", "model.layers.59.block_sparse_moe.experts.252.w3", "model.layers.59.block_sparse_moe.experts.253.w3", "model.layers.59.block_sparse_moe.experts.254.w3", "model.layers.59.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.0005980573594570049, "dbits": 2415919104 } ] }, { "idx": 299, "layers": [ "model.layers.59.block_sparse_moe.experts.0.w2", "model.layers.59.block_sparse_moe.experts.1.w2", "model.layers.59.block_sparse_moe.experts.2.w2", "model.layers.59.block_sparse_moe.experts.3.w2", "model.layers.59.block_sparse_moe.experts.4.w2", "model.layers.59.block_sparse_moe.experts.5.w2", "model.layers.59.block_sparse_moe.experts.6.w2", "model.layers.59.block_sparse_moe.experts.7.w2", "model.layers.59.block_sparse_moe.experts.8.w2", "model.layers.59.block_sparse_moe.experts.9.w2", "model.layers.59.block_sparse_moe.experts.10.w2", "model.layers.59.block_sparse_moe.experts.11.w2", "model.layers.59.block_sparse_moe.experts.12.w2", "model.layers.59.block_sparse_moe.experts.13.w2", "model.layers.59.block_sparse_moe.experts.14.w2", "model.layers.59.block_sparse_moe.experts.15.w2", "model.layers.59.block_sparse_moe.experts.16.w2", "model.layers.59.block_sparse_moe.experts.17.w2", "model.layers.59.block_sparse_moe.experts.18.w2", "model.layers.59.block_sparse_moe.experts.19.w2", "model.layers.59.block_sparse_moe.experts.20.w2", "model.layers.59.block_sparse_moe.experts.21.w2", "model.layers.59.block_sparse_moe.experts.22.w2", "model.layers.59.block_sparse_moe.experts.23.w2", "model.layers.59.block_sparse_moe.experts.24.w2", "model.layers.59.block_sparse_moe.experts.25.w2", "model.layers.59.block_sparse_moe.experts.26.w2", "model.layers.59.block_sparse_moe.experts.27.w2", "model.layers.59.block_sparse_moe.experts.28.w2", "model.layers.59.block_sparse_moe.experts.29.w2", "model.layers.59.block_sparse_moe.experts.30.w2", "model.layers.59.block_sparse_moe.experts.31.w2", "model.layers.59.block_sparse_moe.experts.32.w2", "model.layers.59.block_sparse_moe.experts.33.w2", "model.layers.59.block_sparse_moe.experts.34.w2", "model.layers.59.block_sparse_moe.experts.35.w2", "model.layers.59.block_sparse_moe.experts.36.w2", "model.layers.59.block_sparse_moe.experts.37.w2", "model.layers.59.block_sparse_moe.experts.38.w2", "model.layers.59.block_sparse_moe.experts.39.w2", "model.layers.59.block_sparse_moe.experts.40.w2", "model.layers.59.block_sparse_moe.experts.41.w2", "model.layers.59.block_sparse_moe.experts.42.w2", "model.layers.59.block_sparse_moe.experts.43.w2", "model.layers.59.block_sparse_moe.experts.44.w2", "model.layers.59.block_sparse_moe.experts.45.w2", "model.layers.59.block_sparse_moe.experts.46.w2", "model.layers.59.block_sparse_moe.experts.47.w2", "model.layers.59.block_sparse_moe.experts.48.w2", "model.layers.59.block_sparse_moe.experts.49.w2", "model.layers.59.block_sparse_moe.experts.50.w2", "model.layers.59.block_sparse_moe.experts.51.w2", "model.layers.59.block_sparse_moe.experts.52.w2", "model.layers.59.block_sparse_moe.experts.53.w2", "model.layers.59.block_sparse_moe.experts.54.w2", "model.layers.59.block_sparse_moe.experts.55.w2", "model.layers.59.block_sparse_moe.experts.56.w2", "model.layers.59.block_sparse_moe.experts.57.w2", "model.layers.59.block_sparse_moe.experts.58.w2", "model.layers.59.block_sparse_moe.experts.59.w2", "model.layers.59.block_sparse_moe.experts.60.w2", "model.layers.59.block_sparse_moe.experts.61.w2", "model.layers.59.block_sparse_moe.experts.62.w2", "model.layers.59.block_sparse_moe.experts.63.w2", "model.layers.59.block_sparse_moe.experts.64.w2", "model.layers.59.block_sparse_moe.experts.65.w2", "model.layers.59.block_sparse_moe.experts.66.w2", "model.layers.59.block_sparse_moe.experts.67.w2", "model.layers.59.block_sparse_moe.experts.68.w2", "model.layers.59.block_sparse_moe.experts.69.w2", "model.layers.59.block_sparse_moe.experts.70.w2", "model.layers.59.block_sparse_moe.experts.71.w2", "model.layers.59.block_sparse_moe.experts.72.w2", "model.layers.59.block_sparse_moe.experts.73.w2", "model.layers.59.block_sparse_moe.experts.74.w2", "model.layers.59.block_sparse_moe.experts.75.w2", "model.layers.59.block_sparse_moe.experts.76.w2", "model.layers.59.block_sparse_moe.experts.77.w2", "model.layers.59.block_sparse_moe.experts.78.w2", "model.layers.59.block_sparse_moe.experts.79.w2", "model.layers.59.block_sparse_moe.experts.80.w2", "model.layers.59.block_sparse_moe.experts.81.w2", "model.layers.59.block_sparse_moe.experts.82.w2", "model.layers.59.block_sparse_moe.experts.83.w2", "model.layers.59.block_sparse_moe.experts.84.w2", "model.layers.59.block_sparse_moe.experts.85.w2", "model.layers.59.block_sparse_moe.experts.86.w2", "model.layers.59.block_sparse_moe.experts.87.w2", "model.layers.59.block_sparse_moe.experts.88.w2", "model.layers.59.block_sparse_moe.experts.89.w2", "model.layers.59.block_sparse_moe.experts.90.w2", "model.layers.59.block_sparse_moe.experts.91.w2", "model.layers.59.block_sparse_moe.experts.92.w2", "model.layers.59.block_sparse_moe.experts.93.w2", "model.layers.59.block_sparse_moe.experts.94.w2", "model.layers.59.block_sparse_moe.experts.95.w2", "model.layers.59.block_sparse_moe.experts.96.w2", "model.layers.59.block_sparse_moe.experts.97.w2", "model.layers.59.block_sparse_moe.experts.98.w2", "model.layers.59.block_sparse_moe.experts.99.w2", "model.layers.59.block_sparse_moe.experts.100.w2", "model.layers.59.block_sparse_moe.experts.101.w2", "model.layers.59.block_sparse_moe.experts.102.w2", "model.layers.59.block_sparse_moe.experts.103.w2", "model.layers.59.block_sparse_moe.experts.104.w2", "model.layers.59.block_sparse_moe.experts.105.w2", "model.layers.59.block_sparse_moe.experts.106.w2", "model.layers.59.block_sparse_moe.experts.107.w2", "model.layers.59.block_sparse_moe.experts.108.w2", "model.layers.59.block_sparse_moe.experts.109.w2", "model.layers.59.block_sparse_moe.experts.110.w2", "model.layers.59.block_sparse_moe.experts.111.w2", "model.layers.59.block_sparse_moe.experts.112.w2", "model.layers.59.block_sparse_moe.experts.113.w2", "model.layers.59.block_sparse_moe.experts.114.w2", "model.layers.59.block_sparse_moe.experts.115.w2", "model.layers.59.block_sparse_moe.experts.116.w2", "model.layers.59.block_sparse_moe.experts.117.w2", "model.layers.59.block_sparse_moe.experts.118.w2", "model.layers.59.block_sparse_moe.experts.119.w2", "model.layers.59.block_sparse_moe.experts.120.w2", "model.layers.59.block_sparse_moe.experts.121.w2", "model.layers.59.block_sparse_moe.experts.122.w2", "model.layers.59.block_sparse_moe.experts.123.w2", "model.layers.59.block_sparse_moe.experts.124.w2", "model.layers.59.block_sparse_moe.experts.125.w2", "model.layers.59.block_sparse_moe.experts.126.w2", "model.layers.59.block_sparse_moe.experts.127.w2", "model.layers.59.block_sparse_moe.experts.128.w2", "model.layers.59.block_sparse_moe.experts.129.w2", "model.layers.59.block_sparse_moe.experts.130.w2", "model.layers.59.block_sparse_moe.experts.131.w2", "model.layers.59.block_sparse_moe.experts.132.w2", "model.layers.59.block_sparse_moe.experts.133.w2", "model.layers.59.block_sparse_moe.experts.134.w2", "model.layers.59.block_sparse_moe.experts.135.w2", "model.layers.59.block_sparse_moe.experts.136.w2", "model.layers.59.block_sparse_moe.experts.137.w2", "model.layers.59.block_sparse_moe.experts.138.w2", "model.layers.59.block_sparse_moe.experts.139.w2", "model.layers.59.block_sparse_moe.experts.140.w2", "model.layers.59.block_sparse_moe.experts.141.w2", "model.layers.59.block_sparse_moe.experts.142.w2", "model.layers.59.block_sparse_moe.experts.143.w2", "model.layers.59.block_sparse_moe.experts.144.w2", "model.layers.59.block_sparse_moe.experts.145.w2", "model.layers.59.block_sparse_moe.experts.146.w2", "model.layers.59.block_sparse_moe.experts.147.w2", "model.layers.59.block_sparse_moe.experts.148.w2", "model.layers.59.block_sparse_moe.experts.149.w2", "model.layers.59.block_sparse_moe.experts.150.w2", "model.layers.59.block_sparse_moe.experts.151.w2", "model.layers.59.block_sparse_moe.experts.152.w2", "model.layers.59.block_sparse_moe.experts.153.w2", "model.layers.59.block_sparse_moe.experts.154.w2", "model.layers.59.block_sparse_moe.experts.155.w2", "model.layers.59.block_sparse_moe.experts.156.w2", "model.layers.59.block_sparse_moe.experts.157.w2", "model.layers.59.block_sparse_moe.experts.158.w2", "model.layers.59.block_sparse_moe.experts.159.w2", "model.layers.59.block_sparse_moe.experts.160.w2", "model.layers.59.block_sparse_moe.experts.161.w2", "model.layers.59.block_sparse_moe.experts.162.w2", "model.layers.59.block_sparse_moe.experts.163.w2", "model.layers.59.block_sparse_moe.experts.164.w2", "model.layers.59.block_sparse_moe.experts.165.w2", "model.layers.59.block_sparse_moe.experts.166.w2", "model.layers.59.block_sparse_moe.experts.167.w2", "model.layers.59.block_sparse_moe.experts.168.w2", "model.layers.59.block_sparse_moe.experts.169.w2", "model.layers.59.block_sparse_moe.experts.170.w2", "model.layers.59.block_sparse_moe.experts.171.w2", "model.layers.59.block_sparse_moe.experts.172.w2", "model.layers.59.block_sparse_moe.experts.173.w2", "model.layers.59.block_sparse_moe.experts.174.w2", "model.layers.59.block_sparse_moe.experts.175.w2", "model.layers.59.block_sparse_moe.experts.176.w2", "model.layers.59.block_sparse_moe.experts.177.w2", "model.layers.59.block_sparse_moe.experts.178.w2", "model.layers.59.block_sparse_moe.experts.179.w2", "model.layers.59.block_sparse_moe.experts.180.w2", "model.layers.59.block_sparse_moe.experts.181.w2", "model.layers.59.block_sparse_moe.experts.182.w2", "model.layers.59.block_sparse_moe.experts.183.w2", "model.layers.59.block_sparse_moe.experts.184.w2", "model.layers.59.block_sparse_moe.experts.185.w2", "model.layers.59.block_sparse_moe.experts.186.w2", "model.layers.59.block_sparse_moe.experts.187.w2", "model.layers.59.block_sparse_moe.experts.188.w2", "model.layers.59.block_sparse_moe.experts.189.w2", "model.layers.59.block_sparse_moe.experts.190.w2", "model.layers.59.block_sparse_moe.experts.191.w2", "model.layers.59.block_sparse_moe.experts.192.w2", "model.layers.59.block_sparse_moe.experts.193.w2", "model.layers.59.block_sparse_moe.experts.194.w2", "model.layers.59.block_sparse_moe.experts.195.w2", "model.layers.59.block_sparse_moe.experts.196.w2", "model.layers.59.block_sparse_moe.experts.197.w2", "model.layers.59.block_sparse_moe.experts.198.w2", "model.layers.59.block_sparse_moe.experts.199.w2", "model.layers.59.block_sparse_moe.experts.200.w2", "model.layers.59.block_sparse_moe.experts.201.w2", "model.layers.59.block_sparse_moe.experts.202.w2", "model.layers.59.block_sparse_moe.experts.203.w2", "model.layers.59.block_sparse_moe.experts.204.w2", "model.layers.59.block_sparse_moe.experts.205.w2", "model.layers.59.block_sparse_moe.experts.206.w2", "model.layers.59.block_sparse_moe.experts.207.w2", "model.layers.59.block_sparse_moe.experts.208.w2", "model.layers.59.block_sparse_moe.experts.209.w2", "model.layers.59.block_sparse_moe.experts.210.w2", "model.layers.59.block_sparse_moe.experts.211.w2", "model.layers.59.block_sparse_moe.experts.212.w2", "model.layers.59.block_sparse_moe.experts.213.w2", "model.layers.59.block_sparse_moe.experts.214.w2", "model.layers.59.block_sparse_moe.experts.215.w2", "model.layers.59.block_sparse_moe.experts.216.w2", "model.layers.59.block_sparse_moe.experts.217.w2", "model.layers.59.block_sparse_moe.experts.218.w2", "model.layers.59.block_sparse_moe.experts.219.w2", "model.layers.59.block_sparse_moe.experts.220.w2", "model.layers.59.block_sparse_moe.experts.221.w2", "model.layers.59.block_sparse_moe.experts.222.w2", "model.layers.59.block_sparse_moe.experts.223.w2", "model.layers.59.block_sparse_moe.experts.224.w2", "model.layers.59.block_sparse_moe.experts.225.w2", "model.layers.59.block_sparse_moe.experts.226.w2", "model.layers.59.block_sparse_moe.experts.227.w2", "model.layers.59.block_sparse_moe.experts.228.w2", "model.layers.59.block_sparse_moe.experts.229.w2", "model.layers.59.block_sparse_moe.experts.230.w2", "model.layers.59.block_sparse_moe.experts.231.w2", "model.layers.59.block_sparse_moe.experts.232.w2", "model.layers.59.block_sparse_moe.experts.233.w2", "model.layers.59.block_sparse_moe.experts.234.w2", "model.layers.59.block_sparse_moe.experts.235.w2", "model.layers.59.block_sparse_moe.experts.236.w2", "model.layers.59.block_sparse_moe.experts.237.w2", "model.layers.59.block_sparse_moe.experts.238.w2", "model.layers.59.block_sparse_moe.experts.239.w2", "model.layers.59.block_sparse_moe.experts.240.w2", "model.layers.59.block_sparse_moe.experts.241.w2", "model.layers.59.block_sparse_moe.experts.242.w2", "model.layers.59.block_sparse_moe.experts.243.w2", "model.layers.59.block_sparse_moe.experts.244.w2", "model.layers.59.block_sparse_moe.experts.245.w2", "model.layers.59.block_sparse_moe.experts.246.w2", "model.layers.59.block_sparse_moe.experts.247.w2", "model.layers.59.block_sparse_moe.experts.248.w2", "model.layers.59.block_sparse_moe.experts.249.w2", "model.layers.59.block_sparse_moe.experts.250.w2", "model.layers.59.block_sparse_moe.experts.251.w2", "model.layers.59.block_sparse_moe.experts.252.w2", "model.layers.59.block_sparse_moe.experts.253.w2", "model.layers.59.block_sparse_moe.experts.254.w2", "model.layers.59.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -6.788037717342377e-05, "dbits": 1207959552 } ] }, { "idx": 300, "layers": [ "model.layers.60.self_attn.q_proj" ], "candidates": [ { "dkld": -0.0002852335572242626, "dbits": 18874368 } ] }, { "idx": 301, "layers": [ "model.layers.60.self_attn.k_proj", "model.layers.60.self_attn.v_proj" ], "candidates": [ { "dkld": 0.0001527968794107215, "dbits": 6291456 } ] }, { "idx": 302, "layers": [ "model.layers.60.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0010029759258031734, "dbits": 18874368 } ] }, { "idx": 303, "layers": [ "model.layers.60.block_sparse_moe.experts.0.w1", "model.layers.60.block_sparse_moe.experts.1.w1", "model.layers.60.block_sparse_moe.experts.2.w1", "model.layers.60.block_sparse_moe.experts.3.w1", "model.layers.60.block_sparse_moe.experts.4.w1", "model.layers.60.block_sparse_moe.experts.5.w1", "model.layers.60.block_sparse_moe.experts.6.w1", "model.layers.60.block_sparse_moe.experts.7.w1", "model.layers.60.block_sparse_moe.experts.8.w1", "model.layers.60.block_sparse_moe.experts.9.w1", "model.layers.60.block_sparse_moe.experts.10.w1", "model.layers.60.block_sparse_moe.experts.11.w1", "model.layers.60.block_sparse_moe.experts.12.w1", "model.layers.60.block_sparse_moe.experts.13.w1", "model.layers.60.block_sparse_moe.experts.14.w1", "model.layers.60.block_sparse_moe.experts.15.w1", "model.layers.60.block_sparse_moe.experts.16.w1", "model.layers.60.block_sparse_moe.experts.17.w1", "model.layers.60.block_sparse_moe.experts.18.w1", "model.layers.60.block_sparse_moe.experts.19.w1", "model.layers.60.block_sparse_moe.experts.20.w1", "model.layers.60.block_sparse_moe.experts.21.w1", "model.layers.60.block_sparse_moe.experts.22.w1", "model.layers.60.block_sparse_moe.experts.23.w1", "model.layers.60.block_sparse_moe.experts.24.w1", "model.layers.60.block_sparse_moe.experts.25.w1", "model.layers.60.block_sparse_moe.experts.26.w1", "model.layers.60.block_sparse_moe.experts.27.w1", "model.layers.60.block_sparse_moe.experts.28.w1", "model.layers.60.block_sparse_moe.experts.29.w1", "model.layers.60.block_sparse_moe.experts.30.w1", "model.layers.60.block_sparse_moe.experts.31.w1", "model.layers.60.block_sparse_moe.experts.32.w1", "model.layers.60.block_sparse_moe.experts.33.w1", "model.layers.60.block_sparse_moe.experts.34.w1", "model.layers.60.block_sparse_moe.experts.35.w1", "model.layers.60.block_sparse_moe.experts.36.w1", "model.layers.60.block_sparse_moe.experts.37.w1", "model.layers.60.block_sparse_moe.experts.38.w1", "model.layers.60.block_sparse_moe.experts.39.w1", "model.layers.60.block_sparse_moe.experts.40.w1", "model.layers.60.block_sparse_moe.experts.41.w1", "model.layers.60.block_sparse_moe.experts.42.w1", "model.layers.60.block_sparse_moe.experts.43.w1", "model.layers.60.block_sparse_moe.experts.44.w1", "model.layers.60.block_sparse_moe.experts.45.w1", "model.layers.60.block_sparse_moe.experts.46.w1", "model.layers.60.block_sparse_moe.experts.47.w1", "model.layers.60.block_sparse_moe.experts.48.w1", "model.layers.60.block_sparse_moe.experts.49.w1", "model.layers.60.block_sparse_moe.experts.50.w1", "model.layers.60.block_sparse_moe.experts.51.w1", "model.layers.60.block_sparse_moe.experts.52.w1", "model.layers.60.block_sparse_moe.experts.53.w1", "model.layers.60.block_sparse_moe.experts.54.w1", "model.layers.60.block_sparse_moe.experts.55.w1", "model.layers.60.block_sparse_moe.experts.56.w1", "model.layers.60.block_sparse_moe.experts.57.w1", "model.layers.60.block_sparse_moe.experts.58.w1", "model.layers.60.block_sparse_moe.experts.59.w1", "model.layers.60.block_sparse_moe.experts.60.w1", "model.layers.60.block_sparse_moe.experts.61.w1", "model.layers.60.block_sparse_moe.experts.62.w1", "model.layers.60.block_sparse_moe.experts.63.w1", "model.layers.60.block_sparse_moe.experts.64.w1", "model.layers.60.block_sparse_moe.experts.65.w1", "model.layers.60.block_sparse_moe.experts.66.w1", "model.layers.60.block_sparse_moe.experts.67.w1", "model.layers.60.block_sparse_moe.experts.68.w1", "model.layers.60.block_sparse_moe.experts.69.w1", "model.layers.60.block_sparse_moe.experts.70.w1", "model.layers.60.block_sparse_moe.experts.71.w1", "model.layers.60.block_sparse_moe.experts.72.w1", "model.layers.60.block_sparse_moe.experts.73.w1", "model.layers.60.block_sparse_moe.experts.74.w1", "model.layers.60.block_sparse_moe.experts.75.w1", "model.layers.60.block_sparse_moe.experts.76.w1", "model.layers.60.block_sparse_moe.experts.77.w1", "model.layers.60.block_sparse_moe.experts.78.w1", "model.layers.60.block_sparse_moe.experts.79.w1", "model.layers.60.block_sparse_moe.experts.80.w1", "model.layers.60.block_sparse_moe.experts.81.w1", "model.layers.60.block_sparse_moe.experts.82.w1", "model.layers.60.block_sparse_moe.experts.83.w1", "model.layers.60.block_sparse_moe.experts.84.w1", "model.layers.60.block_sparse_moe.experts.85.w1", "model.layers.60.block_sparse_moe.experts.86.w1", "model.layers.60.block_sparse_moe.experts.87.w1", "model.layers.60.block_sparse_moe.experts.88.w1", "model.layers.60.block_sparse_moe.experts.89.w1", "model.layers.60.block_sparse_moe.experts.90.w1", "model.layers.60.block_sparse_moe.experts.91.w1", "model.layers.60.block_sparse_moe.experts.92.w1", "model.layers.60.block_sparse_moe.experts.93.w1", "model.layers.60.block_sparse_moe.experts.94.w1", "model.layers.60.block_sparse_moe.experts.95.w1", "model.layers.60.block_sparse_moe.experts.96.w1", "model.layers.60.block_sparse_moe.experts.97.w1", "model.layers.60.block_sparse_moe.experts.98.w1", "model.layers.60.block_sparse_moe.experts.99.w1", "model.layers.60.block_sparse_moe.experts.100.w1", "model.layers.60.block_sparse_moe.experts.101.w1", "model.layers.60.block_sparse_moe.experts.102.w1", "model.layers.60.block_sparse_moe.experts.103.w1", "model.layers.60.block_sparse_moe.experts.104.w1", "model.layers.60.block_sparse_moe.experts.105.w1", "model.layers.60.block_sparse_moe.experts.106.w1", "model.layers.60.block_sparse_moe.experts.107.w1", "model.layers.60.block_sparse_moe.experts.108.w1", "model.layers.60.block_sparse_moe.experts.109.w1", "model.layers.60.block_sparse_moe.experts.110.w1", "model.layers.60.block_sparse_moe.experts.111.w1", "model.layers.60.block_sparse_moe.experts.112.w1", "model.layers.60.block_sparse_moe.experts.113.w1", "model.layers.60.block_sparse_moe.experts.114.w1", "model.layers.60.block_sparse_moe.experts.115.w1", "model.layers.60.block_sparse_moe.experts.116.w1", "model.layers.60.block_sparse_moe.experts.117.w1", "model.layers.60.block_sparse_moe.experts.118.w1", "model.layers.60.block_sparse_moe.experts.119.w1", "model.layers.60.block_sparse_moe.experts.120.w1", "model.layers.60.block_sparse_moe.experts.121.w1", "model.layers.60.block_sparse_moe.experts.122.w1", "model.layers.60.block_sparse_moe.experts.123.w1", "model.layers.60.block_sparse_moe.experts.124.w1", "model.layers.60.block_sparse_moe.experts.125.w1", "model.layers.60.block_sparse_moe.experts.126.w1", "model.layers.60.block_sparse_moe.experts.127.w1", "model.layers.60.block_sparse_moe.experts.128.w1", "model.layers.60.block_sparse_moe.experts.129.w1", "model.layers.60.block_sparse_moe.experts.130.w1", "model.layers.60.block_sparse_moe.experts.131.w1", "model.layers.60.block_sparse_moe.experts.132.w1", "model.layers.60.block_sparse_moe.experts.133.w1", "model.layers.60.block_sparse_moe.experts.134.w1", "model.layers.60.block_sparse_moe.experts.135.w1", "model.layers.60.block_sparse_moe.experts.136.w1", "model.layers.60.block_sparse_moe.experts.137.w1", "model.layers.60.block_sparse_moe.experts.138.w1", "model.layers.60.block_sparse_moe.experts.139.w1", "model.layers.60.block_sparse_moe.experts.140.w1", "model.layers.60.block_sparse_moe.experts.141.w1", "model.layers.60.block_sparse_moe.experts.142.w1", "model.layers.60.block_sparse_moe.experts.143.w1", "model.layers.60.block_sparse_moe.experts.144.w1", "model.layers.60.block_sparse_moe.experts.145.w1", "model.layers.60.block_sparse_moe.experts.146.w1", "model.layers.60.block_sparse_moe.experts.147.w1", "model.layers.60.block_sparse_moe.experts.148.w1", "model.layers.60.block_sparse_moe.experts.149.w1", "model.layers.60.block_sparse_moe.experts.150.w1", "model.layers.60.block_sparse_moe.experts.151.w1", "model.layers.60.block_sparse_moe.experts.152.w1", "model.layers.60.block_sparse_moe.experts.153.w1", "model.layers.60.block_sparse_moe.experts.154.w1", "model.layers.60.block_sparse_moe.experts.155.w1", "model.layers.60.block_sparse_moe.experts.156.w1", "model.layers.60.block_sparse_moe.experts.157.w1", "model.layers.60.block_sparse_moe.experts.158.w1", "model.layers.60.block_sparse_moe.experts.159.w1", "model.layers.60.block_sparse_moe.experts.160.w1", "model.layers.60.block_sparse_moe.experts.161.w1", "model.layers.60.block_sparse_moe.experts.162.w1", "model.layers.60.block_sparse_moe.experts.163.w1", "model.layers.60.block_sparse_moe.experts.164.w1", "model.layers.60.block_sparse_moe.experts.165.w1", "model.layers.60.block_sparse_moe.experts.166.w1", "model.layers.60.block_sparse_moe.experts.167.w1", "model.layers.60.block_sparse_moe.experts.168.w1", "model.layers.60.block_sparse_moe.experts.169.w1", "model.layers.60.block_sparse_moe.experts.170.w1", "model.layers.60.block_sparse_moe.experts.171.w1", "model.layers.60.block_sparse_moe.experts.172.w1", "model.layers.60.block_sparse_moe.experts.173.w1", "model.layers.60.block_sparse_moe.experts.174.w1", "model.layers.60.block_sparse_moe.experts.175.w1", "model.layers.60.block_sparse_moe.experts.176.w1", "model.layers.60.block_sparse_moe.experts.177.w1", "model.layers.60.block_sparse_moe.experts.178.w1", "model.layers.60.block_sparse_moe.experts.179.w1", "model.layers.60.block_sparse_moe.experts.180.w1", "model.layers.60.block_sparse_moe.experts.181.w1", "model.layers.60.block_sparse_moe.experts.182.w1", "model.layers.60.block_sparse_moe.experts.183.w1", "model.layers.60.block_sparse_moe.experts.184.w1", "model.layers.60.block_sparse_moe.experts.185.w1", "model.layers.60.block_sparse_moe.experts.186.w1", "model.layers.60.block_sparse_moe.experts.187.w1", "model.layers.60.block_sparse_moe.experts.188.w1", "model.layers.60.block_sparse_moe.experts.189.w1", "model.layers.60.block_sparse_moe.experts.190.w1", "model.layers.60.block_sparse_moe.experts.191.w1", "model.layers.60.block_sparse_moe.experts.192.w1", "model.layers.60.block_sparse_moe.experts.193.w1", "model.layers.60.block_sparse_moe.experts.194.w1", "model.layers.60.block_sparse_moe.experts.195.w1", "model.layers.60.block_sparse_moe.experts.196.w1", "model.layers.60.block_sparse_moe.experts.197.w1", "model.layers.60.block_sparse_moe.experts.198.w1", "model.layers.60.block_sparse_moe.experts.199.w1", "model.layers.60.block_sparse_moe.experts.200.w1", "model.layers.60.block_sparse_moe.experts.201.w1", "model.layers.60.block_sparse_moe.experts.202.w1", "model.layers.60.block_sparse_moe.experts.203.w1", "model.layers.60.block_sparse_moe.experts.204.w1", "model.layers.60.block_sparse_moe.experts.205.w1", "model.layers.60.block_sparse_moe.experts.206.w1", "model.layers.60.block_sparse_moe.experts.207.w1", "model.layers.60.block_sparse_moe.experts.208.w1", "model.layers.60.block_sparse_moe.experts.209.w1", "model.layers.60.block_sparse_moe.experts.210.w1", "model.layers.60.block_sparse_moe.experts.211.w1", "model.layers.60.block_sparse_moe.experts.212.w1", "model.layers.60.block_sparse_moe.experts.213.w1", "model.layers.60.block_sparse_moe.experts.214.w1", "model.layers.60.block_sparse_moe.experts.215.w1", "model.layers.60.block_sparse_moe.experts.216.w1", "model.layers.60.block_sparse_moe.experts.217.w1", "model.layers.60.block_sparse_moe.experts.218.w1", "model.layers.60.block_sparse_moe.experts.219.w1", "model.layers.60.block_sparse_moe.experts.220.w1", "model.layers.60.block_sparse_moe.experts.221.w1", "model.layers.60.block_sparse_moe.experts.222.w1", "model.layers.60.block_sparse_moe.experts.223.w1", "model.layers.60.block_sparse_moe.experts.224.w1", "model.layers.60.block_sparse_moe.experts.225.w1", "model.layers.60.block_sparse_moe.experts.226.w1", "model.layers.60.block_sparse_moe.experts.227.w1", "model.layers.60.block_sparse_moe.experts.228.w1", "model.layers.60.block_sparse_moe.experts.229.w1", "model.layers.60.block_sparse_moe.experts.230.w1", "model.layers.60.block_sparse_moe.experts.231.w1", "model.layers.60.block_sparse_moe.experts.232.w1", "model.layers.60.block_sparse_moe.experts.233.w1", "model.layers.60.block_sparse_moe.experts.234.w1", "model.layers.60.block_sparse_moe.experts.235.w1", "model.layers.60.block_sparse_moe.experts.236.w1", "model.layers.60.block_sparse_moe.experts.237.w1", "model.layers.60.block_sparse_moe.experts.238.w1", "model.layers.60.block_sparse_moe.experts.239.w1", "model.layers.60.block_sparse_moe.experts.240.w1", "model.layers.60.block_sparse_moe.experts.241.w1", "model.layers.60.block_sparse_moe.experts.242.w1", "model.layers.60.block_sparse_moe.experts.243.w1", "model.layers.60.block_sparse_moe.experts.244.w1", "model.layers.60.block_sparse_moe.experts.245.w1", "model.layers.60.block_sparse_moe.experts.246.w1", "model.layers.60.block_sparse_moe.experts.247.w1", "model.layers.60.block_sparse_moe.experts.248.w1", "model.layers.60.block_sparse_moe.experts.249.w1", "model.layers.60.block_sparse_moe.experts.250.w1", "model.layers.60.block_sparse_moe.experts.251.w1", "model.layers.60.block_sparse_moe.experts.252.w1", "model.layers.60.block_sparse_moe.experts.253.w1", "model.layers.60.block_sparse_moe.experts.254.w1", "model.layers.60.block_sparse_moe.experts.255.w1", "model.layers.60.block_sparse_moe.experts.0.w3", "model.layers.60.block_sparse_moe.experts.1.w3", "model.layers.60.block_sparse_moe.experts.2.w3", "model.layers.60.block_sparse_moe.experts.3.w3", "model.layers.60.block_sparse_moe.experts.4.w3", "model.layers.60.block_sparse_moe.experts.5.w3", "model.layers.60.block_sparse_moe.experts.6.w3", "model.layers.60.block_sparse_moe.experts.7.w3", "model.layers.60.block_sparse_moe.experts.8.w3", "model.layers.60.block_sparse_moe.experts.9.w3", "model.layers.60.block_sparse_moe.experts.10.w3", "model.layers.60.block_sparse_moe.experts.11.w3", "model.layers.60.block_sparse_moe.experts.12.w3", "model.layers.60.block_sparse_moe.experts.13.w3", "model.layers.60.block_sparse_moe.experts.14.w3", "model.layers.60.block_sparse_moe.experts.15.w3", "model.layers.60.block_sparse_moe.experts.16.w3", "model.layers.60.block_sparse_moe.experts.17.w3", "model.layers.60.block_sparse_moe.experts.18.w3", "model.layers.60.block_sparse_moe.experts.19.w3", "model.layers.60.block_sparse_moe.experts.20.w3", "model.layers.60.block_sparse_moe.experts.21.w3", "model.layers.60.block_sparse_moe.experts.22.w3", "model.layers.60.block_sparse_moe.experts.23.w3", "model.layers.60.block_sparse_moe.experts.24.w3", "model.layers.60.block_sparse_moe.experts.25.w3", "model.layers.60.block_sparse_moe.experts.26.w3", "model.layers.60.block_sparse_moe.experts.27.w3", "model.layers.60.block_sparse_moe.experts.28.w3", "model.layers.60.block_sparse_moe.experts.29.w3", "model.layers.60.block_sparse_moe.experts.30.w3", "model.layers.60.block_sparse_moe.experts.31.w3", "model.layers.60.block_sparse_moe.experts.32.w3", "model.layers.60.block_sparse_moe.experts.33.w3", "model.layers.60.block_sparse_moe.experts.34.w3", "model.layers.60.block_sparse_moe.experts.35.w3", "model.layers.60.block_sparse_moe.experts.36.w3", "model.layers.60.block_sparse_moe.experts.37.w3", "model.layers.60.block_sparse_moe.experts.38.w3", "model.layers.60.block_sparse_moe.experts.39.w3", "model.layers.60.block_sparse_moe.experts.40.w3", "model.layers.60.block_sparse_moe.experts.41.w3", "model.layers.60.block_sparse_moe.experts.42.w3", "model.layers.60.block_sparse_moe.experts.43.w3", "model.layers.60.block_sparse_moe.experts.44.w3", "model.layers.60.block_sparse_moe.experts.45.w3", "model.layers.60.block_sparse_moe.experts.46.w3", "model.layers.60.block_sparse_moe.experts.47.w3", "model.layers.60.block_sparse_moe.experts.48.w3", "model.layers.60.block_sparse_moe.experts.49.w3", "model.layers.60.block_sparse_moe.experts.50.w3", "model.layers.60.block_sparse_moe.experts.51.w3", "model.layers.60.block_sparse_moe.experts.52.w3", "model.layers.60.block_sparse_moe.experts.53.w3", "model.layers.60.block_sparse_moe.experts.54.w3", "model.layers.60.block_sparse_moe.experts.55.w3", "model.layers.60.block_sparse_moe.experts.56.w3", "model.layers.60.block_sparse_moe.experts.57.w3", "model.layers.60.block_sparse_moe.experts.58.w3", "model.layers.60.block_sparse_moe.experts.59.w3", "model.layers.60.block_sparse_moe.experts.60.w3", "model.layers.60.block_sparse_moe.experts.61.w3", "model.layers.60.block_sparse_moe.experts.62.w3", "model.layers.60.block_sparse_moe.experts.63.w3", "model.layers.60.block_sparse_moe.experts.64.w3", "model.layers.60.block_sparse_moe.experts.65.w3", "model.layers.60.block_sparse_moe.experts.66.w3", "model.layers.60.block_sparse_moe.experts.67.w3", "model.layers.60.block_sparse_moe.experts.68.w3", "model.layers.60.block_sparse_moe.experts.69.w3", "model.layers.60.block_sparse_moe.experts.70.w3", "model.layers.60.block_sparse_moe.experts.71.w3", "model.layers.60.block_sparse_moe.experts.72.w3", "model.layers.60.block_sparse_moe.experts.73.w3", "model.layers.60.block_sparse_moe.experts.74.w3", "model.layers.60.block_sparse_moe.experts.75.w3", "model.layers.60.block_sparse_moe.experts.76.w3", "model.layers.60.block_sparse_moe.experts.77.w3", "model.layers.60.block_sparse_moe.experts.78.w3", "model.layers.60.block_sparse_moe.experts.79.w3", "model.layers.60.block_sparse_moe.experts.80.w3", "model.layers.60.block_sparse_moe.experts.81.w3", "model.layers.60.block_sparse_moe.experts.82.w3", "model.layers.60.block_sparse_moe.experts.83.w3", "model.layers.60.block_sparse_moe.experts.84.w3", "model.layers.60.block_sparse_moe.experts.85.w3", "model.layers.60.block_sparse_moe.experts.86.w3", "model.layers.60.block_sparse_moe.experts.87.w3", "model.layers.60.block_sparse_moe.experts.88.w3", "model.layers.60.block_sparse_moe.experts.89.w3", "model.layers.60.block_sparse_moe.experts.90.w3", "model.layers.60.block_sparse_moe.experts.91.w3", "model.layers.60.block_sparse_moe.experts.92.w3", "model.layers.60.block_sparse_moe.experts.93.w3", "model.layers.60.block_sparse_moe.experts.94.w3", "model.layers.60.block_sparse_moe.experts.95.w3", "model.layers.60.block_sparse_moe.experts.96.w3", "model.layers.60.block_sparse_moe.experts.97.w3", "model.layers.60.block_sparse_moe.experts.98.w3", "model.layers.60.block_sparse_moe.experts.99.w3", "model.layers.60.block_sparse_moe.experts.100.w3", "model.layers.60.block_sparse_moe.experts.101.w3", "model.layers.60.block_sparse_moe.experts.102.w3", "model.layers.60.block_sparse_moe.experts.103.w3", "model.layers.60.block_sparse_moe.experts.104.w3", "model.layers.60.block_sparse_moe.experts.105.w3", "model.layers.60.block_sparse_moe.experts.106.w3", "model.layers.60.block_sparse_moe.experts.107.w3", "model.layers.60.block_sparse_moe.experts.108.w3", "model.layers.60.block_sparse_moe.experts.109.w3", "model.layers.60.block_sparse_moe.experts.110.w3", "model.layers.60.block_sparse_moe.experts.111.w3", "model.layers.60.block_sparse_moe.experts.112.w3", "model.layers.60.block_sparse_moe.experts.113.w3", "model.layers.60.block_sparse_moe.experts.114.w3", "model.layers.60.block_sparse_moe.experts.115.w3", "model.layers.60.block_sparse_moe.experts.116.w3", "model.layers.60.block_sparse_moe.experts.117.w3", "model.layers.60.block_sparse_moe.experts.118.w3", "model.layers.60.block_sparse_moe.experts.119.w3", "model.layers.60.block_sparse_moe.experts.120.w3", "model.layers.60.block_sparse_moe.experts.121.w3", "model.layers.60.block_sparse_moe.experts.122.w3", "model.layers.60.block_sparse_moe.experts.123.w3", "model.layers.60.block_sparse_moe.experts.124.w3", "model.layers.60.block_sparse_moe.experts.125.w3", "model.layers.60.block_sparse_moe.experts.126.w3", "model.layers.60.block_sparse_moe.experts.127.w3", "model.layers.60.block_sparse_moe.experts.128.w3", "model.layers.60.block_sparse_moe.experts.129.w3", "model.layers.60.block_sparse_moe.experts.130.w3", "model.layers.60.block_sparse_moe.experts.131.w3", "model.layers.60.block_sparse_moe.experts.132.w3", "model.layers.60.block_sparse_moe.experts.133.w3", "model.layers.60.block_sparse_moe.experts.134.w3", "model.layers.60.block_sparse_moe.experts.135.w3", "model.layers.60.block_sparse_moe.experts.136.w3", "model.layers.60.block_sparse_moe.experts.137.w3", "model.layers.60.block_sparse_moe.experts.138.w3", "model.layers.60.block_sparse_moe.experts.139.w3", "model.layers.60.block_sparse_moe.experts.140.w3", "model.layers.60.block_sparse_moe.experts.141.w3", "model.layers.60.block_sparse_moe.experts.142.w3", "model.layers.60.block_sparse_moe.experts.143.w3", "model.layers.60.block_sparse_moe.experts.144.w3", "model.layers.60.block_sparse_moe.experts.145.w3", "model.layers.60.block_sparse_moe.experts.146.w3", "model.layers.60.block_sparse_moe.experts.147.w3", "model.layers.60.block_sparse_moe.experts.148.w3", "model.layers.60.block_sparse_moe.experts.149.w3", "model.layers.60.block_sparse_moe.experts.150.w3", "model.layers.60.block_sparse_moe.experts.151.w3", "model.layers.60.block_sparse_moe.experts.152.w3", "model.layers.60.block_sparse_moe.experts.153.w3", "model.layers.60.block_sparse_moe.experts.154.w3", "model.layers.60.block_sparse_moe.experts.155.w3", "model.layers.60.block_sparse_moe.experts.156.w3", "model.layers.60.block_sparse_moe.experts.157.w3", "model.layers.60.block_sparse_moe.experts.158.w3", "model.layers.60.block_sparse_moe.experts.159.w3", "model.layers.60.block_sparse_moe.experts.160.w3", "model.layers.60.block_sparse_moe.experts.161.w3", "model.layers.60.block_sparse_moe.experts.162.w3", "model.layers.60.block_sparse_moe.experts.163.w3", "model.layers.60.block_sparse_moe.experts.164.w3", "model.layers.60.block_sparse_moe.experts.165.w3", "model.layers.60.block_sparse_moe.experts.166.w3", "model.layers.60.block_sparse_moe.experts.167.w3", "model.layers.60.block_sparse_moe.experts.168.w3", "model.layers.60.block_sparse_moe.experts.169.w3", "model.layers.60.block_sparse_moe.experts.170.w3", "model.layers.60.block_sparse_moe.experts.171.w3", "model.layers.60.block_sparse_moe.experts.172.w3", "model.layers.60.block_sparse_moe.experts.173.w3", "model.layers.60.block_sparse_moe.experts.174.w3", "model.layers.60.block_sparse_moe.experts.175.w3", "model.layers.60.block_sparse_moe.experts.176.w3", "model.layers.60.block_sparse_moe.experts.177.w3", "model.layers.60.block_sparse_moe.experts.178.w3", "model.layers.60.block_sparse_moe.experts.179.w3", "model.layers.60.block_sparse_moe.experts.180.w3", "model.layers.60.block_sparse_moe.experts.181.w3", "model.layers.60.block_sparse_moe.experts.182.w3", "model.layers.60.block_sparse_moe.experts.183.w3", "model.layers.60.block_sparse_moe.experts.184.w3", "model.layers.60.block_sparse_moe.experts.185.w3", "model.layers.60.block_sparse_moe.experts.186.w3", "model.layers.60.block_sparse_moe.experts.187.w3", "model.layers.60.block_sparse_moe.experts.188.w3", "model.layers.60.block_sparse_moe.experts.189.w3", "model.layers.60.block_sparse_moe.experts.190.w3", "model.layers.60.block_sparse_moe.experts.191.w3", "model.layers.60.block_sparse_moe.experts.192.w3", "model.layers.60.block_sparse_moe.experts.193.w3", "model.layers.60.block_sparse_moe.experts.194.w3", "model.layers.60.block_sparse_moe.experts.195.w3", "model.layers.60.block_sparse_moe.experts.196.w3", "model.layers.60.block_sparse_moe.experts.197.w3", "model.layers.60.block_sparse_moe.experts.198.w3", "model.layers.60.block_sparse_moe.experts.199.w3", "model.layers.60.block_sparse_moe.experts.200.w3", "model.layers.60.block_sparse_moe.experts.201.w3", "model.layers.60.block_sparse_moe.experts.202.w3", "model.layers.60.block_sparse_moe.experts.203.w3", "model.layers.60.block_sparse_moe.experts.204.w3", "model.layers.60.block_sparse_moe.experts.205.w3", "model.layers.60.block_sparse_moe.experts.206.w3", "model.layers.60.block_sparse_moe.experts.207.w3", "model.layers.60.block_sparse_moe.experts.208.w3", "model.layers.60.block_sparse_moe.experts.209.w3", "model.layers.60.block_sparse_moe.experts.210.w3", "model.layers.60.block_sparse_moe.experts.211.w3", "model.layers.60.block_sparse_moe.experts.212.w3", "model.layers.60.block_sparse_moe.experts.213.w3", "model.layers.60.block_sparse_moe.experts.214.w3", "model.layers.60.block_sparse_moe.experts.215.w3", "model.layers.60.block_sparse_moe.experts.216.w3", "model.layers.60.block_sparse_moe.experts.217.w3", "model.layers.60.block_sparse_moe.experts.218.w3", "model.layers.60.block_sparse_moe.experts.219.w3", "model.layers.60.block_sparse_moe.experts.220.w3", "model.layers.60.block_sparse_moe.experts.221.w3", "model.layers.60.block_sparse_moe.experts.222.w3", "model.layers.60.block_sparse_moe.experts.223.w3", "model.layers.60.block_sparse_moe.experts.224.w3", "model.layers.60.block_sparse_moe.experts.225.w3", "model.layers.60.block_sparse_moe.experts.226.w3", "model.layers.60.block_sparse_moe.experts.227.w3", "model.layers.60.block_sparse_moe.experts.228.w3", "model.layers.60.block_sparse_moe.experts.229.w3", "model.layers.60.block_sparse_moe.experts.230.w3", "model.layers.60.block_sparse_moe.experts.231.w3", "model.layers.60.block_sparse_moe.experts.232.w3", "model.layers.60.block_sparse_moe.experts.233.w3", "model.layers.60.block_sparse_moe.experts.234.w3", "model.layers.60.block_sparse_moe.experts.235.w3", "model.layers.60.block_sparse_moe.experts.236.w3", "model.layers.60.block_sparse_moe.experts.237.w3", "model.layers.60.block_sparse_moe.experts.238.w3", "model.layers.60.block_sparse_moe.experts.239.w3", "model.layers.60.block_sparse_moe.experts.240.w3", "model.layers.60.block_sparse_moe.experts.241.w3", "model.layers.60.block_sparse_moe.experts.242.w3", "model.layers.60.block_sparse_moe.experts.243.w3", "model.layers.60.block_sparse_moe.experts.244.w3", "model.layers.60.block_sparse_moe.experts.245.w3", "model.layers.60.block_sparse_moe.experts.246.w3", "model.layers.60.block_sparse_moe.experts.247.w3", "model.layers.60.block_sparse_moe.experts.248.w3", "model.layers.60.block_sparse_moe.experts.249.w3", "model.layers.60.block_sparse_moe.experts.250.w3", "model.layers.60.block_sparse_moe.experts.251.w3", "model.layers.60.block_sparse_moe.experts.252.w3", "model.layers.60.block_sparse_moe.experts.253.w3", "model.layers.60.block_sparse_moe.experts.254.w3", "model.layers.60.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.00024091750383375965, "dbits": 2415919104 } ] }, { "idx": 304, "layers": [ "model.layers.60.block_sparse_moe.experts.0.w2", "model.layers.60.block_sparse_moe.experts.1.w2", "model.layers.60.block_sparse_moe.experts.2.w2", "model.layers.60.block_sparse_moe.experts.3.w2", "model.layers.60.block_sparse_moe.experts.4.w2", "model.layers.60.block_sparse_moe.experts.5.w2", "model.layers.60.block_sparse_moe.experts.6.w2", "model.layers.60.block_sparse_moe.experts.7.w2", "model.layers.60.block_sparse_moe.experts.8.w2", "model.layers.60.block_sparse_moe.experts.9.w2", "model.layers.60.block_sparse_moe.experts.10.w2", "model.layers.60.block_sparse_moe.experts.11.w2", "model.layers.60.block_sparse_moe.experts.12.w2", "model.layers.60.block_sparse_moe.experts.13.w2", "model.layers.60.block_sparse_moe.experts.14.w2", "model.layers.60.block_sparse_moe.experts.15.w2", "model.layers.60.block_sparse_moe.experts.16.w2", "model.layers.60.block_sparse_moe.experts.17.w2", "model.layers.60.block_sparse_moe.experts.18.w2", "model.layers.60.block_sparse_moe.experts.19.w2", "model.layers.60.block_sparse_moe.experts.20.w2", "model.layers.60.block_sparse_moe.experts.21.w2", "model.layers.60.block_sparse_moe.experts.22.w2", "model.layers.60.block_sparse_moe.experts.23.w2", "model.layers.60.block_sparse_moe.experts.24.w2", "model.layers.60.block_sparse_moe.experts.25.w2", "model.layers.60.block_sparse_moe.experts.26.w2", "model.layers.60.block_sparse_moe.experts.27.w2", "model.layers.60.block_sparse_moe.experts.28.w2", "model.layers.60.block_sparse_moe.experts.29.w2", "model.layers.60.block_sparse_moe.experts.30.w2", "model.layers.60.block_sparse_moe.experts.31.w2", "model.layers.60.block_sparse_moe.experts.32.w2", "model.layers.60.block_sparse_moe.experts.33.w2", "model.layers.60.block_sparse_moe.experts.34.w2", "model.layers.60.block_sparse_moe.experts.35.w2", "model.layers.60.block_sparse_moe.experts.36.w2", "model.layers.60.block_sparse_moe.experts.37.w2", "model.layers.60.block_sparse_moe.experts.38.w2", "model.layers.60.block_sparse_moe.experts.39.w2", "model.layers.60.block_sparse_moe.experts.40.w2", "model.layers.60.block_sparse_moe.experts.41.w2", "model.layers.60.block_sparse_moe.experts.42.w2", "model.layers.60.block_sparse_moe.experts.43.w2", "model.layers.60.block_sparse_moe.experts.44.w2", "model.layers.60.block_sparse_moe.experts.45.w2", "model.layers.60.block_sparse_moe.experts.46.w2", "model.layers.60.block_sparse_moe.experts.47.w2", "model.layers.60.block_sparse_moe.experts.48.w2", "model.layers.60.block_sparse_moe.experts.49.w2", "model.layers.60.block_sparse_moe.experts.50.w2", "model.layers.60.block_sparse_moe.experts.51.w2", "model.layers.60.block_sparse_moe.experts.52.w2", "model.layers.60.block_sparse_moe.experts.53.w2", "model.layers.60.block_sparse_moe.experts.54.w2", "model.layers.60.block_sparse_moe.experts.55.w2", "model.layers.60.block_sparse_moe.experts.56.w2", "model.layers.60.block_sparse_moe.experts.57.w2", "model.layers.60.block_sparse_moe.experts.58.w2", "model.layers.60.block_sparse_moe.experts.59.w2", "model.layers.60.block_sparse_moe.experts.60.w2", "model.layers.60.block_sparse_moe.experts.61.w2", "model.layers.60.block_sparse_moe.experts.62.w2", "model.layers.60.block_sparse_moe.experts.63.w2", "model.layers.60.block_sparse_moe.experts.64.w2", "model.layers.60.block_sparse_moe.experts.65.w2", "model.layers.60.block_sparse_moe.experts.66.w2", "model.layers.60.block_sparse_moe.experts.67.w2", "model.layers.60.block_sparse_moe.experts.68.w2", "model.layers.60.block_sparse_moe.experts.69.w2", "model.layers.60.block_sparse_moe.experts.70.w2", "model.layers.60.block_sparse_moe.experts.71.w2", "model.layers.60.block_sparse_moe.experts.72.w2", "model.layers.60.block_sparse_moe.experts.73.w2", "model.layers.60.block_sparse_moe.experts.74.w2", "model.layers.60.block_sparse_moe.experts.75.w2", "model.layers.60.block_sparse_moe.experts.76.w2", "model.layers.60.block_sparse_moe.experts.77.w2", "model.layers.60.block_sparse_moe.experts.78.w2", "model.layers.60.block_sparse_moe.experts.79.w2", "model.layers.60.block_sparse_moe.experts.80.w2", "model.layers.60.block_sparse_moe.experts.81.w2", "model.layers.60.block_sparse_moe.experts.82.w2", "model.layers.60.block_sparse_moe.experts.83.w2", "model.layers.60.block_sparse_moe.experts.84.w2", "model.layers.60.block_sparse_moe.experts.85.w2", "model.layers.60.block_sparse_moe.experts.86.w2", "model.layers.60.block_sparse_moe.experts.87.w2", "model.layers.60.block_sparse_moe.experts.88.w2", "model.layers.60.block_sparse_moe.experts.89.w2", "model.layers.60.block_sparse_moe.experts.90.w2", "model.layers.60.block_sparse_moe.experts.91.w2", "model.layers.60.block_sparse_moe.experts.92.w2", "model.layers.60.block_sparse_moe.experts.93.w2", "model.layers.60.block_sparse_moe.experts.94.w2", "model.layers.60.block_sparse_moe.experts.95.w2", "model.layers.60.block_sparse_moe.experts.96.w2", "model.layers.60.block_sparse_moe.experts.97.w2", "model.layers.60.block_sparse_moe.experts.98.w2", "model.layers.60.block_sparse_moe.experts.99.w2", "model.layers.60.block_sparse_moe.experts.100.w2", "model.layers.60.block_sparse_moe.experts.101.w2", "model.layers.60.block_sparse_moe.experts.102.w2", "model.layers.60.block_sparse_moe.experts.103.w2", "model.layers.60.block_sparse_moe.experts.104.w2", "model.layers.60.block_sparse_moe.experts.105.w2", "model.layers.60.block_sparse_moe.experts.106.w2", "model.layers.60.block_sparse_moe.experts.107.w2", "model.layers.60.block_sparse_moe.experts.108.w2", "model.layers.60.block_sparse_moe.experts.109.w2", "model.layers.60.block_sparse_moe.experts.110.w2", "model.layers.60.block_sparse_moe.experts.111.w2", "model.layers.60.block_sparse_moe.experts.112.w2", "model.layers.60.block_sparse_moe.experts.113.w2", "model.layers.60.block_sparse_moe.experts.114.w2", "model.layers.60.block_sparse_moe.experts.115.w2", "model.layers.60.block_sparse_moe.experts.116.w2", "model.layers.60.block_sparse_moe.experts.117.w2", "model.layers.60.block_sparse_moe.experts.118.w2", "model.layers.60.block_sparse_moe.experts.119.w2", "model.layers.60.block_sparse_moe.experts.120.w2", "model.layers.60.block_sparse_moe.experts.121.w2", "model.layers.60.block_sparse_moe.experts.122.w2", "model.layers.60.block_sparse_moe.experts.123.w2", "model.layers.60.block_sparse_moe.experts.124.w2", "model.layers.60.block_sparse_moe.experts.125.w2", "model.layers.60.block_sparse_moe.experts.126.w2", "model.layers.60.block_sparse_moe.experts.127.w2", "model.layers.60.block_sparse_moe.experts.128.w2", "model.layers.60.block_sparse_moe.experts.129.w2", "model.layers.60.block_sparse_moe.experts.130.w2", "model.layers.60.block_sparse_moe.experts.131.w2", "model.layers.60.block_sparse_moe.experts.132.w2", "model.layers.60.block_sparse_moe.experts.133.w2", "model.layers.60.block_sparse_moe.experts.134.w2", "model.layers.60.block_sparse_moe.experts.135.w2", "model.layers.60.block_sparse_moe.experts.136.w2", "model.layers.60.block_sparse_moe.experts.137.w2", "model.layers.60.block_sparse_moe.experts.138.w2", "model.layers.60.block_sparse_moe.experts.139.w2", "model.layers.60.block_sparse_moe.experts.140.w2", "model.layers.60.block_sparse_moe.experts.141.w2", "model.layers.60.block_sparse_moe.experts.142.w2", "model.layers.60.block_sparse_moe.experts.143.w2", "model.layers.60.block_sparse_moe.experts.144.w2", "model.layers.60.block_sparse_moe.experts.145.w2", "model.layers.60.block_sparse_moe.experts.146.w2", "model.layers.60.block_sparse_moe.experts.147.w2", "model.layers.60.block_sparse_moe.experts.148.w2", "model.layers.60.block_sparse_moe.experts.149.w2", "model.layers.60.block_sparse_moe.experts.150.w2", "model.layers.60.block_sparse_moe.experts.151.w2", "model.layers.60.block_sparse_moe.experts.152.w2", "model.layers.60.block_sparse_moe.experts.153.w2", "model.layers.60.block_sparse_moe.experts.154.w2", "model.layers.60.block_sparse_moe.experts.155.w2", "model.layers.60.block_sparse_moe.experts.156.w2", "model.layers.60.block_sparse_moe.experts.157.w2", "model.layers.60.block_sparse_moe.experts.158.w2", "model.layers.60.block_sparse_moe.experts.159.w2", "model.layers.60.block_sparse_moe.experts.160.w2", "model.layers.60.block_sparse_moe.experts.161.w2", "model.layers.60.block_sparse_moe.experts.162.w2", "model.layers.60.block_sparse_moe.experts.163.w2", "model.layers.60.block_sparse_moe.experts.164.w2", "model.layers.60.block_sparse_moe.experts.165.w2", "model.layers.60.block_sparse_moe.experts.166.w2", "model.layers.60.block_sparse_moe.experts.167.w2", "model.layers.60.block_sparse_moe.experts.168.w2", "model.layers.60.block_sparse_moe.experts.169.w2", "model.layers.60.block_sparse_moe.experts.170.w2", "model.layers.60.block_sparse_moe.experts.171.w2", "model.layers.60.block_sparse_moe.experts.172.w2", "model.layers.60.block_sparse_moe.experts.173.w2", "model.layers.60.block_sparse_moe.experts.174.w2", "model.layers.60.block_sparse_moe.experts.175.w2", "model.layers.60.block_sparse_moe.experts.176.w2", "model.layers.60.block_sparse_moe.experts.177.w2", "model.layers.60.block_sparse_moe.experts.178.w2", "model.layers.60.block_sparse_moe.experts.179.w2", "model.layers.60.block_sparse_moe.experts.180.w2", "model.layers.60.block_sparse_moe.experts.181.w2", "model.layers.60.block_sparse_moe.experts.182.w2", "model.layers.60.block_sparse_moe.experts.183.w2", "model.layers.60.block_sparse_moe.experts.184.w2", "model.layers.60.block_sparse_moe.experts.185.w2", "model.layers.60.block_sparse_moe.experts.186.w2", "model.layers.60.block_sparse_moe.experts.187.w2", "model.layers.60.block_sparse_moe.experts.188.w2", "model.layers.60.block_sparse_moe.experts.189.w2", "model.layers.60.block_sparse_moe.experts.190.w2", "model.layers.60.block_sparse_moe.experts.191.w2", "model.layers.60.block_sparse_moe.experts.192.w2", "model.layers.60.block_sparse_moe.experts.193.w2", "model.layers.60.block_sparse_moe.experts.194.w2", "model.layers.60.block_sparse_moe.experts.195.w2", "model.layers.60.block_sparse_moe.experts.196.w2", "model.layers.60.block_sparse_moe.experts.197.w2", "model.layers.60.block_sparse_moe.experts.198.w2", "model.layers.60.block_sparse_moe.experts.199.w2", "model.layers.60.block_sparse_moe.experts.200.w2", "model.layers.60.block_sparse_moe.experts.201.w2", "model.layers.60.block_sparse_moe.experts.202.w2", "model.layers.60.block_sparse_moe.experts.203.w2", "model.layers.60.block_sparse_moe.experts.204.w2", "model.layers.60.block_sparse_moe.experts.205.w2", "model.layers.60.block_sparse_moe.experts.206.w2", "model.layers.60.block_sparse_moe.experts.207.w2", "model.layers.60.block_sparse_moe.experts.208.w2", "model.layers.60.block_sparse_moe.experts.209.w2", "model.layers.60.block_sparse_moe.experts.210.w2", "model.layers.60.block_sparse_moe.experts.211.w2", "model.layers.60.block_sparse_moe.experts.212.w2", "model.layers.60.block_sparse_moe.experts.213.w2", "model.layers.60.block_sparse_moe.experts.214.w2", "model.layers.60.block_sparse_moe.experts.215.w2", "model.layers.60.block_sparse_moe.experts.216.w2", "model.layers.60.block_sparse_moe.experts.217.w2", "model.layers.60.block_sparse_moe.experts.218.w2", "model.layers.60.block_sparse_moe.experts.219.w2", "model.layers.60.block_sparse_moe.experts.220.w2", "model.layers.60.block_sparse_moe.experts.221.w2", "model.layers.60.block_sparse_moe.experts.222.w2", "model.layers.60.block_sparse_moe.experts.223.w2", "model.layers.60.block_sparse_moe.experts.224.w2", "model.layers.60.block_sparse_moe.experts.225.w2", "model.layers.60.block_sparse_moe.experts.226.w2", "model.layers.60.block_sparse_moe.experts.227.w2", "model.layers.60.block_sparse_moe.experts.228.w2", "model.layers.60.block_sparse_moe.experts.229.w2", "model.layers.60.block_sparse_moe.experts.230.w2", "model.layers.60.block_sparse_moe.experts.231.w2", "model.layers.60.block_sparse_moe.experts.232.w2", "model.layers.60.block_sparse_moe.experts.233.w2", "model.layers.60.block_sparse_moe.experts.234.w2", "model.layers.60.block_sparse_moe.experts.235.w2", "model.layers.60.block_sparse_moe.experts.236.w2", "model.layers.60.block_sparse_moe.experts.237.w2", "model.layers.60.block_sparse_moe.experts.238.w2", "model.layers.60.block_sparse_moe.experts.239.w2", "model.layers.60.block_sparse_moe.experts.240.w2", "model.layers.60.block_sparse_moe.experts.241.w2", "model.layers.60.block_sparse_moe.experts.242.w2", "model.layers.60.block_sparse_moe.experts.243.w2", "model.layers.60.block_sparse_moe.experts.244.w2", "model.layers.60.block_sparse_moe.experts.245.w2", "model.layers.60.block_sparse_moe.experts.246.w2", "model.layers.60.block_sparse_moe.experts.247.w2", "model.layers.60.block_sparse_moe.experts.248.w2", "model.layers.60.block_sparse_moe.experts.249.w2", "model.layers.60.block_sparse_moe.experts.250.w2", "model.layers.60.block_sparse_moe.experts.251.w2", "model.layers.60.block_sparse_moe.experts.252.w2", "model.layers.60.block_sparse_moe.experts.253.w2", "model.layers.60.block_sparse_moe.experts.254.w2", "model.layers.60.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": 2.5078654289356628e-06, "dbits": 1207959552 } ] }, { "idx": 305, "layers": [ "model.layers.61.self_attn.q_proj" ], "candidates": [ { "dkld": -6.285160779956334e-05, "dbits": 18874368 } ] }, { "idx": 306, "layers": [ "model.layers.61.self_attn.k_proj", "model.layers.61.self_attn.v_proj" ], "candidates": [ { "dkld": 0.0030027229338884243, "dbits": 6291456 } ] }, { "idx": 307, "layers": [ "model.layers.61.self_attn.o_proj" ], "candidates": [ { "dkld": -0.0008462104946375115, "dbits": 18874368 } ] }, { "idx": 308, "layers": [ "model.layers.61.block_sparse_moe.experts.0.w1", "model.layers.61.block_sparse_moe.experts.1.w1", "model.layers.61.block_sparse_moe.experts.2.w1", "model.layers.61.block_sparse_moe.experts.3.w1", "model.layers.61.block_sparse_moe.experts.4.w1", "model.layers.61.block_sparse_moe.experts.5.w1", "model.layers.61.block_sparse_moe.experts.6.w1", "model.layers.61.block_sparse_moe.experts.7.w1", "model.layers.61.block_sparse_moe.experts.8.w1", "model.layers.61.block_sparse_moe.experts.9.w1", "model.layers.61.block_sparse_moe.experts.10.w1", "model.layers.61.block_sparse_moe.experts.11.w1", "model.layers.61.block_sparse_moe.experts.12.w1", "model.layers.61.block_sparse_moe.experts.13.w1", "model.layers.61.block_sparse_moe.experts.14.w1", "model.layers.61.block_sparse_moe.experts.15.w1", "model.layers.61.block_sparse_moe.experts.16.w1", "model.layers.61.block_sparse_moe.experts.17.w1", "model.layers.61.block_sparse_moe.experts.18.w1", "model.layers.61.block_sparse_moe.experts.19.w1", "model.layers.61.block_sparse_moe.experts.20.w1", "model.layers.61.block_sparse_moe.experts.21.w1", "model.layers.61.block_sparse_moe.experts.22.w1", "model.layers.61.block_sparse_moe.experts.23.w1", "model.layers.61.block_sparse_moe.experts.24.w1", "model.layers.61.block_sparse_moe.experts.25.w1", "model.layers.61.block_sparse_moe.experts.26.w1", "model.layers.61.block_sparse_moe.experts.27.w1", "model.layers.61.block_sparse_moe.experts.28.w1", "model.layers.61.block_sparse_moe.experts.29.w1", "model.layers.61.block_sparse_moe.experts.30.w1", "model.layers.61.block_sparse_moe.experts.31.w1", "model.layers.61.block_sparse_moe.experts.32.w1", "model.layers.61.block_sparse_moe.experts.33.w1", "model.layers.61.block_sparse_moe.experts.34.w1", "model.layers.61.block_sparse_moe.experts.35.w1", "model.layers.61.block_sparse_moe.experts.36.w1", "model.layers.61.block_sparse_moe.experts.37.w1", "model.layers.61.block_sparse_moe.experts.38.w1", "model.layers.61.block_sparse_moe.experts.39.w1", "model.layers.61.block_sparse_moe.experts.40.w1", "model.layers.61.block_sparse_moe.experts.41.w1", "model.layers.61.block_sparse_moe.experts.42.w1", "model.layers.61.block_sparse_moe.experts.43.w1", "model.layers.61.block_sparse_moe.experts.44.w1", "model.layers.61.block_sparse_moe.experts.45.w1", "model.layers.61.block_sparse_moe.experts.46.w1", "model.layers.61.block_sparse_moe.experts.47.w1", "model.layers.61.block_sparse_moe.experts.48.w1", "model.layers.61.block_sparse_moe.experts.49.w1", "model.layers.61.block_sparse_moe.experts.50.w1", "model.layers.61.block_sparse_moe.experts.51.w1", "model.layers.61.block_sparse_moe.experts.52.w1", "model.layers.61.block_sparse_moe.experts.53.w1", "model.layers.61.block_sparse_moe.experts.54.w1", "model.layers.61.block_sparse_moe.experts.55.w1", "model.layers.61.block_sparse_moe.experts.56.w1", "model.layers.61.block_sparse_moe.experts.57.w1", "model.layers.61.block_sparse_moe.experts.58.w1", "model.layers.61.block_sparse_moe.experts.59.w1", "model.layers.61.block_sparse_moe.experts.60.w1", "model.layers.61.block_sparse_moe.experts.61.w1", "model.layers.61.block_sparse_moe.experts.62.w1", "model.layers.61.block_sparse_moe.experts.63.w1", "model.layers.61.block_sparse_moe.experts.64.w1", "model.layers.61.block_sparse_moe.experts.65.w1", "model.layers.61.block_sparse_moe.experts.66.w1", "model.layers.61.block_sparse_moe.experts.67.w1", "model.layers.61.block_sparse_moe.experts.68.w1", "model.layers.61.block_sparse_moe.experts.69.w1", "model.layers.61.block_sparse_moe.experts.70.w1", "model.layers.61.block_sparse_moe.experts.71.w1", "model.layers.61.block_sparse_moe.experts.72.w1", "model.layers.61.block_sparse_moe.experts.73.w1", "model.layers.61.block_sparse_moe.experts.74.w1", "model.layers.61.block_sparse_moe.experts.75.w1", "model.layers.61.block_sparse_moe.experts.76.w1", "model.layers.61.block_sparse_moe.experts.77.w1", "model.layers.61.block_sparse_moe.experts.78.w1", "model.layers.61.block_sparse_moe.experts.79.w1", "model.layers.61.block_sparse_moe.experts.80.w1", "model.layers.61.block_sparse_moe.experts.81.w1", "model.layers.61.block_sparse_moe.experts.82.w1", "model.layers.61.block_sparse_moe.experts.83.w1", "model.layers.61.block_sparse_moe.experts.84.w1", "model.layers.61.block_sparse_moe.experts.85.w1", "model.layers.61.block_sparse_moe.experts.86.w1", "model.layers.61.block_sparse_moe.experts.87.w1", "model.layers.61.block_sparse_moe.experts.88.w1", "model.layers.61.block_sparse_moe.experts.89.w1", "model.layers.61.block_sparse_moe.experts.90.w1", "model.layers.61.block_sparse_moe.experts.91.w1", "model.layers.61.block_sparse_moe.experts.92.w1", "model.layers.61.block_sparse_moe.experts.93.w1", "model.layers.61.block_sparse_moe.experts.94.w1", "model.layers.61.block_sparse_moe.experts.95.w1", "model.layers.61.block_sparse_moe.experts.96.w1", "model.layers.61.block_sparse_moe.experts.97.w1", "model.layers.61.block_sparse_moe.experts.98.w1", "model.layers.61.block_sparse_moe.experts.99.w1", "model.layers.61.block_sparse_moe.experts.100.w1", "model.layers.61.block_sparse_moe.experts.101.w1", "model.layers.61.block_sparse_moe.experts.102.w1", "model.layers.61.block_sparse_moe.experts.103.w1", "model.layers.61.block_sparse_moe.experts.104.w1", "model.layers.61.block_sparse_moe.experts.105.w1", "model.layers.61.block_sparse_moe.experts.106.w1", "model.layers.61.block_sparse_moe.experts.107.w1", "model.layers.61.block_sparse_moe.experts.108.w1", "model.layers.61.block_sparse_moe.experts.109.w1", "model.layers.61.block_sparse_moe.experts.110.w1", "model.layers.61.block_sparse_moe.experts.111.w1", "model.layers.61.block_sparse_moe.experts.112.w1", "model.layers.61.block_sparse_moe.experts.113.w1", "model.layers.61.block_sparse_moe.experts.114.w1", "model.layers.61.block_sparse_moe.experts.115.w1", "model.layers.61.block_sparse_moe.experts.116.w1", "model.layers.61.block_sparse_moe.experts.117.w1", "model.layers.61.block_sparse_moe.experts.118.w1", "model.layers.61.block_sparse_moe.experts.119.w1", "model.layers.61.block_sparse_moe.experts.120.w1", "model.layers.61.block_sparse_moe.experts.121.w1", "model.layers.61.block_sparse_moe.experts.122.w1", "model.layers.61.block_sparse_moe.experts.123.w1", "model.layers.61.block_sparse_moe.experts.124.w1", "model.layers.61.block_sparse_moe.experts.125.w1", "model.layers.61.block_sparse_moe.experts.126.w1", "model.layers.61.block_sparse_moe.experts.127.w1", "model.layers.61.block_sparse_moe.experts.128.w1", "model.layers.61.block_sparse_moe.experts.129.w1", "model.layers.61.block_sparse_moe.experts.130.w1", "model.layers.61.block_sparse_moe.experts.131.w1", "model.layers.61.block_sparse_moe.experts.132.w1", "model.layers.61.block_sparse_moe.experts.133.w1", "model.layers.61.block_sparse_moe.experts.134.w1", "model.layers.61.block_sparse_moe.experts.135.w1", "model.layers.61.block_sparse_moe.experts.136.w1", "model.layers.61.block_sparse_moe.experts.137.w1", "model.layers.61.block_sparse_moe.experts.138.w1", "model.layers.61.block_sparse_moe.experts.139.w1", "model.layers.61.block_sparse_moe.experts.140.w1", "model.layers.61.block_sparse_moe.experts.141.w1", "model.layers.61.block_sparse_moe.experts.142.w1", "model.layers.61.block_sparse_moe.experts.143.w1", "model.layers.61.block_sparse_moe.experts.144.w1", "model.layers.61.block_sparse_moe.experts.145.w1", "model.layers.61.block_sparse_moe.experts.146.w1", "model.layers.61.block_sparse_moe.experts.147.w1", "model.layers.61.block_sparse_moe.experts.148.w1", "model.layers.61.block_sparse_moe.experts.149.w1", "model.layers.61.block_sparse_moe.experts.150.w1", "model.layers.61.block_sparse_moe.experts.151.w1", "model.layers.61.block_sparse_moe.experts.152.w1", "model.layers.61.block_sparse_moe.experts.153.w1", "model.layers.61.block_sparse_moe.experts.154.w1", "model.layers.61.block_sparse_moe.experts.155.w1", "model.layers.61.block_sparse_moe.experts.156.w1", "model.layers.61.block_sparse_moe.experts.157.w1", "model.layers.61.block_sparse_moe.experts.158.w1", "model.layers.61.block_sparse_moe.experts.159.w1", "model.layers.61.block_sparse_moe.experts.160.w1", "model.layers.61.block_sparse_moe.experts.161.w1", "model.layers.61.block_sparse_moe.experts.162.w1", "model.layers.61.block_sparse_moe.experts.163.w1", "model.layers.61.block_sparse_moe.experts.164.w1", "model.layers.61.block_sparse_moe.experts.165.w1", "model.layers.61.block_sparse_moe.experts.166.w1", "model.layers.61.block_sparse_moe.experts.167.w1", "model.layers.61.block_sparse_moe.experts.168.w1", "model.layers.61.block_sparse_moe.experts.169.w1", "model.layers.61.block_sparse_moe.experts.170.w1", "model.layers.61.block_sparse_moe.experts.171.w1", "model.layers.61.block_sparse_moe.experts.172.w1", "model.layers.61.block_sparse_moe.experts.173.w1", "model.layers.61.block_sparse_moe.experts.174.w1", "model.layers.61.block_sparse_moe.experts.175.w1", "model.layers.61.block_sparse_moe.experts.176.w1", "model.layers.61.block_sparse_moe.experts.177.w1", "model.layers.61.block_sparse_moe.experts.178.w1", "model.layers.61.block_sparse_moe.experts.179.w1", "model.layers.61.block_sparse_moe.experts.180.w1", "model.layers.61.block_sparse_moe.experts.181.w1", "model.layers.61.block_sparse_moe.experts.182.w1", "model.layers.61.block_sparse_moe.experts.183.w1", "model.layers.61.block_sparse_moe.experts.184.w1", "model.layers.61.block_sparse_moe.experts.185.w1", "model.layers.61.block_sparse_moe.experts.186.w1", "model.layers.61.block_sparse_moe.experts.187.w1", "model.layers.61.block_sparse_moe.experts.188.w1", "model.layers.61.block_sparse_moe.experts.189.w1", "model.layers.61.block_sparse_moe.experts.190.w1", "model.layers.61.block_sparse_moe.experts.191.w1", "model.layers.61.block_sparse_moe.experts.192.w1", "model.layers.61.block_sparse_moe.experts.193.w1", "model.layers.61.block_sparse_moe.experts.194.w1", "model.layers.61.block_sparse_moe.experts.195.w1", "model.layers.61.block_sparse_moe.experts.196.w1", "model.layers.61.block_sparse_moe.experts.197.w1", "model.layers.61.block_sparse_moe.experts.198.w1", "model.layers.61.block_sparse_moe.experts.199.w1", "model.layers.61.block_sparse_moe.experts.200.w1", "model.layers.61.block_sparse_moe.experts.201.w1", "model.layers.61.block_sparse_moe.experts.202.w1", "model.layers.61.block_sparse_moe.experts.203.w1", "model.layers.61.block_sparse_moe.experts.204.w1", "model.layers.61.block_sparse_moe.experts.205.w1", "model.layers.61.block_sparse_moe.experts.206.w1", "model.layers.61.block_sparse_moe.experts.207.w1", "model.layers.61.block_sparse_moe.experts.208.w1", "model.layers.61.block_sparse_moe.experts.209.w1", "model.layers.61.block_sparse_moe.experts.210.w1", "model.layers.61.block_sparse_moe.experts.211.w1", "model.layers.61.block_sparse_moe.experts.212.w1", "model.layers.61.block_sparse_moe.experts.213.w1", "model.layers.61.block_sparse_moe.experts.214.w1", "model.layers.61.block_sparse_moe.experts.215.w1", "model.layers.61.block_sparse_moe.experts.216.w1", "model.layers.61.block_sparse_moe.experts.217.w1", "model.layers.61.block_sparse_moe.experts.218.w1", "model.layers.61.block_sparse_moe.experts.219.w1", "model.layers.61.block_sparse_moe.experts.220.w1", "model.layers.61.block_sparse_moe.experts.221.w1", "model.layers.61.block_sparse_moe.experts.222.w1", "model.layers.61.block_sparse_moe.experts.223.w1", "model.layers.61.block_sparse_moe.experts.224.w1", "model.layers.61.block_sparse_moe.experts.225.w1", "model.layers.61.block_sparse_moe.experts.226.w1", "model.layers.61.block_sparse_moe.experts.227.w1", "model.layers.61.block_sparse_moe.experts.228.w1", "model.layers.61.block_sparse_moe.experts.229.w1", "model.layers.61.block_sparse_moe.experts.230.w1", "model.layers.61.block_sparse_moe.experts.231.w1", "model.layers.61.block_sparse_moe.experts.232.w1", "model.layers.61.block_sparse_moe.experts.233.w1", "model.layers.61.block_sparse_moe.experts.234.w1", "model.layers.61.block_sparse_moe.experts.235.w1", "model.layers.61.block_sparse_moe.experts.236.w1", "model.layers.61.block_sparse_moe.experts.237.w1", "model.layers.61.block_sparse_moe.experts.238.w1", "model.layers.61.block_sparse_moe.experts.239.w1", "model.layers.61.block_sparse_moe.experts.240.w1", "model.layers.61.block_sparse_moe.experts.241.w1", "model.layers.61.block_sparse_moe.experts.242.w1", "model.layers.61.block_sparse_moe.experts.243.w1", "model.layers.61.block_sparse_moe.experts.244.w1", "model.layers.61.block_sparse_moe.experts.245.w1", "model.layers.61.block_sparse_moe.experts.246.w1", "model.layers.61.block_sparse_moe.experts.247.w1", "model.layers.61.block_sparse_moe.experts.248.w1", "model.layers.61.block_sparse_moe.experts.249.w1", "model.layers.61.block_sparse_moe.experts.250.w1", "model.layers.61.block_sparse_moe.experts.251.w1", "model.layers.61.block_sparse_moe.experts.252.w1", "model.layers.61.block_sparse_moe.experts.253.w1", "model.layers.61.block_sparse_moe.experts.254.w1", "model.layers.61.block_sparse_moe.experts.255.w1", "model.layers.61.block_sparse_moe.experts.0.w3", "model.layers.61.block_sparse_moe.experts.1.w3", "model.layers.61.block_sparse_moe.experts.2.w3", "model.layers.61.block_sparse_moe.experts.3.w3", "model.layers.61.block_sparse_moe.experts.4.w3", "model.layers.61.block_sparse_moe.experts.5.w3", "model.layers.61.block_sparse_moe.experts.6.w3", "model.layers.61.block_sparse_moe.experts.7.w3", "model.layers.61.block_sparse_moe.experts.8.w3", "model.layers.61.block_sparse_moe.experts.9.w3", "model.layers.61.block_sparse_moe.experts.10.w3", "model.layers.61.block_sparse_moe.experts.11.w3", "model.layers.61.block_sparse_moe.experts.12.w3", "model.layers.61.block_sparse_moe.experts.13.w3", "model.layers.61.block_sparse_moe.experts.14.w3", "model.layers.61.block_sparse_moe.experts.15.w3", "model.layers.61.block_sparse_moe.experts.16.w3", "model.layers.61.block_sparse_moe.experts.17.w3", "model.layers.61.block_sparse_moe.experts.18.w3", "model.layers.61.block_sparse_moe.experts.19.w3", "model.layers.61.block_sparse_moe.experts.20.w3", "model.layers.61.block_sparse_moe.experts.21.w3", "model.layers.61.block_sparse_moe.experts.22.w3", "model.layers.61.block_sparse_moe.experts.23.w3", "model.layers.61.block_sparse_moe.experts.24.w3", "model.layers.61.block_sparse_moe.experts.25.w3", "model.layers.61.block_sparse_moe.experts.26.w3", "model.layers.61.block_sparse_moe.experts.27.w3", "model.layers.61.block_sparse_moe.experts.28.w3", "model.layers.61.block_sparse_moe.experts.29.w3", "model.layers.61.block_sparse_moe.experts.30.w3", "model.layers.61.block_sparse_moe.experts.31.w3", "model.layers.61.block_sparse_moe.experts.32.w3", "model.layers.61.block_sparse_moe.experts.33.w3", "model.layers.61.block_sparse_moe.experts.34.w3", "model.layers.61.block_sparse_moe.experts.35.w3", "model.layers.61.block_sparse_moe.experts.36.w3", "model.layers.61.block_sparse_moe.experts.37.w3", "model.layers.61.block_sparse_moe.experts.38.w3", "model.layers.61.block_sparse_moe.experts.39.w3", "model.layers.61.block_sparse_moe.experts.40.w3", "model.layers.61.block_sparse_moe.experts.41.w3", "model.layers.61.block_sparse_moe.experts.42.w3", "model.layers.61.block_sparse_moe.experts.43.w3", "model.layers.61.block_sparse_moe.experts.44.w3", "model.layers.61.block_sparse_moe.experts.45.w3", "model.layers.61.block_sparse_moe.experts.46.w3", "model.layers.61.block_sparse_moe.experts.47.w3", "model.layers.61.block_sparse_moe.experts.48.w3", "model.layers.61.block_sparse_moe.experts.49.w3", "model.layers.61.block_sparse_moe.experts.50.w3", "model.layers.61.block_sparse_moe.experts.51.w3", "model.layers.61.block_sparse_moe.experts.52.w3", "model.layers.61.block_sparse_moe.experts.53.w3", "model.layers.61.block_sparse_moe.experts.54.w3", "model.layers.61.block_sparse_moe.experts.55.w3", "model.layers.61.block_sparse_moe.experts.56.w3", "model.layers.61.block_sparse_moe.experts.57.w3", "model.layers.61.block_sparse_moe.experts.58.w3", "model.layers.61.block_sparse_moe.experts.59.w3", "model.layers.61.block_sparse_moe.experts.60.w3", "model.layers.61.block_sparse_moe.experts.61.w3", "model.layers.61.block_sparse_moe.experts.62.w3", "model.layers.61.block_sparse_moe.experts.63.w3", "model.layers.61.block_sparse_moe.experts.64.w3", "model.layers.61.block_sparse_moe.experts.65.w3", "model.layers.61.block_sparse_moe.experts.66.w3", "model.layers.61.block_sparse_moe.experts.67.w3", "model.layers.61.block_sparse_moe.experts.68.w3", "model.layers.61.block_sparse_moe.experts.69.w3", "model.layers.61.block_sparse_moe.experts.70.w3", "model.layers.61.block_sparse_moe.experts.71.w3", "model.layers.61.block_sparse_moe.experts.72.w3", "model.layers.61.block_sparse_moe.experts.73.w3", "model.layers.61.block_sparse_moe.experts.74.w3", "model.layers.61.block_sparse_moe.experts.75.w3", "model.layers.61.block_sparse_moe.experts.76.w3", "model.layers.61.block_sparse_moe.experts.77.w3", "model.layers.61.block_sparse_moe.experts.78.w3", "model.layers.61.block_sparse_moe.experts.79.w3", "model.layers.61.block_sparse_moe.experts.80.w3", "model.layers.61.block_sparse_moe.experts.81.w3", "model.layers.61.block_sparse_moe.experts.82.w3", "model.layers.61.block_sparse_moe.experts.83.w3", "model.layers.61.block_sparse_moe.experts.84.w3", "model.layers.61.block_sparse_moe.experts.85.w3", "model.layers.61.block_sparse_moe.experts.86.w3", "model.layers.61.block_sparse_moe.experts.87.w3", "model.layers.61.block_sparse_moe.experts.88.w3", "model.layers.61.block_sparse_moe.experts.89.w3", "model.layers.61.block_sparse_moe.experts.90.w3", "model.layers.61.block_sparse_moe.experts.91.w3", "model.layers.61.block_sparse_moe.experts.92.w3", "model.layers.61.block_sparse_moe.experts.93.w3", "model.layers.61.block_sparse_moe.experts.94.w3", "model.layers.61.block_sparse_moe.experts.95.w3", "model.layers.61.block_sparse_moe.experts.96.w3", "model.layers.61.block_sparse_moe.experts.97.w3", "model.layers.61.block_sparse_moe.experts.98.w3", "model.layers.61.block_sparse_moe.experts.99.w3", "model.layers.61.block_sparse_moe.experts.100.w3", "model.layers.61.block_sparse_moe.experts.101.w3", "model.layers.61.block_sparse_moe.experts.102.w3", "model.layers.61.block_sparse_moe.experts.103.w3", "model.layers.61.block_sparse_moe.experts.104.w3", "model.layers.61.block_sparse_moe.experts.105.w3", "model.layers.61.block_sparse_moe.experts.106.w3", "model.layers.61.block_sparse_moe.experts.107.w3", "model.layers.61.block_sparse_moe.experts.108.w3", "model.layers.61.block_sparse_moe.experts.109.w3", "model.layers.61.block_sparse_moe.experts.110.w3", "model.layers.61.block_sparse_moe.experts.111.w3", "model.layers.61.block_sparse_moe.experts.112.w3", "model.layers.61.block_sparse_moe.experts.113.w3", "model.layers.61.block_sparse_moe.experts.114.w3", "model.layers.61.block_sparse_moe.experts.115.w3", "model.layers.61.block_sparse_moe.experts.116.w3", "model.layers.61.block_sparse_moe.experts.117.w3", "model.layers.61.block_sparse_moe.experts.118.w3", "model.layers.61.block_sparse_moe.experts.119.w3", "model.layers.61.block_sparse_moe.experts.120.w3", "model.layers.61.block_sparse_moe.experts.121.w3", "model.layers.61.block_sparse_moe.experts.122.w3", "model.layers.61.block_sparse_moe.experts.123.w3", "model.layers.61.block_sparse_moe.experts.124.w3", "model.layers.61.block_sparse_moe.experts.125.w3", "model.layers.61.block_sparse_moe.experts.126.w3", "model.layers.61.block_sparse_moe.experts.127.w3", "model.layers.61.block_sparse_moe.experts.128.w3", "model.layers.61.block_sparse_moe.experts.129.w3", "model.layers.61.block_sparse_moe.experts.130.w3", "model.layers.61.block_sparse_moe.experts.131.w3", "model.layers.61.block_sparse_moe.experts.132.w3", "model.layers.61.block_sparse_moe.experts.133.w3", "model.layers.61.block_sparse_moe.experts.134.w3", "model.layers.61.block_sparse_moe.experts.135.w3", "model.layers.61.block_sparse_moe.experts.136.w3", "model.layers.61.block_sparse_moe.experts.137.w3", "model.layers.61.block_sparse_moe.experts.138.w3", "model.layers.61.block_sparse_moe.experts.139.w3", "model.layers.61.block_sparse_moe.experts.140.w3", "model.layers.61.block_sparse_moe.experts.141.w3", "model.layers.61.block_sparse_moe.experts.142.w3", "model.layers.61.block_sparse_moe.experts.143.w3", "model.layers.61.block_sparse_moe.experts.144.w3", "model.layers.61.block_sparse_moe.experts.145.w3", "model.layers.61.block_sparse_moe.experts.146.w3", "model.layers.61.block_sparse_moe.experts.147.w3", "model.layers.61.block_sparse_moe.experts.148.w3", "model.layers.61.block_sparse_moe.experts.149.w3", "model.layers.61.block_sparse_moe.experts.150.w3", "model.layers.61.block_sparse_moe.experts.151.w3", "model.layers.61.block_sparse_moe.experts.152.w3", "model.layers.61.block_sparse_moe.experts.153.w3", "model.layers.61.block_sparse_moe.experts.154.w3", "model.layers.61.block_sparse_moe.experts.155.w3", "model.layers.61.block_sparse_moe.experts.156.w3", "model.layers.61.block_sparse_moe.experts.157.w3", "model.layers.61.block_sparse_moe.experts.158.w3", "model.layers.61.block_sparse_moe.experts.159.w3", "model.layers.61.block_sparse_moe.experts.160.w3", "model.layers.61.block_sparse_moe.experts.161.w3", "model.layers.61.block_sparse_moe.experts.162.w3", "model.layers.61.block_sparse_moe.experts.163.w3", "model.layers.61.block_sparse_moe.experts.164.w3", "model.layers.61.block_sparse_moe.experts.165.w3", "model.layers.61.block_sparse_moe.experts.166.w3", "model.layers.61.block_sparse_moe.experts.167.w3", "model.layers.61.block_sparse_moe.experts.168.w3", "model.layers.61.block_sparse_moe.experts.169.w3", "model.layers.61.block_sparse_moe.experts.170.w3", "model.layers.61.block_sparse_moe.experts.171.w3", "model.layers.61.block_sparse_moe.experts.172.w3", "model.layers.61.block_sparse_moe.experts.173.w3", "model.layers.61.block_sparse_moe.experts.174.w3", "model.layers.61.block_sparse_moe.experts.175.w3", "model.layers.61.block_sparse_moe.experts.176.w3", "model.layers.61.block_sparse_moe.experts.177.w3", "model.layers.61.block_sparse_moe.experts.178.w3", "model.layers.61.block_sparse_moe.experts.179.w3", "model.layers.61.block_sparse_moe.experts.180.w3", "model.layers.61.block_sparse_moe.experts.181.w3", "model.layers.61.block_sparse_moe.experts.182.w3", "model.layers.61.block_sparse_moe.experts.183.w3", "model.layers.61.block_sparse_moe.experts.184.w3", "model.layers.61.block_sparse_moe.experts.185.w3", "model.layers.61.block_sparse_moe.experts.186.w3", "model.layers.61.block_sparse_moe.experts.187.w3", "model.layers.61.block_sparse_moe.experts.188.w3", "model.layers.61.block_sparse_moe.experts.189.w3", "model.layers.61.block_sparse_moe.experts.190.w3", "model.layers.61.block_sparse_moe.experts.191.w3", "model.layers.61.block_sparse_moe.experts.192.w3", "model.layers.61.block_sparse_moe.experts.193.w3", "model.layers.61.block_sparse_moe.experts.194.w3", "model.layers.61.block_sparse_moe.experts.195.w3", "model.layers.61.block_sparse_moe.experts.196.w3", "model.layers.61.block_sparse_moe.experts.197.w3", "model.layers.61.block_sparse_moe.experts.198.w3", "model.layers.61.block_sparse_moe.experts.199.w3", "model.layers.61.block_sparse_moe.experts.200.w3", "model.layers.61.block_sparse_moe.experts.201.w3", "model.layers.61.block_sparse_moe.experts.202.w3", "model.layers.61.block_sparse_moe.experts.203.w3", "model.layers.61.block_sparse_moe.experts.204.w3", "model.layers.61.block_sparse_moe.experts.205.w3", "model.layers.61.block_sparse_moe.experts.206.w3", "model.layers.61.block_sparse_moe.experts.207.w3", "model.layers.61.block_sparse_moe.experts.208.w3", "model.layers.61.block_sparse_moe.experts.209.w3", "model.layers.61.block_sparse_moe.experts.210.w3", "model.layers.61.block_sparse_moe.experts.211.w3", "model.layers.61.block_sparse_moe.experts.212.w3", "model.layers.61.block_sparse_moe.experts.213.w3", "model.layers.61.block_sparse_moe.experts.214.w3", "model.layers.61.block_sparse_moe.experts.215.w3", "model.layers.61.block_sparse_moe.experts.216.w3", "model.layers.61.block_sparse_moe.experts.217.w3", "model.layers.61.block_sparse_moe.experts.218.w3", "model.layers.61.block_sparse_moe.experts.219.w3", "model.layers.61.block_sparse_moe.experts.220.w3", "model.layers.61.block_sparse_moe.experts.221.w3", "model.layers.61.block_sparse_moe.experts.222.w3", "model.layers.61.block_sparse_moe.experts.223.w3", "model.layers.61.block_sparse_moe.experts.224.w3", "model.layers.61.block_sparse_moe.experts.225.w3", "model.layers.61.block_sparse_moe.experts.226.w3", "model.layers.61.block_sparse_moe.experts.227.w3", "model.layers.61.block_sparse_moe.experts.228.w3", "model.layers.61.block_sparse_moe.experts.229.w3", "model.layers.61.block_sparse_moe.experts.230.w3", "model.layers.61.block_sparse_moe.experts.231.w3", "model.layers.61.block_sparse_moe.experts.232.w3", "model.layers.61.block_sparse_moe.experts.233.w3", "model.layers.61.block_sparse_moe.experts.234.w3", "model.layers.61.block_sparse_moe.experts.235.w3", "model.layers.61.block_sparse_moe.experts.236.w3", "model.layers.61.block_sparse_moe.experts.237.w3", "model.layers.61.block_sparse_moe.experts.238.w3", "model.layers.61.block_sparse_moe.experts.239.w3", "model.layers.61.block_sparse_moe.experts.240.w3", "model.layers.61.block_sparse_moe.experts.241.w3", "model.layers.61.block_sparse_moe.experts.242.w3", "model.layers.61.block_sparse_moe.experts.243.w3", "model.layers.61.block_sparse_moe.experts.244.w3", "model.layers.61.block_sparse_moe.experts.245.w3", "model.layers.61.block_sparse_moe.experts.246.w3", "model.layers.61.block_sparse_moe.experts.247.w3", "model.layers.61.block_sparse_moe.experts.248.w3", "model.layers.61.block_sparse_moe.experts.249.w3", "model.layers.61.block_sparse_moe.experts.250.w3", "model.layers.61.block_sparse_moe.experts.251.w3", "model.layers.61.block_sparse_moe.experts.252.w3", "model.layers.61.block_sparse_moe.experts.253.w3", "model.layers.61.block_sparse_moe.experts.254.w3", "model.layers.61.block_sparse_moe.experts.255.w3" ], "candidates": [ { "dkld": -0.000986880436539661, "dbits": 2415919104 } ] }, { "idx": 309, "layers": [ "model.layers.61.block_sparse_moe.experts.0.w2", "model.layers.61.block_sparse_moe.experts.1.w2", "model.layers.61.block_sparse_moe.experts.2.w2", "model.layers.61.block_sparse_moe.experts.3.w2", "model.layers.61.block_sparse_moe.experts.4.w2", "model.layers.61.block_sparse_moe.experts.5.w2", "model.layers.61.block_sparse_moe.experts.6.w2", "model.layers.61.block_sparse_moe.experts.7.w2", "model.layers.61.block_sparse_moe.experts.8.w2", "model.layers.61.block_sparse_moe.experts.9.w2", "model.layers.61.block_sparse_moe.experts.10.w2", "model.layers.61.block_sparse_moe.experts.11.w2", "model.layers.61.block_sparse_moe.experts.12.w2", "model.layers.61.block_sparse_moe.experts.13.w2", "model.layers.61.block_sparse_moe.experts.14.w2", "model.layers.61.block_sparse_moe.experts.15.w2", "model.layers.61.block_sparse_moe.experts.16.w2", "model.layers.61.block_sparse_moe.experts.17.w2", "model.layers.61.block_sparse_moe.experts.18.w2", "model.layers.61.block_sparse_moe.experts.19.w2", "model.layers.61.block_sparse_moe.experts.20.w2", "model.layers.61.block_sparse_moe.experts.21.w2", "model.layers.61.block_sparse_moe.experts.22.w2", "model.layers.61.block_sparse_moe.experts.23.w2", "model.layers.61.block_sparse_moe.experts.24.w2", "model.layers.61.block_sparse_moe.experts.25.w2", "model.layers.61.block_sparse_moe.experts.26.w2", "model.layers.61.block_sparse_moe.experts.27.w2", "model.layers.61.block_sparse_moe.experts.28.w2", "model.layers.61.block_sparse_moe.experts.29.w2", "model.layers.61.block_sparse_moe.experts.30.w2", "model.layers.61.block_sparse_moe.experts.31.w2", "model.layers.61.block_sparse_moe.experts.32.w2", "model.layers.61.block_sparse_moe.experts.33.w2", "model.layers.61.block_sparse_moe.experts.34.w2", "model.layers.61.block_sparse_moe.experts.35.w2", "model.layers.61.block_sparse_moe.experts.36.w2", "model.layers.61.block_sparse_moe.experts.37.w2", "model.layers.61.block_sparse_moe.experts.38.w2", "model.layers.61.block_sparse_moe.experts.39.w2", "model.layers.61.block_sparse_moe.experts.40.w2", "model.layers.61.block_sparse_moe.experts.41.w2", "model.layers.61.block_sparse_moe.experts.42.w2", "model.layers.61.block_sparse_moe.experts.43.w2", "model.layers.61.block_sparse_moe.experts.44.w2", "model.layers.61.block_sparse_moe.experts.45.w2", "model.layers.61.block_sparse_moe.experts.46.w2", "model.layers.61.block_sparse_moe.experts.47.w2", "model.layers.61.block_sparse_moe.experts.48.w2", "model.layers.61.block_sparse_moe.experts.49.w2", "model.layers.61.block_sparse_moe.experts.50.w2", "model.layers.61.block_sparse_moe.experts.51.w2", "model.layers.61.block_sparse_moe.experts.52.w2", "model.layers.61.block_sparse_moe.experts.53.w2", "model.layers.61.block_sparse_moe.experts.54.w2", "model.layers.61.block_sparse_moe.experts.55.w2", "model.layers.61.block_sparse_moe.experts.56.w2", "model.layers.61.block_sparse_moe.experts.57.w2", "model.layers.61.block_sparse_moe.experts.58.w2", "model.layers.61.block_sparse_moe.experts.59.w2", "model.layers.61.block_sparse_moe.experts.60.w2", "model.layers.61.block_sparse_moe.experts.61.w2", "model.layers.61.block_sparse_moe.experts.62.w2", "model.layers.61.block_sparse_moe.experts.63.w2", "model.layers.61.block_sparse_moe.experts.64.w2", "model.layers.61.block_sparse_moe.experts.65.w2", "model.layers.61.block_sparse_moe.experts.66.w2", "model.layers.61.block_sparse_moe.experts.67.w2", "model.layers.61.block_sparse_moe.experts.68.w2", "model.layers.61.block_sparse_moe.experts.69.w2", "model.layers.61.block_sparse_moe.experts.70.w2", "model.layers.61.block_sparse_moe.experts.71.w2", "model.layers.61.block_sparse_moe.experts.72.w2", "model.layers.61.block_sparse_moe.experts.73.w2", "model.layers.61.block_sparse_moe.experts.74.w2", "model.layers.61.block_sparse_moe.experts.75.w2", "model.layers.61.block_sparse_moe.experts.76.w2", "model.layers.61.block_sparse_moe.experts.77.w2", "model.layers.61.block_sparse_moe.experts.78.w2", "model.layers.61.block_sparse_moe.experts.79.w2", "model.layers.61.block_sparse_moe.experts.80.w2", "model.layers.61.block_sparse_moe.experts.81.w2", "model.layers.61.block_sparse_moe.experts.82.w2", "model.layers.61.block_sparse_moe.experts.83.w2", "model.layers.61.block_sparse_moe.experts.84.w2", "model.layers.61.block_sparse_moe.experts.85.w2", "model.layers.61.block_sparse_moe.experts.86.w2", "model.layers.61.block_sparse_moe.experts.87.w2", "model.layers.61.block_sparse_moe.experts.88.w2", "model.layers.61.block_sparse_moe.experts.89.w2", "model.layers.61.block_sparse_moe.experts.90.w2", "model.layers.61.block_sparse_moe.experts.91.w2", "model.layers.61.block_sparse_moe.experts.92.w2", "model.layers.61.block_sparse_moe.experts.93.w2", "model.layers.61.block_sparse_moe.experts.94.w2", "model.layers.61.block_sparse_moe.experts.95.w2", "model.layers.61.block_sparse_moe.experts.96.w2", "model.layers.61.block_sparse_moe.experts.97.w2", "model.layers.61.block_sparse_moe.experts.98.w2", "model.layers.61.block_sparse_moe.experts.99.w2", "model.layers.61.block_sparse_moe.experts.100.w2", "model.layers.61.block_sparse_moe.experts.101.w2", "model.layers.61.block_sparse_moe.experts.102.w2", "model.layers.61.block_sparse_moe.experts.103.w2", "model.layers.61.block_sparse_moe.experts.104.w2", "model.layers.61.block_sparse_moe.experts.105.w2", "model.layers.61.block_sparse_moe.experts.106.w2", "model.layers.61.block_sparse_moe.experts.107.w2", "model.layers.61.block_sparse_moe.experts.108.w2", "model.layers.61.block_sparse_moe.experts.109.w2", "model.layers.61.block_sparse_moe.experts.110.w2", "model.layers.61.block_sparse_moe.experts.111.w2", "model.layers.61.block_sparse_moe.experts.112.w2", "model.layers.61.block_sparse_moe.experts.113.w2", "model.layers.61.block_sparse_moe.experts.114.w2", "model.layers.61.block_sparse_moe.experts.115.w2", "model.layers.61.block_sparse_moe.experts.116.w2", "model.layers.61.block_sparse_moe.experts.117.w2", "model.layers.61.block_sparse_moe.experts.118.w2", "model.layers.61.block_sparse_moe.experts.119.w2", "model.layers.61.block_sparse_moe.experts.120.w2", "model.layers.61.block_sparse_moe.experts.121.w2", "model.layers.61.block_sparse_moe.experts.122.w2", "model.layers.61.block_sparse_moe.experts.123.w2", "model.layers.61.block_sparse_moe.experts.124.w2", "model.layers.61.block_sparse_moe.experts.125.w2", "model.layers.61.block_sparse_moe.experts.126.w2", "model.layers.61.block_sparse_moe.experts.127.w2", "model.layers.61.block_sparse_moe.experts.128.w2", "model.layers.61.block_sparse_moe.experts.129.w2", "model.layers.61.block_sparse_moe.experts.130.w2", "model.layers.61.block_sparse_moe.experts.131.w2", "model.layers.61.block_sparse_moe.experts.132.w2", "model.layers.61.block_sparse_moe.experts.133.w2", "model.layers.61.block_sparse_moe.experts.134.w2", "model.layers.61.block_sparse_moe.experts.135.w2", "model.layers.61.block_sparse_moe.experts.136.w2", "model.layers.61.block_sparse_moe.experts.137.w2", "model.layers.61.block_sparse_moe.experts.138.w2", "model.layers.61.block_sparse_moe.experts.139.w2", "model.layers.61.block_sparse_moe.experts.140.w2", "model.layers.61.block_sparse_moe.experts.141.w2", "model.layers.61.block_sparse_moe.experts.142.w2", "model.layers.61.block_sparse_moe.experts.143.w2", "model.layers.61.block_sparse_moe.experts.144.w2", "model.layers.61.block_sparse_moe.experts.145.w2", "model.layers.61.block_sparse_moe.experts.146.w2", "model.layers.61.block_sparse_moe.experts.147.w2", "model.layers.61.block_sparse_moe.experts.148.w2", "model.layers.61.block_sparse_moe.experts.149.w2", "model.layers.61.block_sparse_moe.experts.150.w2", "model.layers.61.block_sparse_moe.experts.151.w2", "model.layers.61.block_sparse_moe.experts.152.w2", "model.layers.61.block_sparse_moe.experts.153.w2", "model.layers.61.block_sparse_moe.experts.154.w2", "model.layers.61.block_sparse_moe.experts.155.w2", "model.layers.61.block_sparse_moe.experts.156.w2", "model.layers.61.block_sparse_moe.experts.157.w2", "model.layers.61.block_sparse_moe.experts.158.w2", "model.layers.61.block_sparse_moe.experts.159.w2", "model.layers.61.block_sparse_moe.experts.160.w2", "model.layers.61.block_sparse_moe.experts.161.w2", "model.layers.61.block_sparse_moe.experts.162.w2", "model.layers.61.block_sparse_moe.experts.163.w2", "model.layers.61.block_sparse_moe.experts.164.w2", "model.layers.61.block_sparse_moe.experts.165.w2", "model.layers.61.block_sparse_moe.experts.166.w2", "model.layers.61.block_sparse_moe.experts.167.w2", "model.layers.61.block_sparse_moe.experts.168.w2", "model.layers.61.block_sparse_moe.experts.169.w2", "model.layers.61.block_sparse_moe.experts.170.w2", "model.layers.61.block_sparse_moe.experts.171.w2", "model.layers.61.block_sparse_moe.experts.172.w2", "model.layers.61.block_sparse_moe.experts.173.w2", "model.layers.61.block_sparse_moe.experts.174.w2", "model.layers.61.block_sparse_moe.experts.175.w2", "model.layers.61.block_sparse_moe.experts.176.w2", "model.layers.61.block_sparse_moe.experts.177.w2", "model.layers.61.block_sparse_moe.experts.178.w2", "model.layers.61.block_sparse_moe.experts.179.w2", "model.layers.61.block_sparse_moe.experts.180.w2", "model.layers.61.block_sparse_moe.experts.181.w2", "model.layers.61.block_sparse_moe.experts.182.w2", "model.layers.61.block_sparse_moe.experts.183.w2", "model.layers.61.block_sparse_moe.experts.184.w2", "model.layers.61.block_sparse_moe.experts.185.w2", "model.layers.61.block_sparse_moe.experts.186.w2", "model.layers.61.block_sparse_moe.experts.187.w2", "model.layers.61.block_sparse_moe.experts.188.w2", "model.layers.61.block_sparse_moe.experts.189.w2", "model.layers.61.block_sparse_moe.experts.190.w2", "model.layers.61.block_sparse_moe.experts.191.w2", "model.layers.61.block_sparse_moe.experts.192.w2", "model.layers.61.block_sparse_moe.experts.193.w2", "model.layers.61.block_sparse_moe.experts.194.w2", "model.layers.61.block_sparse_moe.experts.195.w2", "model.layers.61.block_sparse_moe.experts.196.w2", "model.layers.61.block_sparse_moe.experts.197.w2", "model.layers.61.block_sparse_moe.experts.198.w2", "model.layers.61.block_sparse_moe.experts.199.w2", "model.layers.61.block_sparse_moe.experts.200.w2", "model.layers.61.block_sparse_moe.experts.201.w2", "model.layers.61.block_sparse_moe.experts.202.w2", "model.layers.61.block_sparse_moe.experts.203.w2", "model.layers.61.block_sparse_moe.experts.204.w2", "model.layers.61.block_sparse_moe.experts.205.w2", "model.layers.61.block_sparse_moe.experts.206.w2", "model.layers.61.block_sparse_moe.experts.207.w2", "model.layers.61.block_sparse_moe.experts.208.w2", "model.layers.61.block_sparse_moe.experts.209.w2", "model.layers.61.block_sparse_moe.experts.210.w2", "model.layers.61.block_sparse_moe.experts.211.w2", "model.layers.61.block_sparse_moe.experts.212.w2", "model.layers.61.block_sparse_moe.experts.213.w2", "model.layers.61.block_sparse_moe.experts.214.w2", "model.layers.61.block_sparse_moe.experts.215.w2", "model.layers.61.block_sparse_moe.experts.216.w2", "model.layers.61.block_sparse_moe.experts.217.w2", "model.layers.61.block_sparse_moe.experts.218.w2", "model.layers.61.block_sparse_moe.experts.219.w2", "model.layers.61.block_sparse_moe.experts.220.w2", "model.layers.61.block_sparse_moe.experts.221.w2", "model.layers.61.block_sparse_moe.experts.222.w2", "model.layers.61.block_sparse_moe.experts.223.w2", "model.layers.61.block_sparse_moe.experts.224.w2", "model.layers.61.block_sparse_moe.experts.225.w2", "model.layers.61.block_sparse_moe.experts.226.w2", "model.layers.61.block_sparse_moe.experts.227.w2", "model.layers.61.block_sparse_moe.experts.228.w2", "model.layers.61.block_sparse_moe.experts.229.w2", "model.layers.61.block_sparse_moe.experts.230.w2", "model.layers.61.block_sparse_moe.experts.231.w2", "model.layers.61.block_sparse_moe.experts.232.w2", "model.layers.61.block_sparse_moe.experts.233.w2", "model.layers.61.block_sparse_moe.experts.234.w2", "model.layers.61.block_sparse_moe.experts.235.w2", "model.layers.61.block_sparse_moe.experts.236.w2", "model.layers.61.block_sparse_moe.experts.237.w2", "model.layers.61.block_sparse_moe.experts.238.w2", "model.layers.61.block_sparse_moe.experts.239.w2", "model.layers.61.block_sparse_moe.experts.240.w2", "model.layers.61.block_sparse_moe.experts.241.w2", "model.layers.61.block_sparse_moe.experts.242.w2", "model.layers.61.block_sparse_moe.experts.243.w2", "model.layers.61.block_sparse_moe.experts.244.w2", "model.layers.61.block_sparse_moe.experts.245.w2", "model.layers.61.block_sparse_moe.experts.246.w2", "model.layers.61.block_sparse_moe.experts.247.w2", "model.layers.61.block_sparse_moe.experts.248.w2", "model.layers.61.block_sparse_moe.experts.249.w2", "model.layers.61.block_sparse_moe.experts.250.w2", "model.layers.61.block_sparse_moe.experts.251.w2", "model.layers.61.block_sparse_moe.experts.252.w2", "model.layers.61.block_sparse_moe.experts.253.w2", "model.layers.61.block_sparse_moe.experts.254.w2", "model.layers.61.block_sparse_moe.experts.255.w2" ], "candidates": [ { "dkld": -3.4084916114840436e-05, "dbits": 1207959552 } ] } ], "base_kld": 0.25861078016459943, "arch_string": "MiniMaxM2ForCausalLM" }