Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions aiter/configs/model_configs/dsv4_fp8fp4_tuned_fmoe.csv
Original file line number Diff line number Diff line change
Expand Up @@ -213,3 +213,35 @@ cu_num,token,model_dim,inter_dim,expert,topk,act_type,dtype,q_dtype_a,q_dtype_w,
256,16384,4096,256,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,329.48530000000005,cktile_a8w4_bm64,0.0,639.364,cktile_a8w4_bm64,0.0,968.8493,0,0,0.0,0.0,flydsl_fallback
256,32768,4096,256,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,614.7645,cktile_a8w4_bm64,0.0,1269.2433,cktile_a8w4_bm64,0.0,1884.0078,0,0,0.0,0.0,flydsl_fallback
256,131072,4096,256,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,2501.0829,cktile_a8w4_bm64,0.0,5285.1745,cktile_a8w4_bm64,0.0,7786.2574,0,0,0.0,0.0,flydsl_fallback
256,1,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,13.1954,flydsl_moe1_afp8_wfp4_bf16_t32x64x256_w2_gui_kw2_fp8,0.0%,11.3628,flydsl_moe2_afp8_wfp4_bf16_t32x128x256_reduce_bnt2,0.0%,24.5582,0,0,12.3,262334.5,
256,2,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,20.1426,flydsl_moe1_afp8_wfp4_bf16_t32x64x256_w3_gui_fp8,0.0%,14.7358,flydsl_moe2_afp8_wfp4_bf16_t32x128x256_reduce_persist,0.0%,34.8784,0,0,17.32,184712.47,
256,4,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,35.6444,flydsl_moe1_afp8_wfp4_bf16_t32x64x256_w3_gui_fp8,0.0%,22.9261,flydsl_moe2_afp8_wfp4_bf16_t32x128x256_reduce_bnt2_persist,0.0%,58.5705,0,0,20.62,109995.65,
256,8,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,66.3011,flydsl_moe1_afp8_wfp4_bf16_t32x64x256_w4_gui_fp8,0.0%,39.381,flydsl_moe2_afp8_wfp4_bf16_t32x128x256_reduce_persist,0.0%,105.6821,0,0,22.86,60961.59,
256,16,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,124.9989,flydsl_moe1_afp8_wfp4_bf16_t32x64x256_w3_gui_kw2_fp8,0.0%,72.1626,cktile_a8w4_bm32,0.0%,197.1615,0,0,24.51,32677.01,
256,32,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,201.2978,flydsl_moe1_afp8_wfp4_bf16_t32x64x256_w3_gui_kw2_fp8,0.0%,110.8525,flydsl_moe2_afp8_wfp4_bf16_t32x128x128_reduce_bnt2_persist,0.0%,312.1503,0,0,30.96,20640.2,
256,64,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,274.0652,flydsl_moe1_afp8_wfp4_bf16_t32x64x256_w3_gui_kw2_fp8,0.0%,151.599,flydsl_moe2_afp8_wfp4_bf16_t32x128x256_atomic_persist,0.0%,425.6642,0,0,45.41,15136.9,
256,128,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,321.1872,flydsl_moe1_afp8_wfp4_bf16_t32x64x256_w4_gui_kw2_fp8,0.0%,178.1048,cktile_a8w4_bm32,0.0%,499.292,0,0,77.42,12906.32,
256,256,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,345.4854,flydsl_moe1_afp8_wfp4_bf16_t32x64x256_w4_gui_kw2_fp8,0.0%,193.9367,flydsl_moe2_afp8_wfp4_bf16_t32x128x128_atomic_bnt2_persist,0.0%,539.4221,0,0,143.32,11949.08,
256,512,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,356.8284,flydsl_moe1_afp8_wfp4_bf16_t32x64x256_w4_gui_kw2_fp8,0.0%,197.886,flydsl_moe2_afp8_wfp4_bf16_t32x128x256_atomic,0.0%,554.7144,0,0,278.74,11625.34,
256,1024,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,392.833,flydsl_moe1_afp8_wfp4_bf16_t32x128x256_w3_gui_fp8,0.0%,210.1492,flydsl_moe2_afp8_wfp4_bf16_t32x128x256_atomic,0.0%,602.9822,0,0,512.85,10705.18,
256,2048,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,419.4653,flydsl_moe1_afp8_wfp4_bf16_t64x128x256_w3_gui_fp8,0.0%,246.9579,flydsl_moe2_afp8_wfp4_bf16_t64x128x256_atomic_bnt2_persist,0.0%,666.4232,0,0,928.05,9704.97,
256,4096,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,563.2488,flydsl_moe1_afp8_wfp4_bf16_t128x256x256_w4_gui_fp8,0.0%,356.8566,flydsl_moe2_afp8_wfp4_bf16_t64x256x256_reduce_bnt2_persist_sbm128,0.0%,920.1054,0,0,1344.36,7056.56,
256,8192,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,1000.4671,flydsl_moe1_afp8_wfp4_bf16_t64x256x256_w4_bnt0_gui,0.0%,573.6539,flydsl_moe2_afp8_wfp4_bf16_t64x256x256_reduce_bnt2,0.0%,1574.121,0,0,1571.61,4156.68,
256,16384,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,1590.7982,flydsl_moe1_afp8_wfp4_bf16_t128x256x256_bnt0_gui_fp8,0.0%,1045.6778,flydsl_moe2_afp8_wfp4_bf16_t64x256x128_atomic_sbm128,0.0%,2636.476,0,0,1876.67,2519.95,
256,32768,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,2915.7859,flydsl_moe1_afp8_wfp4_bf16_t128x256x256_w2_bnt0_gui_fp8,0.0%,1991.2871,flydsl_moe2_afp8_wfp4_bf16_t64x256x128_atomic_bnt2_sbm128,0.0%,4907.073,0,0,2016.6,1394.95,
256,1,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,26.0098,cktile_a8w4_bm32,0.0,15.5499,cktile_a8w4_bm32,0.0,41.5597,0,0,0.0,0.0,flydsl_fallback
256,2,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,29.9158,cktile_a8w4_bm32,0.0,17.2712,cktile_a8w4_bm32,0.0,47.187,0,0,0.0,0.0,flydsl_fallback
256,4,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,46.4114,cktile_a8w4_bm32,0.0,25.4266,cktile_a8w4_bm32,0.0,71.838,0,0,0.0,0.0,flydsl_fallback
256,8,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,81.1698,cktile_a8w4_bm32,0.0,42.8618,cktile_a8w4_bm32,0.0,124.0316,0,0,0.0,0.0,flydsl_fallback
256,16,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,136.22549999999998,cktile_a8w4_bm32,0.0,72.1626,cktile_a8w4_bm32,0.0,208.3881,0,0,0.0,0.0,flydsl_fallback
256,32,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,216.1381,cktile_a8w4_bm32,0.0,113.7684,cktile_a8w4_bm32,0.0,329.9065,0,0,0.0,0.0,flydsl_fallback
256,64,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,286.9568,cktile_a8w4_bm32,0.0,153.4274,cktile_a8w4_bm32,0.0,440.3842,0,0,0.0,0.0,flydsl_fallback
256,128,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,337.0177,cktile_a8w4_bm32,0.0,178.1048,cktile_a8w4_bm32,0.0,515.1225,0,0,0.0,0.0,flydsl_fallback
256,256,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,361.5862,cktile_a8w4_bm32,0.0,194.9629,cktile_a8w4_bm32,0.0,556.5491,0,0,0.0,0.0,flydsl_fallback
256,512,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,396.1095,cktile_a8w4_bm32,0.0,215.6017,cktile_a8w4_bm32,0.0,611.7112,0,0,0.0,0.0,flydsl_fallback
256,1024,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,418.9211,cktile_a8w4_bm32,0.0,218.5121,cktile_a8w4_bm32,0.0,637.4332,0,0,0.0,0.0,flydsl_fallback
256,2048,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,483.1896,cktile_a8w4_bm64,0.0,268.486,cktile_a8w4_bm64,0.0,751.6756,0,0,0.0,0.0,flydsl_fallback
256,4096,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,730.8670999999999,cktile_a8w4_bm64,0.0,426.8268,cktile_a8w4_bm64,0.0,1157.6939,0,0,0.0,0.0,flydsl_fallback
256,8192,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,1288.683,cktile_a8w4_bm64,0.0,746.6747,cktile_a8w4_bm64,0.0,2035.3577,0,0,0.0,0.0,flydsl_fallback
256,16384,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,2307.6415,cktile_a8w4_bm64,0.0,1423.8539,cktile_a8w4_bm64,0.0,3731.4954,0,0,0.0,0.0,flydsl_fallback
256,32768,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,4420.6118,cktile_a8w4_bm64,0.0,2684.5026,cktile_a8w4_bm64,0.0,7105.1144,0,0,0.0,0.0,flydsl_fallback
18 changes: 17 additions & 1 deletion aiter/configs/model_configs/dsv4_fp8fp4_untuned_fmoe.csv
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,20 @@ token,model_dim,inter_dim,expert,topk,act_type,dtype,q_dtype_a,q_dtype_w,q_type,
4096,7168,1536,385,7,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0
8192,7168,1536,385,7,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0
16384,7168,1536,385,7,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0
32768,7168,1536,385,7,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0
32768,7168,1536,385,7,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0
1,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0
2,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0
4,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0
8,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0
16,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0
32,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0
64,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0
128,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0
256,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0
512,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0
1024,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0
2048,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0
4096,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0
8192,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0
16384,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0
32768,4096,2048,256,6,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0
Loading
Loading