Skip to content
15 changes: 15 additions & 0 deletions aiter/configs/model_configs/gptoss_fp4_tuned_fmoe.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
cu_num,token,model_dim,inter_dim,expert,topk,act_type,dtype,q_dtype_a,q_dtype_w,q_type,use_g1u1,doweight_stage1,block_m,ksplit,us1,kernelName1,err1,us2,kernelName2,err2,us,run_1stage,tflops,bw,_tag
256,512,3072,3072,128,4,ActivationType.Swiglu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,214.3544,flydsl_moe1_afp8_wfp4_bf16_t32x128x256_w2_gui_fp8,0.0%,111.631,flydsl_moe2_afp8_wfp4_bf16_t32x256x256_atomic_bnt2_persist,0.0%,325.9854,0,355.73,11131.16,
256,1024,3072,3072,128,4,ActivationType.Swiglu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,235.2077,flydsl_moe1_afp8_wfp4_bf16_t64x256x256_gui_fp8,0.0%,125.5088,flydsl_moe2_afp8_wfp4_bf16_t64x256x256_atomic_bnt2,0.0%,360.7165,0,642.97,10072.5,
256,2048,3072,3072,128,4,ActivationType.Swiglu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,312.5584,flydsl_moe1_afp8_wfp4_bf16_t64x256x256_w2_bnt0_gui_fp8,0.0%,172.1029,flydsl_moe2_afp8_wfp4_bf16_t64x128x256_atomic_persist,0.0%,484.6613,0,957.07,7516.08,
256,4096,3072,3072,128,4,ActivationType.Swiglu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,442.3352,flydsl_moe1_afp8_wfp4_bf16_t128x256x256_w2_bnt0_gui_fp8,0.0%,256.1523,flydsl_moe2_afp8_wfp4_bf16_t64x128x256_atomic_persist_sbm128,0.0%,698.4875,0,1328.17,5242.22,
256,8192,3072,3072,128,4,ActivationType.Swiglu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,714.6281,flydsl_moe1_afp8_wfp4_bf16_t128x256x256_bnt0_gui_fp8,0.0%,413.5452,flydsl_moe2_afp8_wfp4_bf16_t64x256x256_atomic_persist_sbm128,0.0%,1128.1733,0,1644.63,3279.08,
256,16384,3072,3072,128,4,ActivationType.Swiglu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,1356.5778,flydsl_moe1_afp8_wfp4_bf16_t128x256x256_w2_bnt0_gui,0.0%,731.3886,flydsl_moe2_afp8_wfp4_bf16_t64x256x256_atomic_sbm128,0.0%,2087.9664,0,1777.26,1807.92,
256,32768,3072,3072,128,4,ActivationType.Swiglu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,2474.7814,flydsl_moe1_afp8_wfp4_bf16_t64x256x256_w2_bnt0_gui_fp8,0.0%,1348.5732,flydsl_moe2_afp8_wfp4_bf16_t64x256x256_atomic_xcd4_persist,0.0%,3823.3546,0,1941.15,1026.81,
256,512,3072,3072,128,4,ActivationType.Swiglu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,527.0715,cktile_a8w4_bm32,0.0,117.3402,cktile_a8w4_bm32,0.0,644.4117,0,0.0,0.0,flydsl_fallback
256,1024,3072,3072,128,4,ActivationType.Swiglu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,286.4048,cktile_a8w4_bm32,0.0,142.6674,cktile_a8w4_bm32,0.0,429.0722,0,0.0,0.0,flydsl_fallback
256,2048,3072,3072,128,4,ActivationType.Swiglu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,446.6267,cktile_a8w4_bm32,0.0,181.4069,cktile_a8w4_bm32,0.0,628.0336,0,0.0,0.0,flydsl_fallback
256,4096,3072,3072,128,4,ActivationType.Swiglu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,786.0193,cktile_a8w4_bm32,0.0,275.9191,cktile_a8w4_bm32,0.0,1061.9384,0,0.0,0.0,flydsl_fallback
256,8192,3072,3072,128,4,ActivationType.Swiglu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,1478.4233,cktile_a8w4_bm32,0.0,480.9397,cktile_a8w4_bm32,0.0,1959.363,0,0.0,0.0,flydsl_fallback
256,16384,3072,3072,128,4,ActivationType.Swiglu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,2752.7649,cktile_a8w4_bm32,0.0,908.23,cktile_a8w4_bm32,0.0,3660.9949,0,0.0,0.0,flydsl_fallback
256,32768,3072,3072,128,4,ActivationType.Swiglu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,5411.816400000001,cktile_a8w4_bm32,0.0,1750.1288,cktile_a8w4_bm32,0.0,7161.9452,0,0.0,0.0,flydsl_fallback
8 changes: 8 additions & 0 deletions aiter/configs/model_configs/gptoss_fp4_untuned_fmoe.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
token,model_dim,inter_dim,expert,topk,act_type,dtype,q_dtype_a,q_dtype_w,q_type,use_g1u1,doweight_stage1
512,3072,3072,128,4,ActivationType.Swiglu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0
1024,3072,3072,128,4,ActivationType.Swiglu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0
2048,3072,3072,128,4,ActivationType.Swiglu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0
4096,3072,3072,128,4,ActivationType.Swiglu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0
8192,3072,3072,128,4,ActivationType.Swiglu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0
16384,3072,3072,128,4,ActivationType.Swiglu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0
32768,3072,3072,128,4,ActivationType.Swiglu,torch.bfloat16,torch.float8_e4m3fn,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0
Loading
Loading