diff --git a/src/graph/backend/dnnl/kernels/layernorm.hpp b/src/graph/backend/dnnl/kernels/layernorm.hpp index 38fa8142c79..d83ae612479 100644 --- a/src/graph/backend/dnnl/kernels/layernorm.hpp +++ b/src/graph/backend/dnnl/kernels/layernorm.hpp @@ -91,6 +91,11 @@ struct layernorm_fwd_t : public kernel_base_t { BACKEND_DNNL_ADD_PASS(pipeline, fuse_post_typecast_to_predecessor); BACKEND_DNNL_ADD_PASS(pipeline, remove_quant_data_with_no_effect); BACKEND_DNNL_ADD_PASS(pipeline, replace_quant_data_with_binary_post_op); + + // binary canonicalization and broadcast swap must precede fuse_post_ops + BACKEND_DNNL_ADD_PASS(pipeline, binary_canonicalization); + BACKEND_DNNL_ADD_PASS(pipeline, binary_broadcast_swap); + BACKEND_DNNL_ADD_PASS(pipeline, fuse_post_ops); BACKEND_DNNL_ADD_PASS(pipeline, convert_to_runtime_dst_scales); BACKEND_DNNL_ADD_PASS(pipeline, fuse_dst_scales); diff --git a/tests/benchdnn/inputs/graph/pattern/harness_int8_all b/tests/benchdnn/inputs/graph/pattern/harness_int8_all index 598e664ff85..62a47032d82 100644 --- a/tests/benchdnn/inputs/graph/pattern/harness_int8_all +++ b/tests/benchdnn/inputs/graph/pattern/harness_int8_all @@ -117,5 +117,8 @@ # layernorm with zp != 0 --reset --op-attrs=2:zps:1 --case=pattern/int8/int8_lnorm_gelu_quantize.json --reset --case=pattern/int8/int8_lnorm_multiply_quantize.json +--reset --case=pattern/int8/int8_lnorm_tc_multiply_quantize.json +# layernorm with zp != 0 and broadcast binary +--reset --op-attrs=3:zps:1 --in-shapes=5:512 --case=pattern/int8/int8_lnorm_tc_multiply_quantize.json #softmax --reset --case=pattern/int8/int8_softmax_add.json diff --git a/tests/benchdnn/inputs/graph/pattern/int8/int8_lnorm_tc_multiply_quantize.json b/tests/benchdnn/inputs/graph/pattern/int8/int8_lnorm_tc_multiply_quantize.json new file mode 100644 index 00000000000..11426ada521 --- /dev/null +++ b/tests/benchdnn/inputs/graph/pattern/int8/int8_lnorm_tc_multiply_quantize.json @@ -0,0 +1,267 @@ +{ + "version": "3.5.0", + "engine_kind": "cpu", + "fpmath_mode": 
"strict", + "input_ports": [ + 0, + 1, + 2, + 5 + ], + "output_ports": [ + 7 + ], + "graph": [ + { + "id": 0, + "name": "layernorm", + "kind": "LayerNorm", + "attrs": { + "begin_norm_axis": { + "type": "s64", + "value": -1 + }, + "use_affine": { + "type": "bool", + "value": 1 + }, + "keep_stats": { + "type": "bool", + "value": 0 + }, + "epsilon": { + "type": "f32", + "value": 0.0625 + } + }, + "inputs": [ + { + "id": 0, + "dtype": "bf16", + "shape": [ + 1, + 128, + 512 + ], + "stride": [ + 65536, + 512, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + }, + { + "id": 1, + "dtype": "f32", + "shape": [ + 512 + ], + "stride": [ + 1 + ], + "layout_type": "strided", + "property_type": "undef" + }, + { + "id": 2, + "dtype": "f32", + "shape": [ + 512 + ], + "stride": [ + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ], + "outputs": [ + { + "id": 3, + "dtype": "bf16", + "shape": [ + 1, + 128, + 512 + ], + "stride": [ + 65536, + 512, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ] + }, + { + "id": 1, + "name": "typecast", + "kind": "TypeCast", + "attrs": {}, + "inputs": [ + { + "id": 3, + "dtype": "bf16", + "shape": [ + 1, + 128, + 512 + ], + "stride": [ + 65536, + 512, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ], + "outputs": [ + { + "id": 4, + "dtype": "f32", + "shape": [ + 1, + 128, + 512 + ], + "stride": [ + 65536, + 512, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ] + }, + { + "id": 2, + "name": "multiply", + "kind": "Multiply", + "attrs": { + "auto_broadcast": { + "type": "string", + "value": "numpy" + } + }, + "inputs": [ + { + "id": 4, + "dtype": "f32", + "shape": [ + 1, + 128, + 512 + ], + "stride": [ + 65536, + 512, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + }, + { + "id": 5, + "dtype": "f32", + "shape": [ + 1, + 128, + 512 + ], + "stride": [ + 65536, + 512, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + 
], + "outputs": [ + { + "id": 6, + "dtype": "f32", + "shape": [ + 1, + 128, + 512 + ], + "stride": [ + 65536, + 512, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ] + }, + { + "id": 3, + "name": "quantize", + "kind": "Quantize", + "attrs": { + "axis": { + "type": "s64", + "value": 0 + }, + "qtype": { + "type": "string", + "value": "per_tensor" + }, + "scales": { + "type": "f32[]", + "value": [ + 0.5 + ] + }, + "zps": { + "type": "s64[]", + "value": [ + 0 + ] + } + }, + "inputs": [ + { + "id": 6, + "dtype": "f32", + "shape": [ + 1, + 128, + 512 + ], + "stride": [ + 65536, + 512, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ], + "outputs": [ + { + "id": 7, + "dtype": "s8", + "shape": [ + 1, + 128, + 512 + ], + "stride": [ + 65536, + 512, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ] + } + ] +}