graph: backend: dnnl: support lnorm + q with zps!=0

oneapi-src · Jun 18, 2024 · dc2701a
1 parent 4a045e4
commit dc2701a
Show file tree

Hide file tree

Showing 4 changed files with 5 additions and 3 deletions.
diff --git a/src/graph/backend/dnnl/kernels/layernorm.hpp b/src/graph/backend/dnnl/kernels/layernorm.hpp
@@ -89,8 +89,9 @@ struct layernorm_fwd_t : public kernel_base_t {
 
         BACKEND_DNNL_ADD_PASS(pipeline, lower_down);
         BACKEND_DNNL_ADD_PASS(pipeline, fuse_post_typecast_to_predecessor);
-        BACKEND_DNNL_ADD_PASS(pipeline, fuse_post_ops);
         BACKEND_DNNL_ADD_PASS(pipeline, remove_quant_data_with_no_effect);
+        BACKEND_DNNL_ADD_PASS(pipeline, replace_quant_data_with_binary_post_op);
+        BACKEND_DNNL_ADD_PASS(pipeline, fuse_post_ops);
         BACKEND_DNNL_ADD_PASS(pipeline, convert_to_runtime_dst_scales);
         BACKEND_DNNL_ADD_PASS(pipeline, fuse_dst_scales);
         BACKEND_DNNL_ADD_PASS(pipeline, infer_shape);

diff --git a/src/graph/backend/dnnl/patterns/layernorm_fusion.cpp b/src/graph/backend/dnnl/patterns/layernorm_fusion.cpp
@@ -79,7 +79,6 @@ DNNL_BACKEND_REGISTER_PATTERN_MATCHER_PASS(dnnl, layernorm_post_ops_fusion_cpu)
                     auto q_graph = std::make_shared<pb_graph_t>();
                     pm::pb_op_t *pquantize
                             = q_graph->append_op(graph::op_kind::Quantize);
-                    pquantize->append_decision_function(check_zps_values<0>);
                     q_graph->create_input_port(0, pquantize, 0);
                     q_graph->create_output_port(0, pquantize, 0);
                     pgraph->append_optional(

diff --git a/tests/benchdnn/inputs/graph/pattern/harness_int8_all b/tests/benchdnn/inputs/graph/pattern/harness_int8_all
@@ -114,6 +114,8 @@
 --reset --in-shapes=0:1x128x150x150*acdb+1:1x128x150x150*acdb+2:1x128x150x150*acdb --op-attrs=3:axis:0 --case=pattern/int8/int8_concat_fusion_3.json
 #layernorm
 --reset --case=pattern/int8/int8_lnorm_gelu_quantize.json
+# layernorm with zp != 0
+--reset --op-attrs=2:zps:1 --case=pattern/int8/int8_lnorm_gelu_quantize.json
 --reset --case=pattern/int8/int8_lnorm_multiply_quantize.json
 #softmax
 --reset --case=pattern/int8/int8_softmax_add.json
diff --git a/tests/gtests/graph/unit/backend/dnnl/test_pass.cpp b/tests/gtests/graph/unit/backend/dnnl/test_pass.cpp
@@ -11926,7 +11926,7 @@ TEST(test_pass_pass_system, FuseLayernormTypecastQuantize_CPU) {
     ASSERT_EQ(agraph.get_partitions()[0]->get_outputs()[0].id, 5U);
 }
 
-TEST(test_pass_pass_system, NotFuseLayernormTypecast) {
+TEST(test_pass_pass_system, NotFuseLayernormTypecast_GPU) {
     /*
              | (bf16)
            layernorm