Skip to content

Commit

Permalink
graph: backend: dnnl: support lnorm + q with zps!=0
Browse files Browse the repository at this point in the history
  • Loading branch information
rongzha1 authored and vpirogov committed Jun 18, 2024
1 parent 4a045e4 commit dc2701a
Show file tree
Hide file tree
Showing 4 changed files with 5 additions and 3 deletions.
3 changes: 2 additions & 1 deletion src/graph/backend/dnnl/kernels/layernorm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,9 @@ struct layernorm_fwd_t : public kernel_base_t {

BACKEND_DNNL_ADD_PASS(pipeline, lower_down);
BACKEND_DNNL_ADD_PASS(pipeline, fuse_post_typecast_to_predecessor);
- BACKEND_DNNL_ADD_PASS(pipeline, fuse_post_ops);
  BACKEND_DNNL_ADD_PASS(pipeline, remove_quant_data_with_no_effect);
+ BACKEND_DNNL_ADD_PASS(pipeline, replace_quant_data_with_binary_post_op);
+ BACKEND_DNNL_ADD_PASS(pipeline, fuse_post_ops);
BACKEND_DNNL_ADD_PASS(pipeline, convert_to_runtime_dst_scales);
BACKEND_DNNL_ADD_PASS(pipeline, fuse_dst_scales);
BACKEND_DNNL_ADD_PASS(pipeline, infer_shape);
Expand Down
1 change: 0 additions & 1 deletion src/graph/backend/dnnl/patterns/layernorm_fusion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ DNNL_BACKEND_REGISTER_PATTERN_MATCHER_PASS(dnnl, layernorm_post_ops_fusion_cpu)
auto q_graph = std::make_shared<pb_graph_t>();
pm::pb_op_t *pquantize
= q_graph->append_op(graph::op_kind::Quantize);
- pquantize->append_decision_function(check_zps_values<0>);
q_graph->create_input_port(0, pquantize, 0);
q_graph->create_output_port(0, pquantize, 0);
pgraph->append_optional(
Expand Down
2 changes: 2 additions & 0 deletions tests/benchdnn/inputs/graph/pattern/harness_int8_all
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@
--reset --in-shapes=0:1x128x150x150*acdb+1:1x128x150x150*acdb+2:1x128x150x150*acdb --op-attrs=3:axis:0 --case=pattern/int8/int8_concat_fusion_3.json
#layernorm
--reset --case=pattern/int8/int8_lnorm_gelu_quantize.json
+ # layernorm with zp != 0
+ --reset --op-attrs=2:zps:1 --case=pattern/int8/int8_lnorm_gelu_quantize.json
--reset --case=pattern/int8/int8_lnorm_multiply_quantize.json
#softmax
--reset --case=pattern/int8/int8_softmax_add.json
2 changes: 1 addition & 1 deletion tests/gtests/graph/unit/backend/dnnl/test_pass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11926,7 +11926,7 @@ TEST(test_pass_pass_system, FuseLayernormTypecastQuantize_CPU) {
ASSERT_EQ(agraph.get_partitions()[0]->get_outputs()[0].id, 5U);
}

- TEST(test_pass_pass_system, NotFuseLayernormTypecast) {
+ TEST(test_pass_pass_system, NotFuseLayernormTypecast_GPU) {
/*
| (bf16)
layernorm
Expand Down

0 comments on commit dc2701a

Please sign in to comment.