Skip to content

Commit

Permalink
graph: backend: dnnl: fix bug for pattern lnorm + tc + mul + q
Browse files Browse the repository at this point in the history
  • Loading branch information
rongzha1 authored and TaoLv committed Jul 17, 2024
1 parent 2340f5a commit 0013e8c
Show file tree
Hide file tree
Showing 3 changed files with 275 additions and 0 deletions.
5 changes: 5 additions & 0 deletions src/graph/backend/dnnl/kernels/layernorm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,11 @@ struct layernorm_fwd_t : public kernel_base_t {
BACKEND_DNNL_ADD_PASS(pipeline, fuse_post_typecast_to_predecessor);
BACKEND_DNNL_ADD_PASS(pipeline, remove_quant_data_with_no_effect);
BACKEND_DNNL_ADD_PASS(pipeline, replace_quant_data_with_binary_post_op);

// binary_canonicalization and binary_broadcast_swap must run before fuse_post_ops
BACKEND_DNNL_ADD_PASS(pipeline, binary_canonicalization);
BACKEND_DNNL_ADD_PASS(pipeline, binary_broadcast_swap);

BACKEND_DNNL_ADD_PASS(pipeline, fuse_post_ops);
BACKEND_DNNL_ADD_PASS(pipeline, convert_to_runtime_dst_scales);
BACKEND_DNNL_ADD_PASS(pipeline, fuse_dst_scales);
Expand Down
3 changes: 3 additions & 0 deletions tests/benchdnn/inputs/graph/pattern/harness_int8_all
Original file line number Diff line number Diff line change
Expand Up @@ -117,5 +117,8 @@
# layernorm with zp != 0
--reset --op-attrs=2:zps:1 --case=pattern/int8/int8_lnorm_gelu_quantize.json
--reset --case=pattern/int8/int8_lnorm_multiply_quantize.json
--reset --case=pattern/int8/int8_lnorm_tc_multiply_quantize.json
# layernorm with zp != 0 and broadcast binary
--reset --op-attrs=3:zps:1 --in-shapes=5:512 --case=pattern/int8/int8_lnorm_tc_multiply_quantize.json
# softmax
--reset --case=pattern/int8/int8_softmax_add.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,267 @@
{
"version": "3.5.0",
"engine_kind": "cpu",
"fpmath_mode": "strict",
"input_ports": [
0,
1,
2,
5
],
"output_ports": [
7
],
"graph": [
{
"id": 0,
"name": "layernorm",
"kind": "LayerNorm",
"attrs": {
"begin_norm_axis": {
"type": "s64",
"value": -1
},
"use_affine": {
"type": "bool",
"value": 1
},
"keep_stats": {
"type": "bool",
"value": 0
},
"epsilon": {
"type": "f32",
"value": 0.0625
}
},
"inputs": [
{
"id": 0,
"dtype": "bf16",
"shape": [
1,
128,
512
],
"stride": [
65536,
512,
1
],
"layout_type": "strided",
"property_type": "undef"
},
{
"id": 1,
"dtype": "f32",
"shape": [
512
],
"stride": [
1
],
"layout_type": "strided",
"property_type": "undef"
},
{
"id": 2,
"dtype": "f32",
"shape": [
512
],
"stride": [
1
],
"layout_type": "strided",
"property_type": "undef"
}
],
"outputs": [
{
"id": 3,
"dtype": "bf16",
"shape": [
1,
128,
512
],
"stride": [
65536,
512,
1
],
"layout_type": "strided",
"property_type": "undef"
}
]
},
{
"id": 1,
"name": "typecast",
"kind": "TypeCast",
"attrs": {},
"inputs": [
{
"id": 3,
"dtype": "bf16",
"shape": [
1,
128,
512
],
"stride": [
65536,
512,
1
],
"layout_type": "strided",
"property_type": "undef"
}
],
"outputs": [
{
"id": 4,
"dtype": "f32",
"shape": [
1,
128,
512
],
"stride": [
65536,
512,
1
],
"layout_type": "strided",
"property_type": "undef"
}
]
},
{
"id": 2,
"name": "multiply",
"kind": "Multiply",
"attrs": {
"auto_broadcast": {
"type": "string",
"value": "numpy"
}
},
"inputs": [
{
"id": 4,
"dtype": "f32",
"shape": [
1,
128,
512
],
"stride": [
65536,
512,
1
],
"layout_type": "strided",
"property_type": "undef"
},
{
"id": 5,
"dtype": "f32",
"shape": [
1,
128,
512
],
"stride": [
65536,
512,
1
],
"layout_type": "strided",
"property_type": "undef"
}
],
"outputs": [
{
"id": 6,
"dtype": "f32",
"shape": [
1,
128,
512
],
"stride": [
65536,
512,
1
],
"layout_type": "strided",
"property_type": "undef"
}
]
},
{
"id": 3,
"name": "quantize",
"kind": "Quantize",
"attrs": {
"axis": {
"type": "s64",
"value": 0
},
"qtype": {
"type": "string",
"value": "per_tensor"
},
"scales": {
"type": "f32[]",
"value": [
0.5
]
},
"zps": {
"type": "s64[]",
"value": [
0
]
}
},
"inputs": [
{
"id": 6,
"dtype": "f32",
"shape": [
1,
128,
512
],
"stride": [
65536,
512,
1
],
"layout_type": "strided",
"property_type": "undef"
}
],
"outputs": [
{
"id": 7,
"dtype": "s8",
"shape": [
1,
128,
512
],
"stride": [
65536,
512,
1
],
"layout_type": "strided",
"property_type": "undef"
}
]
}
]
}

0 comments on commit 0013e8c

Please sign in to comment.