
api: depthwise post-op with any kernel, stride and padding values
dzarukin committed Jul 6, 2022
1 parent 2be0060 commit 79b019b
Showing 20 changed files with 629 additions and 124 deletions.
10 changes: 6 additions & 4 deletions doc/programming_model/attributes_post_ops.md
@@ -164,12 +164,14 @@ convolution.
The @ref dnnl::primitive::kind of this post-op
is #dnnl::primitive::kind::convolution.

There are two variants of this post-op: `dw_k3s1p1` and `dw_k3s2p1` for stride-1
and stride-2 respectively.
Three variants of the depthwise post-op are supported:
* `dw_k3s1p1` for the case of stride 1, kernel size 3, and left padding of 1.
* `dw_k3s2p1` for the case of stride 2, kernel size 3, and left padding of 1.
* `dw` for a general case.

API:
- C: @ref dnnl_post_ops_append_dw_k3s1p1 , @ref dnnl_post_ops_append_dw_k3s2p1
- C++: @ref dnnl::post_ops::append_dw_k3s1p1 , @ref dnnl::post_ops::append_dw_k3s2p1
- C: @ref dnnl_post_ops_append_dw , @ref dnnl_post_ops_append_dw_k3s1p1 , @ref dnnl_post_ops_append_dw_k3s2p1
- C++: @ref dnnl::post_ops::append_dw , @ref dnnl::post_ops::append_dw_k3s1p1 , @ref dnnl::post_ops::append_dw_k3s2p1
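The general `dw` variant subsumes the two fixed ones: `dw_k3s1p1` and `dw_k3s2p1` are `dw` with kernel size 3, left padding 1, and stride 1 or 2 respectively. A minimal sketch of the fused output spatial size (the helper name is ours, not part of the oneDNN API):

```python
import math

def fused_output_size(size_1x1, stride):
    # Output spatial size of the fused depthwise convolution:
    # ceil division of the 1x1 convolution's output size by the stride.
    return math.ceil(size_1x1 / stride)

print(fused_output_size(56, 1))  # dw_k3s1p1: stride 1 keeps the size -> 56
print(fused_output_size(56, 2))  # dw_k3s2p1: stride 2 halves it -> 28
```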

For better readability, below we assume a 2D convolution and use the following
notations:
64 changes: 64 additions & 0 deletions include/oneapi/dnnl/dnnl.h
@@ -792,6 +792,70 @@ dnnl_status_t DNNL_API dnnl_post_ops_get_params_eltwise(
const_dnnl_post_ops_t post_ops, int index, float *scale,
dnnl_alg_kind_t *alg_kind, float *alpha, float *beta);

/// Appends a depthwise post-op convolution.
///
/// This post-op can only be fused with a 2D 1x1 convolution (a convolution
/// whose weights spatial dimensions equal 1, i.e., kh = kw = 1).
///
/// The kind of this post-op is #dnnl_convolution.
///
/// The number of outputs for the fused primitive is one. The output spatial
/// size can be derived as follows:
///
/// output_height = ceil(output_height_1x1_convolution, stride)
/// output_width = ceil(output_width_1x1_convolution, stride)
///
/// See @ref dev_guide_attributes_post_ops_depthwise and
/// @ref dev_guide_attributes_post_ops_depthwise_fusion for more info.
///
/// @param post_ops Post-ops.
/// @param weights_data_type Weights data type of depthwise post-op
/// @param bias_data_type Bias data type of depthwise post-op
/// @param dst_data_type Output data type of depthwise post-op
/// @param kernel_size Size of kernel of depthwise post-op
/// @param stride_size Size of stride of depthwise post-op
/// @param padding_l_size Size of left and top paddings of depthwise post-op
/// @param count Length of the array of scaling factors @p scales.
/// @param mask Scaling factors correspondence mask that defines the
///     correspondence between the output tensor dimensions and the @p
///     scales array. The set i-th bit indicates that a dedicated output scaling
///     factor is used for each index along that dimension. The mask value of 0
///     implies a common scaling factor for the whole output tensor.
/// @param scales Pointer to a constant array of float scaling factors.
/// @returns #dnnl_success on success and a status describing the error
/// otherwise
dnnl_status_t DNNL_API dnnl_post_ops_append_dw(dnnl_post_ops_t post_ops,
dnnl_data_type_t weights_data_type, dnnl_data_type_t bias_data_type,
dnnl_data_type_t dst_data_type, dnnl_dim_t kernel_size,
dnnl_dim_t stride_size, dnnl_dim_t padding_l_size, dnnl_dim_t count,
int mask, const float *scales);

/// Returns the parameters of a depthwise post-op.
///
/// @param post_ops Post-ops.
/// @param index Index of the depthwise post-op.
/// @param weights_data_type Weights data type of depthwise post-op
/// @param bias_data_type Bias data type of depthwise post-op
/// @param dst_data_type Output data type of depthwise post-op
/// @param kernel_size Size of kernel of depthwise post-op
/// @param stride_size Size of stride of depthwise post-op
/// @param padding_l_size Size of left and top paddings of depthwise post-op
/// @param count Output length of the array of scaling factors @p scales.
/// @param mask Output scaling factors correspondence mask that defines the
/// correspondence between the output tensor dimensions and the @p
/// scales array. The set i-th bit indicates that a dedicated output scaling
/// factor is used for each index along that dimension. The mask value of 0
/// implies a common scaling factor for the whole output tensor.
/// @param scales Output pointer to a constant array of float scaling factors.
/// @returns #dnnl_success on success and a status describing the error
/// otherwise
dnnl_status_t DNNL_API dnnl_post_ops_get_params_dw(
const_dnnl_post_ops_t post_ops, int index,
dnnl_data_type_t *weights_data_type, dnnl_data_type_t *bias_data_type,
dnnl_data_type_t *dst_data_type, dnnl_dim_t *kernel_size,
dnnl_dim_t *stride_size, dnnl_dim_t *padding_l_size, dnnl_dim_t *count,
int *mask, const float **scales);

/// Appends a depthwise post-op convolution with stride 1.
///
/// This post-op can only be fused with a 2D 1x1 convolution (convolution with
136 changes: 96 additions & 40 deletions include/oneapi/dnnl/dnnl.hpp
@@ -3081,26 +3081,28 @@ struct post_ops : public handle<dnnl_post_ops_t> {
aalgorithm = static_cast<dnnl::algorithm>(c_alg);
}

/// Appends a depthwise post-op convolution with stride 1.
/// Appends a depthwise post-op convolution.
///
/// This post-op can only be fused with a 2D 1x1 convolution (convolution
/// with weights spatial dimension equal to 1 i.e., kh=kw=1).
///
/// The kind of this post-op is #dnnl_convolution.
///
/// The number of outputs for the primitive remains the same as before. The
/// output size remains the same as the original primitive due to stride=1.
///
/// The Post-op can be defined as:
/// spatial size can be derived as below:
///
/// dst[:] <- scales * (conv_dw(conv_1x1))
/// output_height = ceil(output_height_1x1_convolution, stride)
/// output_width = ceil(output_width_1x1_convolution, stride)
///
/// See @ref dev_guide_attributes_post_ops_depthwise and
/// @ref dev_guide_attributes_post_ops_depthwise_fusion for more info.
///
/// @param weights_data_type Weights data type of depthwise post-op
/// @param bias_data_type Bias data type of depthwise post-op
/// @param dst_data_type Output data type of depthwise post-op
/// @param kernel_size Size of kernel of depthwise post-op
/// @param stride_size Size of stride of depthwise post-op
/// @param padding_l_size Size of left and top paddings of depthwise post-op
/// @param mask Scaling factors correspondence mask that defines the
/// correspondence between the output tensor dimensions and the
/// @p scales array. The set i-th bit indicates that a dedicated output
@@ -3109,14 +3111,17 @@ struct post_ops : public handle<dnnl_post_ops_t> {
/// tensor.
/// @param scales Pointer to a constant array of float scaling
///     factors.
void append_dw_k3s1p1(memory::data_type weights_data_type,
void append_dw(memory::data_type weights_data_type,
memory::data_type bias_data_type, memory::data_type dst_data_type,
int mask, const std::vector<float> &scales) {
memory::dim kernel_size, memory::dim stride_size,
memory::dim padding_l_size, int mask,
const std::vector<float> &scales) {

error::wrap_c_api(dnnl_post_ops_append_dw_k3s1p1(get(),
error::wrap_c_api(dnnl_post_ops_append_dw(get(),
memory::convert_to_c(weights_data_type),
memory::convert_to_c(bias_data_type),
memory::convert_to_c(dst_data_type),
kernel_size, stride_size, padding_l_size,
scales.size(), mask, scales.data()),
"could not append depthwise post-op");
}
@@ -3135,24 +3140,34 @@ struct post_ops : public handle<dnnl_post_ops_t> {
/// tensor.
/// @param scales Output pointer to a constant array of float scaling
/// factors.
void get_params_dw_k3s1p1(int index, memory::data_type &weights_data_type,
void get_params_dw(int index, memory::data_type &weights_data_type,
memory::data_type &bias_data_type, memory::data_type &dst_data_type,
int &mask, std::vector<float> &scales) const {
memory::dim &kernel_size, memory::dim &stride_size,
memory::dim &padding_l_size, int &mask,
std::vector<float> &scales) const {

dnnl_data_type_t c_weights_data_type;
dnnl_data_type_t c_bias_data_type;
dnnl_data_type_t c_dst_data_type;
dnnl_dim_t c_kernel_size;
dnnl_dim_t c_stride_size;
dnnl_dim_t c_padding_l_size;
dnnl_dim_t count;
int c_mask;
const float *c_scales;
error::wrap_c_api(dnnl_post_ops_get_params_dw_k3s1p1(get(), index,
&c_weights_data_type, &c_bias_data_type,
&c_dst_data_type, &count, &c_mask, &c_scales),
error::wrap_c_api(
dnnl_post_ops_get_params_dw(get(), index, &c_weights_data_type,
&c_bias_data_type, &c_dst_data_type, &c_kernel_size,
&c_stride_size, &c_padding_l_size, &count, &c_mask,
&c_scales),
"could not get parameters of depthwise post-op");

weights_data_type = static_cast<memory::data_type>(c_weights_data_type);
bias_data_type = static_cast<memory::data_type>(c_bias_data_type);
dst_data_type = static_cast<memory::data_type>(c_dst_data_type);
kernel_size = c_kernel_size;
stride_size = c_stride_size;
padding_l_size = c_padding_l_size;
scales.resize(count);

mask = c_mask;
@@ -3161,6 +3176,67 @@ struct post_ops : public handle<dnnl_post_ops_t> {
return;
}

/// Appends a depthwise post-op convolution with stride 1.
///
/// This post-op can only be fused with a 2D 1x1 convolution (convolution
/// with weights spatial dimension equal to 1 i.e., kh=kw=1).
///
/// The kind of this post-op is #dnnl_convolution.
///
/// The number of outputs for the primitive remains the same as before. The
/// output size remains the same as the original primitive due to stride=1.
///
/// The post-op can be defined as:
///
/// dst[:] <- scales * (conv_dw(conv_1x1))
///
/// See @ref dev_guide_attributes_post_ops_depthwise and
/// @ref dev_guide_attributes_post_ops_depthwise_fusion for more info.
///
/// @param weights_data_type Weights data type of depthwise post-op
/// @param bias_data_type Bias data type of depthwise post-op
/// @param dst_data_type Output data type of depthwise post-op
/// @param mask Scaling factors correspondence mask that defines the
/// correspondence between the output tensor dimensions and the
/// @p scales array. The set i-th bit indicates that a dedicated output
/// scaling factor is used for each index along that dimension. The mask
/// value of 0 implies a common scaling factor for the whole output
/// tensor.
/// @param scales Pointer to a constant array of float scaling
///     factors.
void append_dw_k3s1p1(memory::data_type weights_data_type,
memory::data_type bias_data_type, memory::data_type dst_data_type,
int mask, const std::vector<float> &scales) {

append_dw(weights_data_type, bias_data_type, dst_data_type, 3, 1, 1,
mask, scales);
}

/// Returns the parameters of a depthwise post-op with stride 1.
///
/// @param index Index of the depthwise post-op.
/// @param weights_data_type Weights data type of depthwise post-op
/// @param bias_data_type Bias data type of depthwise post-op
/// @param dst_data_type Output data type of depthwise post-op
/// @param mask Output scaling factors correspondence mask that defines the
/// correspondence between the output tensor dimensions and the
/// @p scales array. The set i-th bit indicates that a dedicated output
/// scaling factor is used for each index along that dimension. The mask
/// value of 0 implies a common scaling factor for the whole output
/// tensor.
/// @param scales Output pointer to a constant array of float scaling
/// factors.
void get_params_dw_k3s1p1(int index, memory::data_type &weights_data_type,
memory::data_type &bias_data_type, memory::data_type &dst_data_type,
int &mask, std::vector<float> &scales) const {

memory::dim kernel_size;
memory::dim stride_size;
memory::dim padding_l_size;
get_params_dw(index, weights_data_type, bias_data_type, dst_data_type,
kernel_size, stride_size, padding_l_size, mask, scales);
}

/// Appends a depthwise post-op convolution with stride 2.
///
/// This post-op can only be fused with a 2D 1x1 convolution (convolution
@@ -3197,13 +3273,8 @@ struct post_ops : public handle<dnnl_post_ops_t> {
void append_dw_k3s2p1(memory::data_type weights_data_type,
memory::data_type bias_data_type, memory::data_type dst_data_type,
int mask, const std::vector<float> &scales) {

error::wrap_c_api(dnnl_post_ops_append_dw_k3s2p1(get(),
memory::convert_to_c(weights_data_type),
memory::convert_to_c(bias_data_type),
memory::convert_to_c(dst_data_type),
scales.size(), mask, scales.data()),
"could not append depthwise post-op");
append_dw(weights_data_type, bias_data_type, dst_data_type, 3, 2, 1,
mask, scales);
}

/// Returns the parameters of a depthwise post-op with stride 2.
@@ -3224,26 +3295,11 @@ struct post_ops : public handle<dnnl_post_ops_t> {
memory::data_type &bias_data_type, memory::data_type &dst_data_type,
int &mask, std::vector<float> &scales) const {

dnnl_data_type_t c_weights_data_type;
dnnl_data_type_t c_bias_data_type;
dnnl_data_type_t c_dst_data_type;
dnnl_dim_t count;
int c_mask;
const float *c_scales;
error::wrap_c_api(dnnl_post_ops_get_params_dw_k3s2p1(get(), index,
&c_weights_data_type, &c_bias_data_type,
&c_dst_data_type, &count, &c_mask, &c_scales),
"could not get parameters of depthwise post-op");

weights_data_type = static_cast<memory::data_type>(c_weights_data_type);
bias_data_type = static_cast<memory::data_type>(c_bias_data_type);
dst_data_type = static_cast<memory::data_type>(c_dst_data_type);
scales.resize(count);

mask = c_mask;
for (dnnl_dim_t c = 0; c < count; ++c)
scales[c] = c_scales[c];
return;
memory::dim kernel_size;
memory::dim stride_size;
memory::dim padding_l_size;
get_params_dw(index, weights_data_type, bias_data_type, dst_data_type,
kernel_size, stride_size, padding_l_size, mask, scales);
}

/// Appends a binary post-op.
3 changes: 2 additions & 1 deletion scripts/verbose_converter/src/benchdnn_generator.py
@@ -449,7 +449,8 @@ def convert_binary_post_op(post_op):

def convert_dw_post_op(post_op):
policy = convert_scale_policy(post_op['scales']['mask'])
po = post_op['alg'] + ':' + post_op['dst_dt'] + ':' + policy
po = post_op['alg'] + ':' + post_op['ksp'] + ':' + post_op[
'dst_dt'] + ':' + policy
if post_op['scales']['value'] != None:
po += ':' + post_op['scales']['value']
return po
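A minimal, self-contained sketch of the new benchdnn token layout, assuming the field order the diff produces (`alg:ksp:dst_dt:policy[:scale]`); the helper name is hypothetical:

```python
def benchdnn_dw_token(alg, ksp, dst_dt, policy, scale=None):
    # The kernel/stride/padding field ('ksp', e.g. 'k3s1p1') now sits
    # between the algorithm and the destination data type.
    token = ':'.join([alg, ksp, dst_dt, policy])
    if scale is not None:
        token += ':' + scale
    return token

print(benchdnn_dw_token('dw', 'k3s1p1', 's8', 'common', '2.5'))
# dw:k3s1p1:s8:common:2.5
```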
12 changes: 7 additions & 5 deletions scripts/verbose_converter/src/dnnl_parser.py
@@ -131,6 +131,7 @@ def convert_binary_post_op(value):
def convert_dw_post_op(value):
p_op = {
'alg': '',
'ksp': '',
'dst_dt': 'f32',
'wei_dt': 'f32',
'scales': {
@@ -141,13 +142,14 @@ def convert_dw_post_op(value):
params = value.split(':')
len_params = len(params)
p_op['alg'] = params[0]
if len_params > 1:
p_op['dst_dt'] = params[1]
p_op['ksp'] = params[1]
if len_params > 2:
p_op['wei_dt'] = 's8'
p_op['scales']['mask'] = params[2]
p_op['dst_dt'] = params[2]
if len_params > 3:
p_op['scales']['value'] = params[3]
p_op['wei_dt'] = 's8'
p_op['scales']['mask'] = params[3]
if len_params > 4:
p_op['scales']['value'] = params[4]
return p_op
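A self-contained sketch of the updated parsing logic: with the `ksp` token inserted at position 1, the destination type, scales mask, and scales value each shift one slot to the right.

```python
def parse_dw_post_op(value):
    # Mirrors the updated verbose parser: field 1 is now the
    # kernel/stride/padding token ('ksp'), shifting dst_dt, the scales
    # mask, and the scales value by one position each.
    p_op = {'alg': '', 'ksp': '', 'dst_dt': 'f32', 'wei_dt': 'f32',
            'scales': {'mask': '0', 'value': None}}
    params = value.split(':')
    p_op['alg'] = params[0]
    if len(params) > 1:
        p_op['ksp'] = params[1]
    if len(params) > 2:
        p_op['dst_dt'] = params[2]
    if len(params) > 3:
        p_op['wei_dt'] = 's8'
        p_op['scales']['mask'] = params[3]
    if len(params) > 4:
        p_op['scales']['value'] = params[4]
    return p_op

print(parse_dw_post_op('dw:k3s2p1:s8:2:0.5')['ksp'])  # k3s2p1
```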

def convert_eltwise_post_op(value):