From f3389f51cd83d351fdbd76aaaae1515ea4911206 Mon Sep 17 00:00:00 2001 From: Rohit Kumar Srivastava Date: Fri, 6 Jul 2018 17:55:31 +0000 Subject: [PATCH 1/7] [MXNET-378] Adding depth_to_space and space_to_depth operator --- src/operator/tensor/matrix_op-inl.h | 274 +++++++++++++++++++++++++ src/operator/tensor/matrix_op.cc | 91 ++++++++ src/operator/tensor/matrix_op.cu | 6 + tests/python/unittest/test_operator.py | 34 +++ 4 files changed, 405 insertions(+) diff --git a/src/operator/tensor/matrix_op-inl.h b/src/operator/tensor/matrix_op-inl.h index dcdf03a5316e..0f8d5407dbec 100644 --- a/src/operator/tensor/matrix_op-inl.h +++ b/src/operator/tensor/matrix_op-inl.h @@ -2171,6 +2171,280 @@ inline bool SqueezeShape(const nnvm::NodeAttrs& attrs, return true; } + +struct DepthToSpaceParam : public dmlc::Parameter { + int blockSize; + DMLC_DECLARE_PARAMETER(DepthToSpaceParam) { + DMLC_DECLARE_FIELD(blockSize) + .describe("The size of chunks that need to be taken from depth and spread across to the shape dimension of the tensor"); + } +}; + +inline bool DepthToSpaceOpShape(const nnvm::NodeAttrs& attrs, + std::vector* in_attrs, + std::vector* out_attrs) { + const DepthToSpaceParam& param = nnvm::get(attrs.parsed); + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 1U); + CHECK_EQ(in_attrs->at(0).ndim(), 4) << "Operation Depth To Space requires exactly 4D tensor"; + + TShape expected_out(in_attrs->at(0).ndim()); + + TShape& in_shape = in_attrs->at(0); + int block = param.blockSize; + CHECK_NE(in_shape[1], 0) + << "Depth dimension:1 cannot be 0"; + CHECK_EQ(in_shape[1]%(block*block), 0) + << "Cannot perform Depth To Space operation on the specified tensor. Dimension:1(depth dimension) should be a multiple of 'block^2' "; + CHECK_NE(in_shape[0], 0) + << "Operation requires a 4D tensor. Size of dimension:0 cannot be 0"; + CHECK_NE(in_shape[2], 0) + << "Operation requires a 4D tensor. 
Size of dimension:2 cannot be 0"; + CHECK_NE(in_shape[3], 0) + << "Operation requires a 4D tensor. Size of dimension:3 cannot be 0"; + + expected_out[0]=in_shape[0]; + expected_out[1]=in_shape[1]/(block*block); + uint16_t i = 2; + while (i < expected_out.ndim()) { + expected_out[i] = in_shape[i] * block; + ++i; + } + + SHAPE_ASSIGN_CHECK(*out_attrs, 0, expected_out); + return true; +} + +inline bool DepthToSpaceOpType(const nnvm::NodeAttrs& attrs, + std::vector* in_attrs, + std::vector* out_attrs) { + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 1U); + + TYPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0)); + TYPE_ASSIGN_CHECK(*in_attrs, 0, out_attrs->at(0)); + return out_attrs->at(0) != -1; +} + +#define UPDATE_INDEX_USING_OFFSET(X) \ + next_idx_val = idx/dim_size; \ + inp_index += (idx - (next_idx_val) * dim_size) * offset_arr[X]; \ + idx = next_idx_val; + +template +struct depth_to_space_forward { + template + MSHADOW_XINLINE static void Map(int i, DType* out_data, const DType* in_data, + const int block, const int* size, const int* __restrict__ offset_arr) { + + int inp_index = 0, idx = i, next_idx_val, dim_size; + dim_size = block; + UPDATE_INDEX_USING_OFFSET(2) + dim_size = size[3]; + UPDATE_INDEX_USING_OFFSET(5) + dim_size = block; + UPDATE_INDEX_USING_OFFSET(1) + dim_size = size[2]; + UPDATE_INDEX_USING_OFFSET(4) + dim_size = size[1]/block/block; + UPDATE_INDEX_USING_OFFSET(3) + dim_size = size[0]; + UPDATE_INDEX_USING_OFFSET(0) + KERNEL_ASSIGN(out_data[i], req, in_data[inp_index]); + } +}; + +template +struct compute_offset_for_depth_to_space { + template + MSHADOW_XINLINE static void Map(int i, DType* offset_arr, DType* size, const int block, + const int32_t size0, const int32_t size1, const int32_t size2, const int32_t size3) { + size[0] = size0; + size[1] = size1; + size[2] = size2; + size[3] = size3; + + offset_arr[5] = 1; + offset_arr[4] = offset_arr[5] * size[3]; + offset_arr[3] = offset_arr[4] * size[2]; + offset_arr[2] = offset_arr[3] 
* size[1]/block/block; + offset_arr[1] = offset_arr[2] * block; + offset_arr[0] = offset_arr[1] * block; + } +}; + +template +void DepthToSpaceOpForward(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + CHECK_EQ(inputs.size(), 1U); + CHECK_EQ(outputs.size(), 1U); + CHECK_EQ(req.size(), 1U); + mshadow::Stream *s = ctx.get_stream(); + const TBlob& in_data = inputs[0]; + const TBlob& out_data = outputs[0]; + const DepthToSpaceParam& param = nnvm::get(attrs.parsed); + using namespace mxnet_op; + int block = param.blockSize; + + mshadow::Tensor workspace = ctx.requested[0].get_space_typed(mshadow::Shape1(sizeof(int32_t)*10), s); + char* workspace_curr_ptr = workspace.dptr_; + int32_t* offset_arr = reinterpret_cast(workspace_curr_ptr); + int32_t* size = reinterpret_cast(workspace_curr_ptr + sizeof(int32_t)*6); + + MSHADOW_TYPE_SWITCH(out_data.type_flag_, DType, { + MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, { + Kernel, xpu>::Launch( + s, 1, offset_arr, size, block, in_data.shape_[0], in_data.shape_[1], in_data.shape_[2], in_data.shape_[3]); + + Kernel, xpu>::Launch( + s, out_data.Size(), out_data.dptr(), in_data.dptr(), + block, size, offset_arr); + + }); + }); + +} + +struct SpaceToDepthParam : public dmlc::Parameter { + int blockSize; + DMLC_DECLARE_PARAMETER(SpaceToDepthParam) { + DMLC_DECLARE_FIELD(blockSize) + .describe("The size of chunks that need to be taken from space and combined to depth dimension of the tensor"); + } +}; + +inline bool SpaceToDepthOpShape(const nnvm::NodeAttrs& attrs, + std::vector* in_attrs, + std::vector* out_attrs) { + const SpaceToDepthParam& param = nnvm::get(attrs.parsed); + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 1U); + CHECK_EQ(in_attrs->at(0).ndim(), 4) << "Operation Space To Depth requires exactly 4D tensor"; + + TShape expected_out(in_attrs->at(0).ndim()); + + TShape& in_shape = in_attrs->at(0); + int block = 
param.blockSize; + CHECK_NE(in_shape[0], 0) + << "Operation requires a 4D tensor. Size of dimension:0 cannot be 0"; + CHECK_NE(in_shape[1], 0) + << "Depth dimension:1 cannot be 0"; + CHECK_NE(in_shape[2], 0) + << "Operation requires a 4D tensor. Size of dimension:2 cannot be 0"; + CHECK_EQ(in_shape[2]%block, 0) + << "Cannot perform Depth To Space operation on the specified tensor. Dimension:2(1st Space dimension) should be a multiple of 'block' "; + CHECK_NE(in_shape[3], 0) + << "Operation requires a 4D tensor. Size of dimension:3 cannot be 0"; + CHECK_EQ(in_shape[3]%block, 0) + << "Cannot perform Depth To Space operation on the specified tensor. Dimension:3(2nd space dimension) should be a multiple of 'block' "; + + expected_out[0]=in_shape[0]; + expected_out[1]=in_shape[1]*(block*block); + uint16_t i = 2; + while (i < expected_out.ndim()) { + expected_out[i] = in_shape[i] / block; + ++i; + } + + SHAPE_ASSIGN_CHECK(*out_attrs, 0, expected_out); + return true; +} + +inline bool SpaceToDepthOpType(const nnvm::NodeAttrs& attrs, + std::vector* in_attrs, + std::vector* out_attrs) { + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 1U); + + TYPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0)); + TYPE_ASSIGN_CHECK(*in_attrs, 0, out_attrs->at(0)); + return out_attrs->at(0) != -1; +} + +template +struct space_to_depth_forward { + template + MSHADOW_XINLINE static void Map(int i, DType* out_data, const DType* in_data, + const int block, const int* size, const int* __restrict__ offset_arr) { + + int inp_index = 0, idx = i, next_idx_val, dim_size; + dim_size = size[3]/block; + UPDATE_INDEX_USING_OFFSET(4) + dim_size = size[2]/block; + UPDATE_INDEX_USING_OFFSET(2) + dim_size = size[1]; + UPDATE_INDEX_USING_OFFSET(1) + dim_size = block; + UPDATE_INDEX_USING_OFFSET(5) + dim_size = block; + UPDATE_INDEX_USING_OFFSET(3) + dim_size = size[0]; + UPDATE_INDEX_USING_OFFSET(0) + KERNEL_ASSIGN(out_data[i], req, in_data[inp_index]); + } +}; + +#ifdef UPDATE_INDEX_USING_OFFSET 
+#undef UPDATE_INDEX_USING_OFFSET +#endif + +template +struct compute_offset_for_space_to_depth { + template + MSHADOW_XINLINE static void Map(int i, DType* offset_arr, DType* size, const int block, + const int32_t size0, const int32_t size1, const int32_t size2, const int32_t size3) { + size[0] = size0; + size[1] = size1; + size[2] = size2; + size[3] = size3; + + offset_arr[5] = 1; + offset_arr[4] = offset_arr[5] * block; + offset_arr[3] = offset_arr[4] * size[3]/block; + offset_arr[2] = offset_arr[3] * block; + offset_arr[1] = offset_arr[2] * size[2]/block; + offset_arr[0] = offset_arr[1] * size[1]; + } +}; + +template +void SpaceToDepthOpForward(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + CHECK_EQ(inputs.size(), 1U); + CHECK_EQ(outputs.size(), 1U); + CHECK_EQ(req.size(), 1U); + mshadow::Stream *s = ctx.get_stream(); + const TBlob& in_data = inputs[0]; + const TBlob& out_data = outputs[0]; + const SpaceToDepthParam& param = nnvm::get(attrs.parsed); + using namespace mxnet_op; + int block = param.blockSize; + + mshadow::Tensor workspace = ctx.requested[0].get_space_typed(mshadow::Shape1(sizeof(int32_t)*10), s); + char* workspace_curr_ptr = workspace.dptr_; + int32_t* offset_arr = reinterpret_cast(workspace_curr_ptr); + int32_t* size = reinterpret_cast(workspace_curr_ptr + sizeof(int32_t)*6); + + MSHADOW_TYPE_SWITCH(out_data.type_flag_, DType, { + MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, { + Kernel, xpu>::Launch( + s, 1, offset_arr, size, block, in_data.shape_[0], in_data.shape_[1], in_data.shape_[2], in_data.shape_[3]); + + Kernel, xpu>::Launch( + s, out_data.Size(), out_data.dptr(), in_data.dptr(), + block, size, offset_arr); + + }); + }); + +} + } // namespace op } // namespace mxnet diff --git a/src/operator/tensor/matrix_op.cc b/src/operator/tensor/matrix_op.cc index 29d493ae5a54..cacc5b2ffbe0 100644 --- a/src/operator/tensor/matrix_op.cc +++ 
b/src/operator/tensor/matrix_op.cc @@ -101,6 +101,8 @@ DMLC_REGISTER_PARAMETER(TileParam); DMLC_REGISTER_PARAMETER(ReverseParam); DMLC_REGISTER_PARAMETER(StackParam); DMLC_REGISTER_PARAMETER(SqueezeParam); +DMLC_REGISTER_PARAMETER(DepthToSpaceParam); +DMLC_REGISTER_PARAMETER(SpaceToDepthParam); NNVM_REGISTER_OP(Reshape) .add_alias("reshape") @@ -908,5 +910,94 @@ NNVM_REGISTER_OP(_backward_squeeze) .set_attr("TIsBackward", true) .set_attr("FCompute", UnaryOp::IdentityCompute); +NNVM_REGISTER_OP(depth_to_space) +.describe(R"code(This operators implements the depthToSpace function: +.. math:: + f(x, block) = ~x +where :math:`x` is an input tensor of shape [N,C,H,W] and `~x` is the output tensor of shape [N,C/block^2,H*block,W*block] +Example:: +#(1,4,2,3) input tensor + x = [[[[0, 1, 2], + [3, 4, 5]], + [[6, 7, 8], + [9, 10, 11]], + [[12, 13, 14], + [15, 16, 17]], + [[18, 19, 20], + [21, 22, 23]]]] + + y = depth_to_space(x, 2) + +# (1, 1, 4, 6) output tensor + y = [[[[0, 6, 1, 7, 2, 8], + [12, 18, 13, 19, 14, 20], + [3, 9, 4, 10, 5, 11], + [15, 21, 16, 22, 17, 23]]]] +)code" ADD_FILELINE) +.set_attr_parser(ParamParser) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + return std::vector{"data"}; + }) +.set_attr("FInferShape", DepthToSpaceOpShape) +.set_attr("FInferType", DepthToSpaceOpType) +.set_attr("FCompute", DepthToSpaceOpForward) +.set_attr("FResourceRequest", [](const NodeAttrs& n) { + return std::vector{ResourceRequest::kTempSpace}; +}) +.set_attr("FInplaceOption", + [](const NodeAttrs& attrs) { + return std::vector >{{0, 0}}; + }) +.add_argument("data", "NDArray-or-Symbol", "Input ndarray") +.add_arguments(DepthToSpaceParam::__FIELDS__()); + +NNVM_REGISTER_OP(space_to_depth) +.describe(R"code(This operators implements the spacetodepth function: +.. 
math:: + f(x, block) = ~x +where :math:`x` is an input tensor of shape [N,C,H,W] and `~x` is the output tensor of shape [N,C/block^2,H*block,W*block] +Example:: +#(1,4,2,3) input tensor + x = [[[[0, 6, 1, 7, 2, 8], + [12, 18, 13, 19, 14, 20], + [3, 9, 4, 10, 5, 11], + [15, 21, 16, 22, 17, 23]]]] + + + y = space_to_depth(x, 2) + +# (1, 1, 4, 6) output tensor + y = [[[[0, 1, 2], + [3, 4, 5]], + [[6, 7, 8], + [9, 10, 11]], + [[12, 13, 14], + [15, 16, 17]], + [[18, 19, 20], + [21, 22, 23]]]] +)code" ADD_FILELINE) +.set_attr_parser(ParamParser) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + return std::vector{"data"}; + }) +.set_attr("FInferShape", SpaceToDepthOpShape) +.set_attr("FInferType", SpaceToDepthOpType) +.set_attr("FCompute", SpaceToDepthOpForward) +.set_attr("FResourceRequest", [](const NodeAttrs& n) { + return std::vector{ResourceRequest::kTempSpace}; +}) +.set_attr("FInplaceOption", + [](const NodeAttrs& attrs) { + return std::vector >{{0, 0}}; + }) +.add_argument("data", "NDArray-or-Symbol", "Input ndarray") +.add_arguments(SpaceToDepthParam::__FIELDS__()); + } // namespace op } // namespace mxnet diff --git a/src/operator/tensor/matrix_op.cu b/src/operator/tensor/matrix_op.cu index bd1b9f208256..4e31a4cf1155 100644 --- a/src/operator/tensor/matrix_op.cu +++ b/src/operator/tensor/matrix_op.cu @@ -211,5 +211,11 @@ NNVM_REGISTER_OP(squeeze) NNVM_REGISTER_OP(_backward_squeeze) .set_attr("FCompute", UnaryOp::IdentityCompute); +NNVM_REGISTER_OP(depth_to_space) +.set_attr("FCompute", DepthToSpaceOpForward); + +NNVM_REGISTER_OP(space_to_depth) +.set_attr("FCompute", SpaceToDepthOpForward); + } // namespace op } // namespace mxnet diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py index 11180ebbc5d4..6bef78e26fae 100644 --- a/tests/python/unittest/test_operator.py +++ b/tests/python/unittest/test_operator.py @@ -6678,6 +6678,40 @@ def test_diag(): diag_sym = 
mx.sym.diag(data=data, k=-1) check_numeric_gradient(diag_sym, [a_np]) +def test_depthtospace(): + def f(x, blocksize): + b, c, h, w = x.shape[0], x.shape[1], x.shape[2], x.shape[3] + tmp = np.reshape(x, [b, blocksize, blocksize, c // (blocksize**2), h, w]) + tmp = np.transpose(tmp, [0, 3, 4, 1, 5, 2]) + y = np.reshape(tmp, [b, c // (blocksize**2), h * blocksize, w * blocksize]) + return y + + + shape = (1,4,2,3) + block = 2 + data = rand_ndarray(shape, 'default') + data_np = data.asnumpy() + expected = f(data_np, block) + output = mx.nd.depth_to_space(data, block) + assert_almost_equal(output.asnumpy(), expected) + +@with_seed() +def test_spacetodepth(): + def f(x, blocksize): + b, c, h, w = x.shape[0], x.shape[1], x.shape[2], x.shape[3] + tmp = np.reshape(x, [b, c, h // blocksize, blocksize, w // blocksize, blocksize]) + tmp = np.transpose(tmp, [0, 3, 5, 1, 2, 4]) + y = np.reshape(tmp, [b, c * (blocksize**2), h // blocksize, w // blocksize]) + return y + + + shape = (1,1,2,6) + block = 2 + data = rand_ndarray(shape, 'default') + data_np = data.asnumpy() + expected = f(data_np, block) + output = mx.nd.space_to_depth(data, block) + assert_almost_equal(output.asnumpy(), expected) if __name__ == '__main__': import nose From 0a48469d93948d4c7c54b48d568fe520aa500b7f Mon Sep 17 00:00:00 2001 From: Rohit Kumar Srivastava Date: Fri, 6 Jul 2018 20:37:42 +0000 Subject: [PATCH 2/7] fixed lint and windows CPU errors --- src/operator/tensor/matrix_op-inl.h | 55 ++++++++++++++++------------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/src/operator/tensor/matrix_op-inl.h b/src/operator/tensor/matrix_op-inl.h index 0f8d5407dbec..2617d70ea5bf 100644 --- a/src/operator/tensor/matrix_op-inl.h +++ b/src/operator/tensor/matrix_op-inl.h @@ -2176,7 +2176,8 @@ struct DepthToSpaceParam : public dmlc::Parameter { int blockSize; DMLC_DECLARE_PARAMETER(DepthToSpaceParam) { DMLC_DECLARE_FIELD(blockSize) - .describe("The size of chunks that need to be taken from depth and 
spread across to the shape dimension of the tensor"); + .describe("The size of chunks that need to be taken from depth and spread across to the " + "shape dimension of the tensor"); } }; @@ -2195,7 +2196,8 @@ inline bool DepthToSpaceOpShape(const nnvm::NodeAttrs& attrs, CHECK_NE(in_shape[1], 0) << "Depth dimension:1 cannot be 0"; CHECK_EQ(in_shape[1]%(block*block), 0) - << "Cannot perform Depth To Space operation on the specified tensor. Dimension:1(depth dimension) should be a multiple of 'block^2' "; + << "Cannot perform Depth To Space operation on the specified tensor. " + "Dimension:1(depth dimension) should be a multiple of 'block^2' "; CHECK_NE(in_shape[0], 0) << "Operation requires a 4D tensor. Size of dimension:0 cannot be 0"; CHECK_NE(in_shape[2], 0) @@ -2203,12 +2205,12 @@ inline bool DepthToSpaceOpShape(const nnvm::NodeAttrs& attrs, CHECK_NE(in_shape[3], 0) << "Operation requires a 4D tensor. Size of dimension:3 cannot be 0"; - expected_out[0]=in_shape[0]; - expected_out[1]=in_shape[1]/(block*block); + expected_out[0] = in_shape[0]; + expected_out[1] = in_shape[1]/(block*block); uint16_t i = 2; while (i < expected_out.ndim()) { - expected_out[i] = in_shape[i] * block; - ++i; + expected_out[i] = in_shape[i] * block; + ++i; } SHAPE_ASSIGN_CHECK(*out_attrs, 0, expected_out); @@ -2235,8 +2237,7 @@ template struct depth_to_space_forward { template MSHADOW_XINLINE static void Map(int i, DType* out_data, const DType* in_data, - const int block, const int* size, const int* __restrict__ offset_arr) { - + const int block, const int* size, const int* offset_arr) { int inp_index = 0, idx = i, next_idx_val, dim_size; dim_size = block; UPDATE_INDEX_USING_OFFSET(2) @@ -2258,7 +2259,7 @@ template struct compute_offset_for_depth_to_space { template MSHADOW_XINLINE static void Map(int i, DType* offset_arr, DType* size, const int block, - const int32_t size0, const int32_t size1, const int32_t size2, const int32_t size3) { + const int32_t size0, const int32_t size1, const 
int32_t size2, const int32_t size3) { size[0] = size0; size[1] = size1; size[2] = size2; @@ -2289,7 +2290,8 @@ void DepthToSpaceOpForward(const nnvm::NodeAttrs& attrs, using namespace mxnet_op; int block = param.blockSize; - mshadow::Tensor workspace = ctx.requested[0].get_space_typed(mshadow::Shape1(sizeof(int32_t)*10), s); + mshadow::Tensor workspace = ctx.requested[0] + .get_space_typed(mshadow::Shape1(sizeof(int32_t)*10), s); char* workspace_curr_ptr = workspace.dptr_; int32_t* offset_arr = reinterpret_cast(workspace_curr_ptr); int32_t* size = reinterpret_cast(workspace_curr_ptr + sizeof(int32_t)*6); @@ -2297,22 +2299,22 @@ void DepthToSpaceOpForward(const nnvm::NodeAttrs& attrs, MSHADOW_TYPE_SWITCH(out_data.type_flag_, DType, { MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, { Kernel, xpu>::Launch( - s, 1, offset_arr, size, block, in_data.shape_[0], in_data.shape_[1], in_data.shape_[2], in_data.shape_[3]); + s, 1, offset_arr, size, block, in_data.shape_[0], in_data.shape_[1], + in_data.shape_[2], in_data.shape_[3]); Kernel, xpu>::Launch( s, out_data.Size(), out_data.dptr(), in_data.dptr(), block, size, offset_arr); - }); }); - } struct SpaceToDepthParam : public dmlc::Parameter { int blockSize; DMLC_DECLARE_PARAMETER(SpaceToDepthParam) { DMLC_DECLARE_FIELD(blockSize) - .describe("The size of chunks that need to be taken from space and combined to depth dimension of the tensor"); + .describe("The size of chunks that need to be taken from space and combined to depth " + "dimension of the tensor"); } }; @@ -2335,14 +2337,16 @@ inline bool SpaceToDepthOpShape(const nnvm::NodeAttrs& attrs, CHECK_NE(in_shape[2], 0) << "Operation requires a 4D tensor. Size of dimension:2 cannot be 0"; CHECK_EQ(in_shape[2]%block, 0) - << "Cannot perform Depth To Space operation on the specified tensor. Dimension:2(1st Space dimension) should be a multiple of 'block' "; + << "Cannot perform Depth To Space operation on the specified tensor. 
" + "Dimension:2(1st Space dimension) should be a multiple of 'block' "; CHECK_NE(in_shape[3], 0) << "Operation requires a 4D tensor. Size of dimension:3 cannot be 0"; CHECK_EQ(in_shape[3]%block, 0) - << "Cannot perform Depth To Space operation on the specified tensor. Dimension:3(2nd space dimension) should be a multiple of 'block' "; + << "Cannot perform Depth To Space operation on the specified tensor. " + "Dimension:3(2nd space dimension) should be a multiple of 'block' "; - expected_out[0]=in_shape[0]; - expected_out[1]=in_shape[1]*(block*block); + expected_out[0] = in_shape[0]; + expected_out[1] = in_shape[1]*(block*block); uint16_t i = 2; while (i < expected_out.ndim()) { expected_out[i] = in_shape[i] / block; @@ -2368,8 +2372,8 @@ template struct space_to_depth_forward { template MSHADOW_XINLINE static void Map(int i, DType* out_data, const DType* in_data, - const int block, const int* size, const int* __restrict__ offset_arr) { - + const int block, const int* size, + const int* offset_arr) { int inp_index = 0, idx = i, next_idx_val, dim_size; dim_size = size[3]/block; UPDATE_INDEX_USING_OFFSET(4) @@ -2395,7 +2399,8 @@ template struct compute_offset_for_space_to_depth { template MSHADOW_XINLINE static void Map(int i, DType* offset_arr, DType* size, const int block, - const int32_t size0, const int32_t size1, const int32_t size2, const int32_t size3) { + const int32_t size0, const int32_t size1, + const int32_t size2, const int32_t size3) { size[0] = size0; size[1] = size1; size[2] = size2; @@ -2426,7 +2431,8 @@ void SpaceToDepthOpForward(const nnvm::NodeAttrs& attrs, using namespace mxnet_op; int block = param.blockSize; - mshadow::Tensor workspace = ctx.requested[0].get_space_typed(mshadow::Shape1(sizeof(int32_t)*10), s); + mshadow::Tensor workspace = ctx.requested[0] + .get_space_typed(mshadow::Shape1(sizeof(int32_t)*10), s); char* workspace_curr_ptr = workspace.dptr_; int32_t* offset_arr = reinterpret_cast(workspace_curr_ptr); int32_t* size = 
reinterpret_cast(workspace_curr_ptr + sizeof(int32_t)*6); @@ -2434,15 +2440,14 @@ void SpaceToDepthOpForward(const nnvm::NodeAttrs& attrs, MSHADOW_TYPE_SWITCH(out_data.type_flag_, DType, { MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, { Kernel, xpu>::Launch( - s, 1, offset_arr, size, block, in_data.shape_[0], in_data.shape_[1], in_data.shape_[2], in_data.shape_[3]); + s, 1, offset_arr, size, block, in_data.shape_[0], in_data.shape_[1], + in_data.shape_[2], in_data.shape_[3]); Kernel, xpu>::Launch( s, out_data.Size(), out_data.dptr(), in_data.dptr(), block, size, offset_arr); - }); }); - } } // namespace op From 21b19c0fdfc8dac72272e0f8e42103109009c7bc Mon Sep 17 00:00:00 2001 From: Rohit Kumar Srivastava Date: Tue, 10 Jul 2018 22:25:18 +0000 Subject: [PATCH 3/7] compliance with C++ style guiide and address shortcomings in unittests --- src/operator/tensor/matrix_op-inl.h | 168 ++++++++++++------------- src/operator/tensor/matrix_op.cc | 31 ++--- tests/python/unittest/test_operator.py | 27 +++- 3 files changed, 115 insertions(+), 111 deletions(-) diff --git a/src/operator/tensor/matrix_op-inl.h b/src/operator/tensor/matrix_op-inl.h index 2617d70ea5bf..9c5d56f802f4 100644 --- a/src/operator/tensor/matrix_op-inl.h +++ b/src/operator/tensor/matrix_op-inl.h @@ -2171,46 +2171,44 @@ inline bool SqueezeShape(const nnvm::NodeAttrs& attrs, return true; } - struct DepthToSpaceParam : public dmlc::Parameter { int blockSize; DMLC_DECLARE_PARAMETER(DepthToSpaceParam) { DMLC_DECLARE_FIELD(blockSize) - .describe("The size of chunks that need to be taken from depth and spread across to the " - "shape dimension of the tensor"); + .describe("The size of chunks that need to be taken from depth and spread across to the" + " shape dimension of the tensor and vice versa"); } }; inline bool DepthToSpaceOpShape(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { + std::vector* in_attrs, + std::vector* out_attrs) { const DepthToSpaceParam& param = 
nnvm::get(attrs.parsed); CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); CHECK_EQ(in_attrs->at(0).ndim(), 4) << "Operation Depth To Space requires exactly 4D tensor"; - TShape expected_out(in_attrs->at(0).ndim()); + TShape expected_out(4); TShape& in_shape = in_attrs->at(0); int block = param.blockSize; - CHECK_NE(in_shape[1], 0) - << "Depth dimension:1 cannot be 0"; - CHECK_EQ(in_shape[1]%(block*block), 0) - << "Cannot perform Depth To Space operation on the specified tensor. " - "Dimension:1(depth dimension) should be a multiple of 'block^2' "; + CHECK_NE(in_shape[1], 0) << "Depth dimension:1 cannot be 0"; + CHECK_EQ(in_shape[1] % (block * block), 0) + << "Cannot perform Depth To Space operation on the specified tensor." + " Dimension:1(depth dimension) should be a multiple of 'block^2'"; CHECK_NE(in_shape[0], 0) - << "Operation requires a 4D tensor. Size of dimension:0 cannot be 0"; + << "Operation requires a 4D tensor. Size of dimension:0 cannot be 0"; CHECK_NE(in_shape[2], 0) - << "Operation requires a 4D tensor. Size of dimension:2 cannot be 0"; + << "Operation requires a 4D tensor. Size of dimension:2 cannot be 0"; CHECK_NE(in_shape[3], 0) - << "Operation requires a 4D tensor. Size of dimension:3 cannot be 0"; + << "Operation requires a 4D tensor. 
Size of dimension:3 cannot be 0"; expected_out[0] = in_shape[0]; - expected_out[1] = in_shape[1]/(block*block); - uint16_t i = 2; + expected_out[1] = in_shape[1] / (block * block); + uint32_t i = 2; while (i < expected_out.ndim()) { - expected_out[i] = in_shape[i] * block; - ++i; + expected_out[i] = in_shape[i] * block; + ++i; } SHAPE_ASSIGN_CHECK(*out_attrs, 0, expected_out); @@ -2218,8 +2216,8 @@ inline bool DepthToSpaceOpShape(const nnvm::NodeAttrs& attrs, } inline bool DepthToSpaceOpType(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { + std::vector* in_attrs, + std::vector* out_attrs) { CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); @@ -2229,15 +2227,15 @@ inline bool DepthToSpaceOpType(const nnvm::NodeAttrs& attrs, } #define UPDATE_INDEX_USING_OFFSET(X) \ - next_idx_val = idx/dim_size; \ - inp_index += (idx - (next_idx_val) * dim_size) * offset_arr[X]; \ + next_idx_val = idx / dim_size; \ + inp_index += (idx - next_idx_val * dim_size) * offset_arr[X]; \ idx = next_idx_val; template struct depth_to_space_forward { template MSHADOW_XINLINE static void Map(int i, DType* out_data, const DType* in_data, - const int block, const int* size, const int* offset_arr) { + const int block, const int* size, const int* offset_arr) { int inp_index = 0, idx = i, next_idx_val, dim_size; dim_size = block; UPDATE_INDEX_USING_OFFSET(2) @@ -2247,7 +2245,7 @@ struct depth_to_space_forward { UPDATE_INDEX_USING_OFFSET(1) dim_size = size[2]; UPDATE_INDEX_USING_OFFSET(4) - dim_size = size[1]/block/block; + dim_size = size[1] / (block * block); UPDATE_INDEX_USING_OFFSET(3) dim_size = size[0]; UPDATE_INDEX_USING_OFFSET(0) @@ -2259,7 +2257,8 @@ template struct compute_offset_for_depth_to_space { template MSHADOW_XINLINE static void Map(int i, DType* offset_arr, DType* size, const int block, - const int32_t size0, const int32_t size1, const int32_t size2, const int32_t size3) { + const int32_t size0, const int32_t size1, const int32_t 
size2, + const int32_t size3) { size[0] = size0; size[1] = size1; size[2] = size2; @@ -2268,7 +2267,7 @@ struct compute_offset_for_depth_to_space { offset_arr[5] = 1; offset_arr[4] = offset_arr[5] * size[3]; offset_arr[3] = offset_arr[4] * size[2]; - offset_arr[2] = offset_arr[3] * size[1]/block/block; + offset_arr[2] = offset_arr[3] * size[1] / (block * block); offset_arr[1] = offset_arr[2] * block; offset_arr[0] = offset_arr[1] * block; } @@ -2276,10 +2275,10 @@ struct compute_offset_for_depth_to_space { template void DepthToSpaceOpForward(const nnvm::NodeAttrs& attrs, - const OpContext& ctx, - const std::vector& inputs, - const std::vector& req, - const std::vector& outputs) { + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { CHECK_EQ(inputs.size(), 1U); CHECK_EQ(outputs.size(), 1U); CHECK_EQ(req.size(), 1U); @@ -2290,38 +2289,29 @@ void DepthToSpaceOpForward(const nnvm::NodeAttrs& attrs, using namespace mxnet_op; int block = param.blockSize; - mshadow::Tensor workspace = ctx.requested[0] - .get_space_typed(mshadow::Shape1(sizeof(int32_t)*10), s); + mshadow::Tensor workspace = + ctx.requested[0].get_space_typed(mshadow::Shape1(sizeof(int32_t)*10), s); char* workspace_curr_ptr = workspace.dptr_; int32_t* offset_arr = reinterpret_cast(workspace_curr_ptr); int32_t* size = reinterpret_cast(workspace_curr_ptr + sizeof(int32_t)*6); MSHADOW_TYPE_SWITCH(out_data.type_flag_, DType, { - MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, { - Kernel, xpu>::Launch( - s, 1, offset_arr, size, block, in_data.shape_[0], in_data.shape_[1], - in_data.shape_[2], in_data.shape_[3]); - - Kernel, xpu>::Launch( - s, out_data.Size(), out_data.dptr(), in_data.dptr(), - block, size, offset_arr); - }); + MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, { + Kernel, xpu>::Launch( + s, 1, offset_arr, size, block, in_data.shape_[0], in_data.shape_[1], + in_data.shape_[2], in_data.shape_[3]); + + Kernel, xpu>::Launch( + s, out_data.Size(), 
out_data.dptr(), in_data.dptr(), + block, size, offset_arr); + }); }); } -struct SpaceToDepthParam : public dmlc::Parameter { - int blockSize; - DMLC_DECLARE_PARAMETER(SpaceToDepthParam) { - DMLC_DECLARE_FIELD(blockSize) - .describe("The size of chunks that need to be taken from space and combined to depth " - "dimension of the tensor"); - } -}; - inline bool SpaceToDepthOpShape(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { - const SpaceToDepthParam& param = nnvm::get(attrs.parsed); + std::vector* in_attrs, + std::vector* out_attrs) { + const DepthToSpaceParam& param = nnvm::get(attrs.parsed); CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); CHECK_EQ(in_attrs->at(0).ndim(), 4) << "Operation Space To Depth requires exactly 4D tensor"; @@ -2331,26 +2321,25 @@ inline bool SpaceToDepthOpShape(const nnvm::NodeAttrs& attrs, TShape& in_shape = in_attrs->at(0); int block = param.blockSize; CHECK_NE(in_shape[0], 0) - << "Operation requires a 4D tensor. Size of dimension:0 cannot be 0"; - CHECK_NE(in_shape[1], 0) - << "Depth dimension:1 cannot be 0"; + << "Operation requires a 4D tensor. Size of dimension:0 cannot be 0"; + CHECK_NE(in_shape[1], 0) << "Depth dimension:1 cannot be 0"; CHECK_NE(in_shape[2], 0) - << "Operation requires a 4D tensor. Size of dimension:2 cannot be 0"; + << "Operation requires a 4D tensor. Size of dimension:2 cannot be 0"; CHECK_EQ(in_shape[2]%block, 0) - << "Cannot perform Depth To Space operation on the specified tensor. " - "Dimension:2(1st Space dimension) should be a multiple of 'block' "; + << "Cannot perform Depth To Space operation on the specified tensor." + " Dimension:2(1st Space dimension) should be a multiple of 'block' "; CHECK_NE(in_shape[3], 0) - << "Operation requires a 4D tensor. Size of dimension:3 cannot be 0"; + << "Operation requires a 4D tensor. 
Size of dimension:3 cannot be 0"; CHECK_EQ(in_shape[3]%block, 0) - << "Cannot perform Depth To Space operation on the specified tensor. " - "Dimension:3(2nd space dimension) should be a multiple of 'block' "; + << "Cannot perform Depth To Space operation on the specified tensor." + " Dimension:3(2nd space dimension) should be a multiple of 'block' "; expected_out[0] = in_shape[0]; - expected_out[1] = in_shape[1]*(block*block); - uint16_t i = 2; + expected_out[1] = in_shape[1] * (block * block); + uint32_t i = 2; while (i < expected_out.ndim()) { - expected_out[i] = in_shape[i] / block; - ++i; + expected_out[i] = in_shape[i] / block; + ++i; } SHAPE_ASSIGN_CHECK(*out_attrs, 0, expected_out); @@ -2358,8 +2347,8 @@ inline bool SpaceToDepthOpShape(const nnvm::NodeAttrs& attrs, } inline bool SpaceToDepthOpType(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { + std::vector* in_attrs, + std::vector* out_attrs) { CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); @@ -2375,9 +2364,9 @@ struct space_to_depth_forward { const int block, const int* size, const int* offset_arr) { int inp_index = 0, idx = i, next_idx_val, dim_size; - dim_size = size[3]/block; + dim_size = size[3] / block; UPDATE_INDEX_USING_OFFSET(4) - dim_size = size[2]/block; + dim_size = size[2] / block; UPDATE_INDEX_USING_OFFSET(2) dim_size = size[1]; UPDATE_INDEX_USING_OFFSET(1) @@ -2399,8 +2388,8 @@ template struct compute_offset_for_space_to_depth { template MSHADOW_XINLINE static void Map(int i, DType* offset_arr, DType* size, const int block, - const int32_t size0, const int32_t size1, - const int32_t size2, const int32_t size3) { + const int32_t size0, const int32_t size1, + const int32_t size2, const int32_t size3) { size[0] = size0; size[1] = size1; size[2] = size2; @@ -2408,45 +2397,44 @@ struct compute_offset_for_space_to_depth { offset_arr[5] = 1; offset_arr[4] = offset_arr[5] * block; - offset_arr[3] = offset_arr[4] * size[3]/block; + offset_arr[3] = 
offset_arr[4] * size[3] / block; offset_arr[2] = offset_arr[3] * block; - offset_arr[1] = offset_arr[2] * size[2]/block; + offset_arr[1] = offset_arr[2] * size[2] / block; offset_arr[0] = offset_arr[1] * size[1]; } }; template void SpaceToDepthOpForward(const nnvm::NodeAttrs& attrs, - const OpContext& ctx, - const std::vector& inputs, - const std::vector& req, - const std::vector& outputs) { + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { CHECK_EQ(inputs.size(), 1U); CHECK_EQ(outputs.size(), 1U); CHECK_EQ(req.size(), 1U); mshadow::Stream *s = ctx.get_stream(); const TBlob& in_data = inputs[0]; const TBlob& out_data = outputs[0]; - const SpaceToDepthParam& param = nnvm::get(attrs.parsed); + const DepthToSpaceParam& param = nnvm::get(attrs.parsed); using namespace mxnet_op; int block = param.blockSize; - mshadow::Tensor workspace = ctx.requested[0] - .get_space_typed(mshadow::Shape1(sizeof(int32_t)*10), s); + mshadow::Tensor workspace = + ctx.requested[0].get_space_typed(mshadow::Shape1(sizeof(int32_t) * 10), s); char* workspace_curr_ptr = workspace.dptr_; int32_t* offset_arr = reinterpret_cast(workspace_curr_ptr); int32_t* size = reinterpret_cast(workspace_curr_ptr + sizeof(int32_t)*6); MSHADOW_TYPE_SWITCH(out_data.type_flag_, DType, { - MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, { - Kernel, xpu>::Launch( - s, 1, offset_arr, size, block, in_data.shape_[0], in_data.shape_[1], - in_data.shape_[2], in_data.shape_[3]); - - Kernel, xpu>::Launch( - s, out_data.Size(), out_data.dptr(), in_data.dptr(), - block, size, offset_arr); - }); + MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, { + Kernel, xpu>::Launch( + s, 1, offset_arr, size, block, in_data.shape_[0], in_data.shape_[1], + in_data.shape_[2], in_data.shape_[3]); + Kernel, xpu>::Launch( + s, out_data.Size(), out_data.dptr(), in_data.dptr(), + block, size, offset_arr); + }); }); } diff --git a/src/operator/tensor/matrix_op.cc b/src/operator/tensor/matrix_op.cc 
index cacc5b2ffbe0..7c311dfb59b7 100644 --- a/src/operator/tensor/matrix_op.cc +++ b/src/operator/tensor/matrix_op.cc @@ -102,7 +102,6 @@ DMLC_REGISTER_PARAMETER(ReverseParam); DMLC_REGISTER_PARAMETER(StackParam); DMLC_REGISTER_PARAMETER(SqueezeParam); DMLC_REGISTER_PARAMETER(DepthToSpaceParam); -DMLC_REGISTER_PARAMETER(SpaceToDepthParam); NNVM_REGISTER_OP(Reshape) .add_alias("reshape") @@ -914,7 +913,7 @@ NNVM_REGISTER_OP(depth_to_space) .describe(R"code(This operators implements the depthToSpace function: .. math:: f(x, block) = ~x -where :math:`x` is an input tensor of shape [N,C,H,W] and `~x` is the output tensor of shape [N,C/block^2,H*block,W*block] +where :math:`x` is an input tensor of shape [N,C,H,W] and `~x` is the output tensor of shape [N, C/(block^2), H*block, W*block] Example:: #(1,4,2,3) input tensor x = [[[[0, 1, 2], @@ -951,6 +950,7 @@ Example:: [](const NodeAttrs& attrs) { return std::vector >{{0, 0}}; }) +.set_attr("FGradient", ElemwiseGradUseNone{"space_to_depth"}) .add_argument("data", "NDArray-or-Symbol", "Input ndarray") .add_arguments(DepthToSpaceParam::__FIELDS__()); @@ -958,28 +958,28 @@ NNVM_REGISTER_OP(space_to_depth) .describe(R"code(This operators implements the spacetodepth function: .. 
math:: f(x, block) = ~x -where :math:`x` is an input tensor of shape [N,C,H,W] and `~x` is the output tensor of shape [N,C/block^2,H*block,W*block] +where :math:`x` is an input tensor of shape [N,C,H,W] and `~x` is the output tensor of shape [N, C*(block^2), H/block, W/block] Example:: #(1,4,2,3) input tensor x = [[[[0, 6, 1, 7, 2, 8], - [12, 18, 13, 19, 14, 20], - [3, 9, 4, 10, 5, 11], - [15, 21, 16, 22, 17, 23]]]] + [12, 18, 13, 19, 14, 20], + [3, 9, 4, 10, 5, 11], + [15, 21, 16, 22, 17, 23]]]] y = space_to_depth(x, 2) # (1, 1, 4, 6) output tensor y = [[[[0, 1, 2], - [3, 4, 5]], - [[6, 7, 8], - [9, 10, 11]], - [[12, 13, 14], - [15, 16, 17]], - [[18, 19, 20], - [21, 22, 23]]]] + [3, 4, 5]], + [[6, 7, 8], + [9, 10, 11]], + [[12, 13, 14], + [15, 16, 17]], + [[18, 19, 20], + [21, 22, 23]]]] )code" ADD_FILELINE) -.set_attr_parser(ParamParser) +.set_attr_parser(ParamParser) .set_num_inputs(1) .set_num_outputs(1) .set_attr("FListInputNames", @@ -996,8 +996,9 @@ Example:: [](const NodeAttrs& attrs) { return std::vector >{{0, 0}}; }) +.set_attr("FGradient", ElemwiseGradUseNone{"depth_to_space"}) .add_argument("data", "NDArray-or-Symbol", "Input ndarray") -.add_arguments(SpaceToDepthParam::__FIELDS__()); +.add_arguments(DepthToSpaceParam::__FIELDS__()); } // namespace op } // namespace mxnet diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py index 6bef78e26fae..e21820bdcee3 100644 --- a/tests/python/unittest/test_operator.py +++ b/tests/python/unittest/test_operator.py @@ -6678,6 +6678,7 @@ def test_diag(): diag_sym = mx.sym.diag(data=data, k=-1) check_numeric_gradient(diag_sym, [a_np]) +@with_seed() def test_depthtospace(): def f(x, blocksize): b, c, h, w = x.shape[0], x.shape[1], x.shape[2], x.shape[3] @@ -6686,15 +6687,22 @@ def f(x, blocksize): y = np.reshape(tmp, [b, c // (blocksize**2), h * blocksize, w * blocksize]) return y - - shape = (1,4,2,3) + shape_inp = (1,4,2,3) block = 2 - data = rand_ndarray(shape, 'default') + 
data = rand_ndarray(shape_inp, 'default') data_np = data.asnumpy() expected = f(data_np, block) output = mx.nd.depth_to_space(data, block) assert_almost_equal(output.asnumpy(), expected) + shape_out = (1,1,4,6) + data = mx.sym.Variable('data') + dts_sym = mx.sym.depth_to_space(data, block) + check_numeric_gradient(dts_sym, [np.ones(shape_inp)]) + + check_symbolic_forward(dts_sym, [data_np], [expected]) + check_symbolic_backward(dts_sym, [data_np], [np.ones(shape_out)], [np.ones(shape_inp)]) + @with_seed() def test_spacetodepth(): def f(x, blocksize): @@ -6704,15 +6712,22 @@ def f(x, blocksize): y = np.reshape(tmp, [b, c * (blocksize**2), h // blocksize, w // blocksize]) return y - - shape = (1,1,2,6) + shape_inp = (1,1,4,6) block = 2 - data = rand_ndarray(shape, 'default') + data = rand_ndarray(shape_inp, 'default') data_np = data.asnumpy() expected = f(data_np, block) output = mx.nd.space_to_depth(data, block) assert_almost_equal(output.asnumpy(), expected) + shape_out = (1,4,2,3) + data = mx.sym.Variable('data') + dts_sym = mx.sym.space_to_depth(data, block) + check_numeric_gradient(dts_sym, [np.ones(shape_inp)]) + + check_symbolic_forward(dts_sym, [data_np], [expected]) + check_symbolic_backward(dts_sym, [data_np], [np.ones(shape_out)], [np.ones(shape_inp)]) + if __name__ == '__main__': import nose nose.runmodule() From e803c95f148b1f6e24b279c47756aa07dff236d5 Mon Sep 17 00:00:00 2001 From: Rohit Kumar Srivastava Date: Wed, 11 Jul 2018 00:35:12 +0000 Subject: [PATCH 4/7] fixed documentation and nitpicky suggestions --- src/operator/tensor/matrix_op-inl.h | 61 +++++++++++++++++++++--- src/operator/tensor/matrix_op.cc | 74 +++++++++++++++-------------- 2 files changed, 93 insertions(+), 42 deletions(-) diff --git a/src/operator/tensor/matrix_op-inl.h b/src/operator/tensor/matrix_op-inl.h index 9c5d56f802f4..52d55040aa4c 100644 --- a/src/operator/tensor/matrix_op-inl.h +++ b/src/operator/tensor/matrix_op-inl.h @@ -2231,6 +2231,17 @@ inline bool 
DepthToSpaceOpType(const nnvm::NodeAttrs& attrs, inp_index += (idx - next_idx_val * dim_size) * offset_arr[X]; \ idx = next_idx_val; +/*! + * \brief This function preforms the tensor transpose (0, 1, 2, 3, 4, 5) -> + * (0, 3, 4, 1, 5, 2) by computing linear index within input tensor to be mapped + * to the ith index of output tensor + * \param i tensor index + * \param out_data output tensor + * \param in_data input tensor + * \param block size of chunks to be moved out of depth dimension + * \param size array containing the size of each dimension of input tensor + * \param offset_arr array containing the linear offset of input tensor + */ template struct depth_to_space_forward { template @@ -2253,6 +2264,19 @@ struct depth_to_space_forward { } }; +/*! + * \brief This function calculates the linear offset for each dimension of + * input tensor and stores them in an array, which is later used in + * performing depth_to_space operation + * \param i global thread id + * \param offset_arr array to be populated with offset values + * \param size array to be populated with size of each dimension of input tensor + * \param block size of chunks to be moved out of depth dimension + * \param size0 size of Dim 0 of input tensor + * \param size1 size of Dim 1 of input tensor + * \param size2 size of Dim 2 of input tensor + * \param size3 size of Dim 3 of input tensor + */ template struct compute_offset_for_depth_to_space { template @@ -2290,10 +2314,10 @@ void DepthToSpaceOpForward(const nnvm::NodeAttrs& attrs, int block = param.blockSize; mshadow::Tensor workspace = - ctx.requested[0].get_space_typed(mshadow::Shape1(sizeof(int32_t)*10), s); + ctx.requested[0].get_space_typed(mshadow::Shape1(sizeof(int32_t) * 10), s); char* workspace_curr_ptr = workspace.dptr_; int32_t* offset_arr = reinterpret_cast(workspace_curr_ptr); - int32_t* size = reinterpret_cast(workspace_curr_ptr + sizeof(int32_t)*6); + int32_t* size = reinterpret_cast(workspace_curr_ptr + sizeof(int32_t) * 6); 
MSHADOW_TYPE_SWITCH(out_data.type_flag_, DType, { MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, { @@ -2335,7 +2359,7 @@ inline bool SpaceToDepthOpShape(const nnvm::NodeAttrs& attrs, " Dimension:3(2nd space dimension) should be a multiple of 'block' "; expected_out[0] = in_shape[0]; - expected_out[1] = in_shape[1] * (block * block); + expected_out[1] = in_shape[1] * block * block; uint32_t i = 2; while (i < expected_out.ndim()) { expected_out[i] = in_shape[i] / block; @@ -2357,12 +2381,22 @@ inline bool SpaceToDepthOpType(const nnvm::NodeAttrs& attrs, return out_attrs->at(0) != -1; } +/*! + * \brief This function performs the tensor transpose (0, 1, 2, 3, 4, 5) -> + * (0, 3, 5, 1, 2, 4) by computing linear index within input tensor to be mapped + * to the ith index of output tensor + * \param i tensor index + * \param out_data output tensor + * \param in_data input tensor + * \param block size of chunks to be moved out of depth dimension + * \param size array containing the size of each dimension of input tensor + * \param offset_arr array containing the linear offset of input tensor + */ template struct space_to_depth_forward { template - MSHADOW_XINLINE static void Map(int i, DType* out_data, const DType* in_data, - const int block, const int* size, - const int* offset_arr) { + MSHADOW_XINLINE static void Map(int i, DType* out_data, const DType* in_data, const int block, + const int* size, const int* offset_arr) { int inp_index = 0, idx = i, next_idx_val, dim_size; dim_size = size[3] / block; UPDATE_INDEX_USING_OFFSET(4) @@ -2384,6 +2418,19 @@ struct space_to_depth_forward { #undef UPDATE_INDEX_USING_OFFSET #endif +/*! 
+ * \brief This function calculates the linear offset for each dimension of + * input tensor and stores them in an array, which is later used in + * performing space_to_depth operation + * \param i global thread id + * \param offset_arr array to be populated with offset values + * \param size array to be populated with size of each dimension of input tensor + * \param block size of chunks to be moved out of depth dimension + * \param size0 size of Dim 0 of input tensor + * \param size1 size of Dim 1 of input tensor + * \param size2 size of Dim 2 of input tensor + * \param size3 size of Dim 3 of input tensor + */ template struct compute_offset_for_space_to_depth { template @@ -2424,7 +2471,7 @@ void SpaceToDepthOpForward(const nnvm::NodeAttrs& attrs, ctx.requested[0].get_space_typed(mshadow::Shape1(sizeof(int32_t) * 10), s); char* workspace_curr_ptr = workspace.dptr_; int32_t* offset_arr = reinterpret_cast(workspace_curr_ptr); - int32_t* size = reinterpret_cast(workspace_curr_ptr + sizeof(int32_t)*6); + int32_t* size = reinterpret_cast(workspace_curr_ptr + sizeof(int32_t) * 6); MSHADOW_TYPE_SWITCH(out_data.type_flag_, DType, { MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, { diff --git a/src/operator/tensor/matrix_op.cc b/src/operator/tensor/matrix_op.cc index 7c311dfb59b7..34af9d8c2854 100644 --- a/src/operator/tensor/matrix_op.cc +++ b/src/operator/tensor/matrix_op.cc @@ -911,27 +911,28 @@ NNVM_REGISTER_OP(_backward_squeeze) NNVM_REGISTER_OP(depth_to_space) .describe(R"code(This operators implements the depthToSpace function: + .. 
math:: - f(x, block) = ~x -where :math:`x` is an input tensor of shape [N,C,H,W] and `~x` is the output tensor of shape [N, C/(block^2), H*block, W*block] + + f(x, block) = \tilde{x} + +where :math:`x` is an input tensor of shape [N,C,H,W] and :math:`\tilde{x}` is the output tensor of shape :math:`[N, C/(block^2), H*block, W*block]` + Example:: -#(1,4,2,3) input tensor + x = [[[[0, 1, 2], - [3, 4, 5]], - [[6, 7, 8], - [9, 10, 11]], - [[12, 13, 14], - [15, 16, 17]], - [[18, 19, 20], - [21, 22, 23]]]] + [3, 4, 5]], + [[6, 7, 8], + [9, 10, 11]], + [[12, 13, 14], + [15, 16, 17]], + [[18, 19, 20], + [21, 22, 23]]]] - y = depth_to_space(x, 2) - -# (1, 1, 4, 6) output tensor - y = [[[[0, 6, 1, 7, 2, 8], - [12, 18, 13, 19, 14, 20], - [3, 9, 4, 10, 5, 11], - [15, 21, 16, 22, 17, 23]]]] + depth_to_space(x, 2) = [[[[0, 6, 1, 7, 2, 8], + [12, 18, 13, 19, 14, 20], + [3, 9, 4, 10, 5, 11], + [15, 21, 16, 22, 17, 23]]]] )code" ADD_FILELINE) .set_attr_parser(ParamParser) .set_num_inputs(1) @@ -943,8 +944,9 @@ Example:: .set_attr("FInferShape", DepthToSpaceOpShape) .set_attr("FInferType", DepthToSpaceOpType) .set_attr("FCompute", DepthToSpaceOpForward) -.set_attr("FResourceRequest", [](const NodeAttrs& n) { - return std::vector{ResourceRequest::kTempSpace}; +.set_attr("FResourceRequest", + [](const NodeAttrs& n) { + return std::vector{ResourceRequest::kTempSpace}; }) .set_attr("FInplaceOption", [](const NodeAttrs& attrs) { @@ -956,28 +958,29 @@ Example:: NNVM_REGISTER_OP(space_to_depth) .describe(R"code(This operators implements the spacetodepth function: + .. 
math:: - f(x, block) = ~x -where :math:`x` is an input tensor of shape [N,C,H,W] and `~x` is the output tensor of shape [N, C*(block^2), H/block, W/block] + + f(x, blockSize) = \tilde{x} + +where :math:`x` is an input tensor of shape [N,C,H,W] and :math:`\tilde{x}` is the output tensor of shape :math:`[N, C*(block^2), H/block, W/block]` + Example:: -#(1,4,2,3) input tensor + x = [[[[0, 6, 1, 7, 2, 8], [12, 18, 13, 19, 14, 20], [3, 9, 4, 10, 5, 11], [15, 21, 16, 22, 17, 23]]]] - y = space_to_depth(x, 2) - -# (1, 1, 4, 6) output tensor - y = [[[[0, 1, 2], - [3, 4, 5]], - [[6, 7, 8], - [9, 10, 11]], - [[12, 13, 14], - [15, 16, 17]], - [[18, 19, 20], - [21, 22, 23]]]] + space_to_depth(x, 2) = [[[[0, 1, 2], + [3, 4, 5]], + [[6, 7, 8], + [9, 10, 11]], + [[12, 13, 14], + [15, 16, 17]], + [[18, 19, 20], + [21, 22, 23]]]] )code" ADD_FILELINE) .set_attr_parser(ParamParser) .set_num_inputs(1) @@ -989,8 +992,9 @@ Example:: .set_attr("FInferShape", SpaceToDepthOpShape) .set_attr("FInferType", SpaceToDepthOpType) .set_attr("FCompute", SpaceToDepthOpForward) -.set_attr("FResourceRequest", [](const NodeAttrs& n) { - return std::vector{ResourceRequest::kTempSpace}; +.set_attr("FResourceRequest", + [](const NodeAttrs& n) { + return std::vector{ResourceRequest::kTempSpace}; }) .set_attr("FInplaceOption", [](const NodeAttrs& attrs) { From 5187d2eb1ae1c8f27cd979fc956029996d2fbe76 Mon Sep 17 00:00:00 2001 From: Rohit Kumar Srivastava Date: Fri, 13 Jul 2018 23:35:14 +0000 Subject: [PATCH 5/7] added operator references in API docs and removed inplace optimization support --- docs/api/python/ndarray/ndarray.md | 4 ++++ docs/api/python/symbol/symbol.md | 4 ++++ src/operator/tensor/matrix_op.cc | 8 -------- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/api/python/ndarray/ndarray.md b/docs/api/python/ndarray/ndarray.md index d92c3e84e00f..01a154405cf3 100644 --- a/docs/api/python/ndarray/ndarray.md +++ b/docs/api/python/ndarray/ndarray.md @@ -156,6 +156,8 @@ The `ndarray` 
package provides several classes: NDArray.transpose NDArray.swapaxes NDArray.flip + NDArray.depth_to_space + NDArray.space_to_depth ``` ### Array reduction @@ -411,6 +413,8 @@ The `ndarray` package provides several classes: transpose swapaxes flip + depth_to_space + space_to_depth ``` ### Joining and splitting arrays diff --git a/docs/api/python/symbol/symbol.md b/docs/api/python/symbol/symbol.md index b0db774d9331..7c78cbd59b0d 100644 --- a/docs/api/python/symbol/symbol.md +++ b/docs/api/python/symbol/symbol.md @@ -222,6 +222,8 @@ Composite multiple symbols into a new one by an operator. Symbol.transpose Symbol.swapaxes Symbol.flip + Symbol.depth_to_space + Symbol.space_to_depth ``` ### Reduce functions @@ -409,6 +411,8 @@ Composite multiple symbols into a new one by an operator. transpose swapaxes flip + depth_to_space + space_to_depth ``` ### Joining and splitting symbols diff --git a/src/operator/tensor/matrix_op.cc b/src/operator/tensor/matrix_op.cc index 34af9d8c2854..5999ac845887 100644 --- a/src/operator/tensor/matrix_op.cc +++ b/src/operator/tensor/matrix_op.cc @@ -948,10 +948,6 @@ Example:: [](const NodeAttrs& n) { return std::vector{ResourceRequest::kTempSpace}; }) -.set_attr("FInplaceOption", - [](const NodeAttrs& attrs) { - return std::vector >{{0, 0}}; - }) .set_attr("FGradient", ElemwiseGradUseNone{"space_to_depth"}) .add_argument("data", "NDArray-or-Symbol", "Input ndarray") .add_arguments(DepthToSpaceParam::__FIELDS__()); @@ -996,10 +992,6 @@ Example:: [](const NodeAttrs& n) { return std::vector{ResourceRequest::kTempSpace}; }) -.set_attr("FInplaceOption", - [](const NodeAttrs& attrs) { - return std::vector >{{0, 0}}; - }) .set_attr("FGradient", ElemwiseGradUseNone{"depth_to_space"}) .add_argument("data", "NDArray-or-Symbol", "Input ndarray") .add_arguments(DepthToSpaceParam::__FIELDS__()); From 923f8a86552820bd7beaeac3d69a7e05a1f963d7 Mon Sep 17 00:00:00 2001 From: Rohit Kumar Srivastava Date: Fri, 20 Jul 2018 20:18:35 +0000 Subject: [PATCH 6/7] 
Added references in symbol.md and ndarray.md. Improved test cases and added block_size check --- src/operator/tensor/matrix_op-inl.h | 68 ++++++++++++++------------ src/operator/tensor/matrix_op.cc | 28 ++++++++--- tests/python/unittest/test_operator.py | 40 ++++++++++++++- 3 files changed, 97 insertions(+), 39 deletions(-) diff --git a/src/operator/tensor/matrix_op-inl.h b/src/operator/tensor/matrix_op-inl.h index 52d55040aa4c..5227d21d31df 100644 --- a/src/operator/tensor/matrix_op-inl.h +++ b/src/operator/tensor/matrix_op-inl.h @@ -2172,11 +2172,10 @@ inline bool SqueezeShape(const nnvm::NodeAttrs& attrs, } struct DepthToSpaceParam : public dmlc::Parameter { - int blockSize; + int block_size; DMLC_DECLARE_PARAMETER(DepthToSpaceParam) { - DMLC_DECLARE_FIELD(blockSize) - .describe("The size of chunks that need to be taken from depth and spread across to the" - " shape dimension of the tensor and vice versa"); + DMLC_DECLARE_FIELD(block_size) + .describe("Blocks of [block_size. block_size] are moved"); } }; @@ -2191,7 +2190,8 @@ inline bool DepthToSpaceOpShape(const nnvm::NodeAttrs& attrs, TShape expected_out(4); TShape& in_shape = in_attrs->at(0); - int block = param.blockSize; + int block = param.block_size; + CHECK_NE(block, 0) << "block_size must be a positive integer value"; CHECK_NE(in_shape[1], 0) << "Depth dimension:1 cannot be 0"; CHECK_EQ(in_shape[1] % (block * block), 0) << "Cannot perform Depth To Space operation on the specified tensor." @@ -2226,10 +2226,21 @@ inline bool DepthToSpaceOpType(const nnvm::NodeAttrs& attrs, return out_attrs->at(0) != -1; } -#define UPDATE_INDEX_USING_OFFSET(X) \ - next_idx_val = idx / dim_size; \ - inp_index += (idx - next_idx_val * dim_size) * offset_arr[X]; \ - idx = next_idx_val; +/*! 
+ * \brief This function updates the value of input index from where the data element + * needs to be fetched and written out to the ith location in output tensor + * \param index_position index within offset array to get offset of given dimension + * \param dim_size size of current dimension + * \param idx output tensor index + * \param inp_index index within input tensor from where value is retrieved + * \param offset_arr array containing the linear offset of input tensor + */ +MSHADOW_XINLINE void update_index(int index_position, int dim_size, int *idx, + int *inp_index, const int* offset_arr) { + int next_idx_val = *idx / dim_size; + *inp_index += (*idx - next_idx_val * dim_size) * offset_arr[index_position]; + *idx = next_idx_val; +} /*! * \brief This function preforms the tensor transpose (0, 1, 2, 3, 4, 5) -> @@ -2247,19 +2258,19 @@ struct depth_to_space_forward { template MSHADOW_XINLINE static void Map(int i, DType* out_data, const DType* in_data, const int block, const int* size, const int* offset_arr) { - int inp_index = 0, idx = i, next_idx_val, dim_size; + int inp_index = 0, idx = i, dim_size; dim_size = block; - UPDATE_INDEX_USING_OFFSET(2) + update_index(2, dim_size, &idx, &inp_index, offset_arr); dim_size = size[3]; - UPDATE_INDEX_USING_OFFSET(5) + update_index(5, dim_size, &idx, &inp_index, offset_arr); dim_size = block; - UPDATE_INDEX_USING_OFFSET(1) + update_index(1, dim_size, &idx, &inp_index, offset_arr); dim_size = size[2]; - UPDATE_INDEX_USING_OFFSET(4) + update_index(4, dim_size, &idx, &inp_index, offset_arr); dim_size = size[1] / (block * block); - UPDATE_INDEX_USING_OFFSET(3) + update_index(3, dim_size, &idx, &inp_index, offset_arr); dim_size = size[0]; - UPDATE_INDEX_USING_OFFSET(0) + update_index(0, dim_size, &idx, &inp_index, offset_arr); KERNEL_ASSIGN(out_data[i], req, in_data[inp_index]); } }; @@ -2311,7 +2322,7 @@ void DepthToSpaceOpForward(const nnvm::NodeAttrs& attrs, const TBlob& out_data = outputs[0]; const DepthToSpaceParam& 
param = nnvm::get(attrs.parsed); using namespace mxnet_op; - int block = param.blockSize; + int block = param.block_size; mshadow::Tensor workspace = ctx.requested[0].get_space_typed(mshadow::Shape1(sizeof(int32_t) * 10), s); @@ -2343,7 +2354,8 @@ inline bool SpaceToDepthOpShape(const nnvm::NodeAttrs& attrs, TShape expected_out(in_attrs->at(0).ndim()); TShape& in_shape = in_attrs->at(0); - int block = param.blockSize; + int block = param.block_size; + CHECK_NE(block, 0) << "block_size must be a positive integer value"; CHECK_NE(in_shape[0], 0) << "Operation requires a 4D tensor. Size of dimension:0 cannot be 0"; CHECK_NE(in_shape[1], 0) << "Depth dimension:1 cannot be 0"; @@ -2397,27 +2409,23 @@ struct space_to_depth_forward { template MSHADOW_XINLINE static void Map(int i, DType* out_data, const DType* in_data, const int block, const int* size, const int* offset_arr) { - int inp_index = 0, idx = i, next_idx_val, dim_size; + int inp_index = 0, idx = i, dim_size; dim_size = size[3] / block; - UPDATE_INDEX_USING_OFFSET(4) + update_index(4, dim_size, &idx, &inp_index, offset_arr); dim_size = size[2] / block; - UPDATE_INDEX_USING_OFFSET(2) + update_index(2, dim_size, &idx, &inp_index, offset_arr); dim_size = size[1]; - UPDATE_INDEX_USING_OFFSET(1) + update_index(1, dim_size, &idx, &inp_index, offset_arr); dim_size = block; - UPDATE_INDEX_USING_OFFSET(5) + update_index(5, dim_size, &idx, &inp_index, offset_arr); dim_size = block; - UPDATE_INDEX_USING_OFFSET(3) + update_index(3, dim_size, &idx, &inp_index, offset_arr); dim_size = size[0]; - UPDATE_INDEX_USING_OFFSET(0) + update_index(0, dim_size, &idx, &inp_index, offset_arr); KERNEL_ASSIGN(out_data[i], req, in_data[inp_index]); } }; -#ifdef UPDATE_INDEX_USING_OFFSET -#undef UPDATE_INDEX_USING_OFFSET -#endif - /*! 
* \brief This function calculates the linear offset for each dimension of * input tensor and stores them in an array, which is later used in @@ -2465,7 +2473,7 @@ void SpaceToDepthOpForward(const nnvm::NodeAttrs& attrs, const TBlob& out_data = outputs[0]; const DepthToSpaceParam& param = nnvm::get(attrs.parsed); using namespace mxnet_op; - int block = param.blockSize; + int block = param.block_size; mshadow::Tensor workspace = ctx.requested[0].get_space_typed(mshadow::Shape1(sizeof(int32_t) * 10), s); diff --git a/src/operator/tensor/matrix_op.cc b/src/operator/tensor/matrix_op.cc index 5999ac845887..eb62955eec97 100644 --- a/src/operator/tensor/matrix_op.cc +++ b/src/operator/tensor/matrix_op.cc @@ -910,13 +910,20 @@ NNVM_REGISTER_OP(_backward_squeeze) .set_attr("FCompute", UnaryOp::IdentityCompute); NNVM_REGISTER_OP(depth_to_space) -.describe(R"code(This operators implements the depthToSpace function: +.describe(R"code(Similar to ONNX DepthToSpace operator: +https://github.com/onnx/onnx/blob/master/docs/Operators.md#DepthToSpace. +Rearranges(permutes) data from depth into blocks of spatial data. +The output is a new tensor where the values from depth dimension are moved in spatial blocks +to height and width dimension. The reverse of this operation is ``space_to_depth``. .. 
math:: - f(x, block) = \tilde{x} + x \prime = reshape(x, [N, block_size, block_size, C / (block_size ^ 2), H * block_size, W * block_size]), + x \prime \prime = transpose(x \prime, [0, 3, 4, 1, 5, 2]) + y = reshape(x \prime \prime, [N, C / (block ^ 2), H * block_size, W * block_size]\) -where :math:`x` is an input tensor of shape [N,C,H,W] and :math:`\tilde{x}` is the output tensor of shape :math:`[N, C/(block^2), H*block, W*block]` +where :math:`x` is an input tensor with default layout as :math:`[N, C, H, W]`: [batch, channels, height, width] +and :math:`y` is the output tensor of layout :math:`[N, C / (block_size ^ 2), H * block_size, W * block_size]` Example:: @@ -928,7 +935,7 @@ Example:: [15, 16, 17]], [[18, 19, 20], [21, 22, 23]]]] - + depth_to_space(x, 2) = [[[[0, 6, 1, 7, 2, 8], [12, 18, 13, 19, 14, 20], [3, 9, 4, 10, 5, 11], @@ -953,13 +960,20 @@ Example:: .add_arguments(DepthToSpaceParam::__FIELDS__()); NNVM_REGISTER_OP(space_to_depth) -.describe(R"code(This operators implements the spacetodepth function: +.describe(R"code(Similar to ONNX SpaceToDepth operator: +https://github.com/onnx/onnx/blob/master/docs/Operators.md#SpaceToDepth +Rearranges(permutes) blocks of spatial data into depth. +The output is a new tensor where the values from height and width dimension are +moved to the depth dimension. The reverse of this operation is ``depth_to_space``. .. 
math:: - f(x, blockSize) = \tilde{x} + x \prime = reshape(x, [N, C, H / block_size, block_size, W / block_size, block_size]), + x \prime \prime = transpose(x \prime, [0, 3, 5, 1, 2, 4]) + y = reshape(x \prime \prime, [N, C * (block ^ 2), H / block_size, W / block_size]\) -where :math:`x` is an input tensor of shape [N,C,H,W] and :math:`\tilde{x}` is the output tensor of shape :math:`[N, C*(block^2), H/block, W/block]` +where :math:`x` is an input tensor with default layout as :math:`[N, C, H, W]`: [batch, channels, height, width] +and :math:`y` is the output tensor of layout :math:`[N, C * (block ^ 2), H / block, W / block]` Example:: diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py index e21820bdcee3..30a1704db692 100644 --- a/tests/python/unittest/test_operator.py +++ b/tests/python/unittest/test_operator.py @@ -6693,7 +6693,7 @@ def f(x, blocksize): data_np = data.asnumpy() expected = f(data_np, block) output = mx.nd.depth_to_space(data, block) - assert_almost_equal(output.asnumpy(), expected) + assert_almost_equal(output.asnumpy(), expected, atol=1e-3, rtol=1e-3) shape_out = (1,1,4,6) data = mx.sym.Variable('data') @@ -6703,6 +6703,24 @@ def f(x, blocksize): check_symbolic_forward(dts_sym, [data_np], [expected]) check_symbolic_backward(dts_sym, [data_np], [np.ones(shape_out)], [np.ones(shape_inp)]) + def test_invalid_depth_dim(): + invalid_shape_inp = (1,3,2,3) + block = 2 + data = rand_ndarray(invalid_shape_inp, 'default') + assertRaises(MXNetError, mx.nd.depth_to_space, data, block) + + def test_invalid_space_dim(): + invalid_shape_inp = (1,4,2,3) + block = 2 + data = rand_ndarray(invalid_shape_inp, 'default') + assertRaises(MXNetError, mx.nd.depth_to_space, data, block) + + def test_invalid_block_size(): + invalid_shape_inp = (1,0,2,3) + block = 2 + data = rand_ndarray(invalid_shape_inp, 'default') + assertRaises(MXNetError, mx.nd.depth_to_space, data, block) + @with_seed() def test_spacetodepth(): def f(x, 
blocksize): @@ -6718,7 +6736,7 @@ def f(x, blocksize): data_np = data.asnumpy() expected = f(data_np, block) output = mx.nd.space_to_depth(data, block) - assert_almost_equal(output.asnumpy(), expected) + assert_almost_equal(output.asnumpy(), expected, atol=1e-3, rtol=1e-3) shape_out = (1,4,2,3) data = mx.sym.Variable('data') @@ -6728,6 +6746,24 @@ def f(x, blocksize): check_symbolic_forward(dts_sym, [data_np], [expected]) check_symbolic_backward(dts_sym, [data_np], [np.ones(shape_out)], [np.ones(shape_inp)]) + def test_invalid_space_dim(): + invalid_shape_inp = (1,1,2,3) + block = 2 + data = rand_ndarray(invalid_shape_inp, 'default') + assertRaises(MXNetError, mx.nd.space_to_depth, data, block) + + def test_invalid_block_size(): + invalid_shape_inp = (1,1,4,2) + block = 0 + data = rand_ndarray(invalid_shape_inp, 'default') + assertRaises(MXNetError, mx.nd.space_to_depth, data, block) + + def test_invalid_depth_dim(): + invalid_shape_inp = (1,0,4,2) + block = 2 + data = rand_ndarray(invalid_shape_inp, 'default') + assertRaises(MXNetError, mx.nd.space_to_depth, data, block) + if __name__ == '__main__': import nose nose.runmodule() From 6879a506efe8bed6f9b4f2d97ef85e4736f38b8c Mon Sep 17 00:00:00 2001 From: Rohit Kumar Srivastava Date: Wed, 25 Jul 2018 20:53:34 +0000 Subject: [PATCH 7/7] Fixing bugs in documentation. Tests now include tensors of random shapes. 
--- python/mxnet/ndarray/ndarray.py | 16 +++++++++ python/mxnet/symbol/symbol.py | 16 +++++++++ src/operator/tensor/matrix_op-inl.h | 6 ++-- src/operator/tensor/matrix_op.cc | 29 ++++++++------- tests/python/unittest/test_operator.py | 49 +++++++++++++++++--------- 5 files changed, 84 insertions(+), 32 deletions(-) diff --git a/python/mxnet/ndarray/ndarray.py b/python/mxnet/ndarray/ndarray.py index 64d510296ffa..46b21a90d4c6 100644 --- a/python/mxnet/ndarray/ndarray.py +++ b/python/mxnet/ndarray/ndarray.py @@ -1302,6 +1302,22 @@ def flip(self, *args, **kwargs): """ return op.flip(self, *args, **kwargs) + def depth_to_space(self, *args, **kwargs): + """Convenience fluent method for :py:func:`depth_to_space`. + + The arguments are the same as for :py:func:`depth_to_space`, with + this array as data. + """ + return op.depth_to_space(self, *args, **kwargs) + + def space_to_depth(self, *args, **kwargs): + """Convenience fluent method for :py:func:`space_to_depth`. + + The arguments are the same as for :py:func:`space_to_depth`, with + this array as data. + """ + return op.space_to_depth(self, *args, **kwargs) + def diag(self, k=0, **kwargs): """Convenience fluent method for :py:func:`diag`. diff --git a/python/mxnet/symbol/symbol.py b/python/mxnet/symbol/symbol.py index ea476cdcb213..5f6cbd6b6e14 100644 --- a/python/mxnet/symbol/symbol.py +++ b/python/mxnet/symbol/symbol.py @@ -2046,6 +2046,22 @@ def flip(self, *args, **kwargs): """ return op.flip(self, *args, **kwargs) + def depth_to_space(self, *args, **kwargs): + """Convenience fluent method for :py:func:`depth_to_space`. + + The arguments are the same as for :py:func:`depth_to_space`, with + this array as data. + """ + return op.depth_to_space(self, *args, **kwargs) + + def space_to_depth(self, *args, **kwargs): + """Convenience fluent method for :py:func:`space_to_depth`. + + The arguments are the same as for :py:func:`space_to_depth`, with + this array as data. 
+ """ + return op.space_to_depth(self, *args, **kwargs) + def diag(self, k=0, **kwargs): """Convenience fluent method for :py:func:`diag`. diff --git a/src/operator/tensor/matrix_op-inl.h b/src/operator/tensor/matrix_op-inl.h index 5227d21d31df..eec920555ed1 100644 --- a/src/operator/tensor/matrix_op-inl.h +++ b/src/operator/tensor/matrix_op-inl.h @@ -2243,7 +2243,7 @@ MSHADOW_XINLINE void update_index(int index_position, int dim_size, int *idx, } /*! - * \brief This function preforms the tensor transpose (0, 1, 2, 3, 4, 5) -> + * \brief This function performs the tensor transpose (0, 1, 2, 3, 4, 5) -> * (0, 3, 4, 1, 5, 2) by computing linear index within input tensor to be mapped * to the ith index of output tensor * \param i tensor index @@ -2361,12 +2361,12 @@ inline bool SpaceToDepthOpShape(const nnvm::NodeAttrs& attrs, CHECK_NE(in_shape[1], 0) << "Depth dimension:1 cannot be 0"; CHECK_NE(in_shape[2], 0) << "Operation requires a 4D tensor. Size of dimension:2 cannot be 0"; - CHECK_EQ(in_shape[2]%block, 0) + CHECK_EQ(in_shape[2] % block, 0) << "Cannot perform Depth To Space operation on the specified tensor." " Dimension:2(1st Space dimension) should be a multiple of 'block' "; CHECK_NE(in_shape[3], 0) << "Operation requires a 4D tensor. Size of dimension:3 cannot be 0"; - CHECK_EQ(in_shape[3]%block, 0) + CHECK_EQ(in_shape[3] % block, 0) << "Cannot perform Depth To Space operation on the specified tensor." " Dimension:3(2nd space dimension) should be a multiple of 'block' "; diff --git a/src/operator/tensor/matrix_op.cc b/src/operator/tensor/matrix_op.cc index eb62955eec97..ffdc228b2d64 100644 --- a/src/operator/tensor/matrix_op.cc +++ b/src/operator/tensor/matrix_op.cc @@ -910,20 +910,22 @@ NNVM_REGISTER_OP(_backward_squeeze) .set_attr("FCompute", UnaryOp::IdentityCompute); NNVM_REGISTER_OP(depth_to_space) -.describe(R"code(Similar to ONNX DepthToSpace operator: +.describe(R"code(Rearranges(permutes) data from depth into blocks of spatial data. 
+Similar to ONNX DepthToSpace operator: https://github.com/onnx/onnx/blob/master/docs/Operators.md#DepthToSpace. -Rearranges(permutes) data from depth into blocks of spatial data. The output is a new tensor where the values from depth dimension are moved in spatial blocks to height and width dimension. The reverse of this operation is ``space_to_depth``. .. math:: - x \prime = reshape(x, [N, block_size, block_size, C / (block_size ^ 2), H * block_size, W * block_size]), - x \prime \prime = transpose(x \prime, [0, 3, 4, 1, 5, 2]) - y = reshape(x \prime \prime, [N, C / (block ^ 2), H * block_size, W * block_size]\) + \begin{gather*} + x \prime = reshape(x, [N, block\_size, block\_size, C / (block\_size ^ 2), H * block\_size, W * block\_size]) \\ + x \prime \prime = transpose(x \prime, [0, 3, 4, 1, 5, 2]) \\ + y = reshape(x \prime \prime, [N, C / (block\_size ^ 2), H * block\_size, W * block\_size]) + \end{gather*} where :math:`x` is an input tensor with default layout as :math:`[N, C, H, W]`: [batch, channels, height, width] -and :math:`y` is the output tensor of layout :math:`[N, C / (block_size ^ 2), H * block_size, W * block_size]` +and :math:`y` is the output tensor of layout :math:`[N, C / (block\_size ^ 2), H * block\_size, W * block\_size]` Example:: @@ -960,20 +962,23 @@ Example:: .add_arguments(DepthToSpaceParam::__FIELDS__()); NNVM_REGISTER_OP(space_to_depth) -.describe(R"code(Similar to ONNX SpaceToDepth operator: +.describe(R"code(Rearranges(permutes) blocks of spatial data into depth. +Similar to ONNX SpaceToDepth operator: https://github.com/onnx/onnx/blob/master/docs/Operators.md#SpaceToDepth -Rearranges(permutes) blocks of spatial data into depth. + The output is a new tensor where the values from height and width dimension are moved to the depth dimension. The reverse of this operation is ``depth_to_space``. .. 
math:: - x \prime = reshape(x, [N, C, H / block_size, block_size, W / block_size, block_size]), - x \prime \prime = transpose(x \prime, [0, 3, 5, 1, 2, 4]) - y = reshape(x \prime \prime, [N, C * (block ^ 2), H / block_size, W / block_size]\) + \begin{gather*} + x \prime = reshape(x, [N, C, H / block\_size, block\_size, W / block\_size, block\_size]) \\ + x \prime \prime = transpose(x \prime, [0, 3, 5, 1, 2, 4]) \\ + y = reshape(x \prime \prime, [N, C * (block\_size ^ 2), H / block\_size, W / block\_size]) + \end{gather*} where :math:`x` is an input tensor with default layout as :math:`[N, C, H, W]`: [batch, channels, height, width] -and :math:`y` is the output tensor of layout :math:`[N, C * (block ^ 2), H / block, W / block]` +and :math:`y` is the output tensor of layout :math:`[N, C * (block\_size ^ 2), H / block\_size, W / block\_size]` Example:: diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py index 30a1704db692..6b0e588889a5 100644 --- a/tests/python/unittest/test_operator.py +++ b/tests/python/unittest/test_operator.py @@ -6687,15 +6687,20 @@ def f(x, blocksize): y = np.reshape(tmp, [b, c // (blocksize**2), h * blocksize, w * blocksize]) return y - shape_inp = (1,4,2,3) - block = 2 + block = random.randint(2, 4) + rand_mul1 = random.randint(1, 4) + n = random.randint(1, 5) + c = block * block * rand_mul1 + h = random.randint(1, 5) + w = random.randint(1, 5) + shape_inp = (n, c, h, w) data = rand_ndarray(shape_inp, 'default') data_np = data.asnumpy() expected = f(data_np, block) output = mx.nd.depth_to_space(data, block) assert_almost_equal(output.asnumpy(), expected, atol=1e-3, rtol=1e-3) - shape_out = (1,1,4,6) + shape_out = (n, c // (block ** 2), h * block, w * block) data = mx.sym.Variable('data') dts_sym = mx.sym.depth_to_space(data, block) check_numeric_gradient(dts_sym, [np.ones(shape_inp)]) @@ -6704,22 +6709,24 @@ def f(x, blocksize): check_symbolic_backward(dts_sym, [data_np], [np.ones(shape_out)], 
[np.ones(shape_inp)]) def test_invalid_depth_dim(): - invalid_shape_inp = (1,3,2,3) - block = 2 + invalid_shape_inp = (n, block - 1, h, w) data = rand_ndarray(invalid_shape_inp, 'default') assertRaises(MXNetError, mx.nd.depth_to_space, data, block) def test_invalid_space_dim(): - invalid_shape_inp = (1,4,2,3) - block = 2 + invalid_shape_inp = (n, block ** 2, 0, block + 1) data = rand_ndarray(invalid_shape_inp, 'default') assertRaises(MXNetError, mx.nd.depth_to_space, data, block) def test_invalid_block_size(): - invalid_shape_inp = (1,0,2,3) - block = 2 + block = 0 + invalid_shape_inp = (n , c, h, w) data = rand_ndarray(invalid_shape_inp, 'default') assertRaises(MXNetError, mx.nd.depth_to_space, data, block) + + test_invalid_depth_dim() + test_invalid_space_dim() + test_invalid_block_size() @with_seed() def test_spacetodepth(): @@ -6730,15 +6737,21 @@ def f(x, blocksize): y = np.reshape(tmp, [b, c * (blocksize**2), h // blocksize, w // blocksize]) return y - shape_inp = (1,1,4,6) - block = 2 + block = random.randint(2, 4) + rand_mul1 = random.randint(1, 4) + rand_mul2 = random.randint(1, 4) + n = random.randint(1, 5) + c = random.randint(1, 5) + h = block * rand_mul1 + w = block * rand_mul2 + shape_inp = (n, c, h, w) data = rand_ndarray(shape_inp, 'default') data_np = data.asnumpy() expected = f(data_np, block) output = mx.nd.space_to_depth(data, block) assert_almost_equal(output.asnumpy(), expected, atol=1e-3, rtol=1e-3) - shape_out = (1,4,2,3) + shape_out = (n, c * (block ** 2), h // block, w // block) data = mx.sym.Variable('data') dts_sym = mx.sym.space_to_depth(data, block) check_numeric_gradient(dts_sym, [np.ones(shape_inp)]) @@ -6747,22 +6760,24 @@ def f(x, blocksize): check_symbolic_backward(dts_sym, [data_np], [np.ones(shape_out)], [np.ones(shape_inp)]) def test_invalid_space_dim(): - invalid_shape_inp = (1,1,2,3) - block = 2 + invalid_shape_inp = (n , c, block - 1, w) data = rand_ndarray(invalid_shape_inp, 'default') assertRaises(MXNetError, 
mx.nd.space_to_depth, data, block) def test_invalid_block_size(): - invalid_shape_inp = (1,1,4,2) block = 0 + invalid_shape_inp = (n, c, h, w) data = rand_ndarray(invalid_shape_inp, 'default') assertRaises(MXNetError, mx.nd.space_to_depth, data, block) def test_invalid_depth_dim(): - invalid_shape_inp = (1,0,4,2) - block = 2 + invalid_shape_inp = (n, 0, h, w) data = rand_ndarray(invalid_shape_inp, 'default') assertRaises(MXNetError, mx.nd.space_to_depth, data, block) + + test_invalid_space_dim() + test_invalid_block_size() + test_invalid_depth_dim() if __name__ == '__main__': import nose