@@ -0,0 +1,143 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "nnacl/fp16/cast_fp16.h" |
|
|
|
#include "nnacl/fp16/split_fp16.h" |
|
|
|
#include "src/runtime/kernel/arm/fp16/split_fp16.h" |
|
|
|
#include "src/runtime/kernel/arm/base/split_base.h" |
|
|
|
#include "src/runtime/kernel/arm/nnacl/split.h" |
|
|
|
#include "src/runtime/kernel/arm/nnacl/split_parameter.h" |
|
|
|
#include "src/kernel_registry.h" |
|
|
|
#include "include/errorcode.h" |
|
|
|
#include "src/runtime/runtime_api.h" |
|
|
|
|
|
|
|
using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Split;

namespace mindspore::kernel {

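// Fp16 Split kernel: inherits the slicing setup from SplitBaseCPUKernel and keeps
// one fp16 output pointer per split slice in output_ptr_.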
int SplitFp16CPUKernel::Init() {
  auto ret = SplitBaseCPUKernel::Init();
  if (ret != RET_OK) {
    return ret;
  }
  output_ptr_.resize(param->num_split_);
  if (!InferShapeDone()) {
    return RET_OK;
  }
  return ReSize();
}

int SplitFp16CPUKernel::ReSize() { return SplitBaseCPUKernel::ReSize(); }

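// Runs one thread's share of the split: each task handles up to thread_n_stride_
// units starting at task_id * thread_n_stride_; a non-positive count means this
// task has no work left.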
int SplitFp16CPUKernel::Split(int task_id) {
  int num_unit_thread = MSMIN(thread_n_stride_, num_unit_ - task_id * thread_n_stride_);
  if (num_unit_thread <= 0) {
    return RET_OK;
  }
  int thread_offset = task_id * thread_n_stride_;
  auto ret = DoSplitFp16(input_ptr_, output_ptr_.data(), in_tensors_.front()->shape().data(), thread_offset,
                         num_unit_thread, param);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Split error task_id[" << task_id << "] error_code[" << ret << "]";
    return RET_ERROR;
  }
  return RET_OK;
}

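// Parallel-launch trampoline: cdata carries the kernel instance, so each worker
// casts it back and executes its own task slice.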
int SplitRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
  auto g_kernel = reinterpret_cast<SplitFp16CPUKernel *>(cdata);
  auto ret = g_kernel->Split(task_id);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "SplitRun error task_id[" << task_id << "] error_code[" << ret << "]";
    return RET_ERROR;
  }
  return RET_OK;
}

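// If the input tensor is fp32, convert it into a temporary fp16 buffer first;
// fp32 outputs likewise go through fp16 scratch buffers and are converted back
// once the parallel split finishes.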
int SplitFp16CPUKernel::Run() {
  auto ret = Prepare();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Prepare failed.";
    return RET_ERROR;
  }
  auto in_tensor = in_tensors_.front();
  if (in_tensor->data_type() == kNumberTypeFloat32) {
    input_ptr_ =
      reinterpret_cast<float16_t *>(context_->allocator->Malloc(in_tensor->ElementsNum() * sizeof(float16_t)));
    Float32ToFloat16(reinterpret_cast<float *>(in_tensor->Data()), input_ptr_, in_tensor->ElementsNum());
  } else {
    input_ptr_ = reinterpret_cast<float16_t *>(in_tensor->Data());
  }
  for (int i = 0; i < param->num_split_; i++) {
    if (out_tensors_.at(i)->data_type() == kNumberTypeFloat32) {
      // fp32 outputs only need an fp16 scratch buffer here; their contents are
      // produced by the split, so there is nothing to convert beforehand.
      output_ptr_[i] = reinterpret_cast<float16_t *>(
        context_->allocator->Malloc(out_tensors_.at(i)->ElementsNum() * sizeof(float16_t)));
    } else {
      output_ptr_[i] = reinterpret_cast<float16_t *>(out_tensors_.at(i)->Data());
    }
  }
  ret = LiteBackendParallelLaunch(SplitRun, this, thread_n_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "split error error_code[" << ret << "]";
    return RET_ERROR;
  }
  if (in_tensor->data_type() == kNumberTypeFloat32) {
    context_->allocator->Free(input_ptr_);
    input_ptr_ = nullptr;
  }
  for (int i = 0; i < param->num_split_; i++) {
    if (out_tensors_.at(i)->data_type() == kNumberTypeFloat32) {
      // Copy the fp16 results back into the fp32 output tensors before releasing
      // the scratch buffers.
      Float16ToFloat32(output_ptr_[i], reinterpret_cast<float *>(out_tensors_.at(i)->Data()),
                       out_tensors_.at(i)->ElementsNum());
      context_->allocator->Free(output_ptr_[i]);
      output_ptr_[i] = nullptr;
    }
  }
  return RET_OK;
}

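// Factory for the registry entry below: validates the parameter, builds the
// kernel, and runs Init() before handing it back.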
kernel::LiteKernel *CpuSplitFp16KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                              const std::vector<lite::tensor::Tensor *> &outputs,
                                              OpParameter *opParameter, const Context *ctx,
                                              const kernel::KernelKey &desc, const lite::Primitive *primitive) {
  if (opParameter == nullptr) {
    MS_LOG(ERROR) << "Input opParameter is nullptr!";
    return nullptr;
  }
  MS_ASSERT(desc.type == schema::PrimitiveType_Split);
  auto *kernel = new (std::nothrow) SplitFp16CPUKernel(opParameter, inputs, outputs, ctx, primitive);
  if (kernel == nullptr) {
    MS_LOG(ERROR) << "new SplitFp16CPUKernel fail!";
    return nullptr;
  }
  auto ret = kernel->Init();
  if (ret != RET_OK) {
    delete kernel;
    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
    return nullptr;
  }
  return kernel;
}

REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Split, CpuSplitFp16KernelCreator)

}  // namespace mindspore::kernel