diff --git a/mindspore/lite/nnacl/lsh_projection_parameter.h b/mindspore/lite/nnacl/lsh_projection_parameter.h
index 73b26b1617..ff81ba6dca 100644
--- a/mindspore/lite/nnacl/lsh_projection_parameter.h
+++ b/mindspore/lite/nnacl/lsh_projection_parameter.h
@@ -23,13 +23,10 @@
 typedef struct LshProjectionParameter {
   OpParameter op_parameter_;
   int lsh_type_;
   int hash_shape_[2];
-  int in_item_num_;
-  size_t in_item_size_;
-  size_t seed_size_;
-  size_t key_size_;
-  int64_t real_dst_count;
-  int task_id_;
-  int64_t count_unit_;
+  int feature_num_;
+  char **hash_buffs_;
+  size_t hash_buff_size_;
+  int64_t thread_stride_;
 } LshProjectionParameter;
 #endif  // MINDSPORE_LITE_NNACL_LSH_PROJECTION_PARAMETER_H_
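The parameter struct drops the per-task bookkeeping (task_id_, count_unit_, real_dst_count) in favor of a fixed thread_stride_ plus one small scratch buffer per thread (hash_buffs_). Below is a minimal standalone sketch of how that stride is meant to partition the hash rows across tasks; the pool size and row count are hypothetical, and UP_DIV is written out inline:

    // Sketch only: thread-strided partitioning as the new fields intend it.
    #include <algorithm>
    #include <cstdio>

    int main() {
      const int thread_num = 4;  // hypothetical thread pool size
      const int hash_rows = 8;   // plays the role of hash_shape_[0]
      const int stride = (hash_rows + thread_num - 1) / thread_num;  // UP_DIV
      for (int task_id = 0; task_id < thread_num; task_id++) {
        const int start = task_id * stride;
        const int count = std::min(hash_rows - start, stride);  // MSMIN in DoExecute
        if (count <= 0) continue;  // trailing tasks may receive no rows
        std::printf("task %d handles rows [%d, %d)\n", task_id, start, start + count);
      }
      return 0;
    }

Each task also gets its own hash_buffs_[task_id], so the hot loop no longer shares or reallocates scratch memory.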
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/lsh_projection.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/lsh_projection.cc
index bde9294ffa..3317ba7007 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/lsh_projection.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/lsh_projection.cc
@@ -23,7 +23,6 @@
 using mindspore::kernel::KERNEL_ARCH::kCPU;
 using mindspore::lite::KernelRegistrar;
 using mindspore::lite::RET_ERROR;
-using mindspore::lite::RET_MEMORY_FAILED;
 using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_LshProjection;
 
@@ -37,53 +36,77 @@ int LshProjectionCPUKernel::Init() {
 int LshProjectionCPUKernel::ReSize() { return RET_OK; }
 
+int LshProjectionRun(void *cdata, int task_id) {
+  auto kernel = reinterpret_cast<LshProjectionCPUKernel *>(cdata);
+  return kernel->DoExecute(task_id);
+}
+
 int LshProjectionCPUKernel::Run() {
-  auto input_tensor0 = in_tensors_.at(0);
-  auto input_tensor1 = in_tensors_.at(1);
-  auto out_tensor0 = out_tensors_.at(0);
-
-  hash = reinterpret_cast<float *>(input_tensor0->MutableData());
-  in_data = reinterpret_cast<char *>(input_tensor1->MutableData());
-  weight = in_tensors_.size() == 2 ? nullptr : reinterpret_cast<float *>(in_tensors_.at(2)->MutableData());
-  output = reinterpret_cast<int32_t *>(out_tensor0->MutableData());
-
-  const size_t seed_size = sizeof(float);
-  const size_t input_item_size =
-    input_tensor1->ElementsNum() * sizeof(input_tensor1->data_type()) / input_tensor1->DimensionSize(0);
-  const size_t key_size = seed_size + input_item_size;
-  lsh_param_->seed_size_ = seed_size;
-  lsh_param_->in_item_size_ = input_item_size;
-  lsh_param_->key_size_ = key_size;
-  lsh_param_->in_item_num_ = input_tensor1->DimensionSize(0);
-  memcpy(lsh_param_->hash_shape_, input_tensor0->shape().data(), sizeof(int) * input_tensor0->shape().size());
-
-  elements_num_ = input_tensor0->DimensionSize(0);
-  count_unit_ = thread_num_ > 1 ? UP_DIV(elements_num_, thread_num_) : elements_num_;
-  auto ret = ParallelLaunch(this->context_->thread_pool_, LshProjectionRun, this, thread_num_);
+  auto input0_tensor = in_tensors_.at(0);
+  auto input1_tensor = in_tensors_.at(1);
+  auto out_tensor = out_tensors_.at(0);
+
+  hash_seed_ = reinterpret_cast<float *>(input0_tensor->MutableData());
+  feature_ = reinterpret_cast<int32_t *>(input1_tensor->MutableData());
+  weight_ = in_tensors_.size() == 2 ? nullptr : reinterpret_cast<float *>(in_tensors_.at(2)->MutableData());
+  output_ = reinterpret_cast<int32_t *>(out_tensor->MutableData());
+
+  param_->hash_buff_size_ = sizeof(float) + sizeof(int32_t);
+  param_->feature_num_ = input1_tensor->ElementsNum();
+  param_->hash_shape_[0] = input0_tensor->DimensionSize(0);
+  param_->hash_shape_[1] = input0_tensor->DimensionSize(1);
+  param_->thread_stride_ = op_parameter_->thread_num_ > 1 ? UP_DIV(param_->hash_shape_[0], op_parameter_->thread_num_)
+                                                          : param_->hash_shape_[0];
+  auto ret = MallocKeys();
+  if (ret != RET_OK) {
+    return ret;
+  }
+  ret = ParallelLaunch(this->context_->thread_pool_, LshProjectionRun, this, op_parameter_->thread_num_);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "LshProjection kernel parallel launch failed";
+  }
+  FreeKeys();
   return ret;
 }
 
-int LshProjectionRun(void *cdata, int task_id) {
-  auto lsh_projection = reinterpret_cast<LshProjectionCPUKernel *>(cdata);
-  lsh_projection->DoExecute(task_id);
+int LshProjectionCPUKernel::MallocKeys() {
+  param_->hash_buffs_ = static_cast<char **>(context_->allocator->Malloc(op_parameter_->thread_num_ * sizeof(char *)));
+  if (param_->hash_buffs_ == nullptr) {
+    MS_LOG(ERROR) << "Memory allocation failed";
+    return RET_ERROR;
+  }
+  for (int i = 0; i < op_parameter_->thread_num_; i++) {
+    param_->hash_buffs_[i] = static_cast<char *>(context_->allocator->Malloc(param_->hash_buff_size_));
+    if (param_->hash_buffs_[i] == nullptr) {
+      FreeKeys();
+      MS_LOG(ERROR) << "Memory allocation failed";
+      return RET_ERROR;
+    }
+  }
   return RET_OK;
 }
 
-int LshProjectionCPUKernel::DoExecute(int task_id) {
-  int64_t real_dst_count = MSMIN(elements_num_ - task_id * count_unit_, count_unit_);
-  lsh_param_->real_dst_count = real_dst_count;
-  lsh_param_->task_id_ = task_id;
-  lsh_param_->count_unit_ = count_unit_;
-  if (real_dst_count <= 0) {
-    return lite::RET_OK;
+void LshProjectionCPUKernel::FreeKeys() {
+  if (param_->hash_buffs_ != nullptr) {
+    for (int i = 0; i < op_parameter_->thread_num_; i++) {
+      context_->allocator->Free(param_->hash_buffs_[i]);
+    }
+    context_->allocator->Free(param_->hash_buffs_);
   }
+}
+
+int LshProjectionCPUKernel::DoExecute(int task_id) {
+  int cur_group_num = MSMIN(param_->hash_shape_[0] - task_id * param_->thread_stride_, param_->thread_stride_);
+  int start = task_id * param_->thread_stride_;
+  int end = start + cur_group_num;
+  char *hash_buff = param_->hash_buffs_[task_id];
 
-  switch (lsh_param_->lsh_type_) {
+  switch (param_->lsh_type_) {
     case schema::LshProjectionType_SPARSE:
-      LshProjectionSparse(hash, in_data, weight, output, lsh_param_);
+      LshProjectionSparse(hash_seed_, feature_, weight_, output_, param_, start, end, hash_buff);
       break;
     case schema::LshProjectionType_DENSE:
-      LshProjectionDense(hash, in_data, weight, output, lsh_param_);
+      LshProjectionDense(hash_seed_, feature_, weight_, output_, param_, start, end, hash_buff);
       break;
     default:
      return RET_ERROR;
@@ -91,50 +114,43 @@ int LshProjectionCPUKernel::DoExecute(int task_id) {
   return RET_OK;
 }
 
-int LshProjectionCPUKernel::GetSignBit(char *in_data, float *weight, float seed, LshProjectionParameter *para) {
+int LshProjectionCPUKernel::GetSignBit(int32_t *feature_, float *weight_, float seed, LshProjectionParameter *para,
+                                       char *hash_buff) {
   double score = 0.0;
-  for (int i = 0; i < para->in_item_num_; i++) {
-    char *key = static_cast<char *>(context_->allocator->Malloc(lsh_param_->key_size_));
-    if (key == nullptr) {
-      MS_LOG(ERROR) << "malloc key failed.";
-      return RET_ERROR;
-    }
-    memcpy(key, &seed, para->seed_size_);
-    memcpy(key + para->seed_size_, in_data, para->in_item_size_);
-    in_data += para->in_item_size_;
-    int64_t hash_i = static_cast<int64_t>(mindspore::lite::StringHash64(key, para->key_size_));
+  for (int i = 0; i < para->feature_num_; i++) {
+    memcpy(hash_buff, &seed, sizeof(float));
+    memcpy(hash_buff + sizeof(float), &(feature_[i]), sizeof(int32_t));
+    int64_t hash_i = static_cast<int64_t>(lite::StringHash64(hash_buff, para->hash_buff_size_));
     double hash_d = static_cast<double>(hash_i);
-    if (weight == nullptr) {
+    if (weight_ == nullptr) {
       score += hash_d;
     } else {
-      score += weight[i] * hash_d;
+      score += weight_[i] * hash_d;
    }
-    context_->allocator->Free(key);
   }
   return (score > 0) ? 1 : 0;
 }
 
-void LshProjectionCPUKernel::LshProjectionSparse(float *hash, char *in_data, float *weight, int32_t *output,
-                                                 LshProjectionParameter *para) {
-  int start = para->task_id_ * para->count_unit_;
-  int end = start + para->real_dst_count;
+void LshProjectionCPUKernel::LshProjectionSparse(float *hash_seed_, int32_t *feature_, float *weight_, int32_t *output_,
+                                                 LshProjectionParameter *para, int32_t start, int32_t end,
+                                                 char *hash_buff) {
   for (int i = start; i < end; i++) {
     int32_t hash_sign = 0;
     for (int j = 0; j < para->hash_shape_[1]; j++) {
-      int bit = GetSignBit(in_data, weight, hash[i * para->hash_shape_[1] + j], para);
+      int bit = GetSignBit(feature_, weight_, hash_seed_[i * para->hash_shape_[1] + j], para, hash_buff);
       hash_sign = (hash_sign << 1) | bit;
     }
-    output[i] = hash_sign + i * (1 << para->hash_shape_[1]);
+    output_[i] = hash_sign + i * (1 << para->hash_shape_[1]);
   }
 }
 
-void LshProjectionCPUKernel::LshProjectionDense(float *hash, char *in_data, float *weight, int32_t *output,
-                                                LshProjectionParameter *para) {
-  int start = para->task_id_ * para->count_unit_;
-  int end = start + para->real_dst_count;
+void LshProjectionCPUKernel::LshProjectionDense(float *hash_seed_, int32_t *feature_, float *weight_, int32_t *output_,
+                                                LshProjectionParameter *para, int32_t start, int32_t end,
+                                                char *hash_buff) {
   for (int i = start; i < end; i++) {
     for (int j = 0; j < para->hash_shape_[1]; j++) {
-      output[i * para->hash_shape_[1] + j] = GetSignBit(in_data, weight, hash[i * para->hash_shape_[1] + j], para);
+      output_[i * para->hash_shape_[1] + j] =
+        GetSignBit(feature_, weight_, hash_seed_[i * para->hash_shape_[1] + j], para, hash_buff);
     }
   }
 }
@@ -144,16 +160,6 @@ kernel::LiteKernel *CpuLshProjectionFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
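The functional fix in this file is in GetSignBit: the old loop called Malloc and Free once per feature for every hash bit, and on allocation failure returned RET_ERROR where a 0/1 sign bit was expected; the new code reuses one preallocated (seed, feature) key per thread. A self-contained sketch of the computation follows, where std::hash over the raw key bytes is only a stand-in for lite::StringHash64, and GetSignBitSketch is a hypothetical name:

    #include <cstdint>
    #include <cstring>
    #include <functional>
    #include <string>

    // Accumulates an (optionally weighted) score of per-feature hashes and
    // returns its sign, i.e. one bit of the LSH signature.
    int GetSignBitSketch(const int32_t *feature, const float *weight, float seed,
                         int feature_num, char *hash_buff) {
      double score = 0.0;
      for (int i = 0; i < feature_num; i++) {
        std::memcpy(hash_buff, &seed, sizeof(float));  // key = seed bytes ...
        std::memcpy(hash_buff + sizeof(float), &feature[i], sizeof(int32_t));  // ... plus one feature
        const std::string key(hash_buff, sizeof(float) + sizeof(int32_t));
        const auto hash_i = static_cast<int64_t>(std::hash<std::string>{}(key));  // stand-in hash
        score += (weight == nullptr) ? static_cast<double>(hash_i)
                                     : weight[i] * static_cast<double>(hash_i);
      }
      return score > 0 ? 1 : 0;
    }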
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/lsh_projection.h b/mindspore/lite/src/runtime/kernel/arm/fp32/lsh_projection.h
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/lsh_projection.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/lsh_projection.h
   LshProjectionCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                          const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                          const mindspore::lite::PrimitiveC *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_num_(ctx->thread_num_) {
-    lsh_param_ = reinterpret_cast<LshProjectionParameter *>(op_parameter_);
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {
+    param_ = reinterpret_cast<LshProjectionParameter *>(op_parameter_);
   }
   ~LshProjectionCPUKernel() = default;
 
@@ -37,23 +37,21 @@ class LshProjectionCPUKernel : public LiteKernel {
   int ReSize() override;
   int Run() override;
   int DoExecute(int task_id);
-  int GetSignBit(char *in_data, float *weight, float seed, LshProjectionParameter *para);
-  void LshProjectionSparse(float *hash, char *in_data, float *weight, int32_t *output, LshProjectionParameter *param);
-  void LshProjectionDense(float *hash, char *in_data, float *weight, int32_t *output, LshProjectionParameter *param);
 
  private:
-  LshProjectionParameter *lsh_param_ = nullptr;
-  int thread_num_;
-  int64_t elements_num_;
-  int64_t count_unit_;
-  float *hash = nullptr;
-  char *in_data = nullptr;
-  float *weight = nullptr;
-  int32_t *output = nullptr;
+  int MallocKeys();
+  void FreeKeys();
+  int GetSignBit(int32_t *feature_, float *weight_, float seed, LshProjectionParameter *para, char *hash_buff);
+  void LshProjectionSparse(float *hash_seed_, int32_t *feature_, float *weight_, int32_t *output_,
+                           LshProjectionParameter *param, int32_t start, int32_t end, char *hash_buff);
+  void LshProjectionDense(float *hash_seed_, int32_t *feature_, float *weight_, int32_t *output_,
+                          LshProjectionParameter *param, int32_t start, int32_t end, char *hash_buff);
+  LshProjectionParameter *param_ = nullptr;
+  float *hash_seed_ = nullptr;
+  int32_t *feature_ = nullptr;
+  float *weight_ = nullptr;
+  int32_t *output_ = nullptr;
 };
-
-int LshProjectionRun(void *cdata, int task_id);
-
 }  // namespace mindspore::kernel
 
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_LSH_PROJECTION_H_
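For the sparse/dense split that the header now forwards start/end into, the two paths differ only in output layout. The sketch below fakes the sign bits (the real ones come from GetSignBit) purely to show the indexing; two rows and three bits are arbitrary choices:

    #include <cstdint>
    #include <cstdio>

    int main() {
      const int bits = 3;                  // plays the role of hash_shape_[1]
      const int sign[2][3] = {{1, 0, 1},   // pretend sign bits for row 0
                              {0, 1, 1}};  // pretend sign bits for row 1
      for (int i = 0; i < 2; i++) {
        int32_t packed = 0;
        for (int j = 0; j < bits; j++) packed = (packed << 1) | sign[i][j];
        // sparse: one bucket id per row, offset by i * 2^bits so rows never collide
        std::printf("sparse out[%d] = %d\n", i, packed + i * (1 << bits));
        // dense: one 0/1 entry per (row, bit)
        for (int j = 0; j < bits; j++) std::printf("dense out[%d] = %d\n", i * bits + j, sign[i][j]);
      }
      return 0;
    }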
diff --git a/mindspore/lite/tools/benchmark/benchmark.cc b/mindspore/lite/tools/benchmark/benchmark.cc
index 276559c68b..a370f32f5e 100644
--- a/mindspore/lite/tools/benchmark/benchmark.cc
+++ b/mindspore/lite/tools/benchmark/benchmark.cc
@@ -98,17 +98,27 @@ int Benchmark::ReadInputFile() {
         MS_LOG(ERROR) << "ReadFile return nullptr";
         return RET_ERROR;
       }
-      auto tensor_data_size = cur_tensor->Size();
-      if (size != tensor_data_size) {
-        std::cerr << "Input binary file size error, required: " << tensor_data_size << ", in fact: " << size
-                  << std::endl;
-        MS_LOG(ERROR) << "Input binary file size error, required: " << tensor_data_size << ", in fact: " << size;
-        delete bin_buf;
-        return RET_ERROR;
+      if (cur_tensor->data_type() == kObjectTypeString) {
+        std::string str(bin_buf, size);
+        auto ret = StringsToMSTensor({str}, cur_tensor);
+        if (ret != RET_OK) {
+          MS_LOG(ERROR) << "write strings to tensor failed";
+          delete[] bin_buf;
+          return RET_ERROR;
+        }
+      } else {
+        auto tensor_data_size = cur_tensor->Size();
+        if (size != tensor_data_size) {
+          std::cerr << "Input binary file size error, required: " << tensor_data_size << ", in fact: " << size
+                    << std::endl;
+          MS_LOG(ERROR) << "Input binary file size error, required: " << tensor_data_size << ", in fact: " << size;
+          delete[] bin_buf;
+          return RET_ERROR;
+        }
+        auto input_data = cur_tensor->MutableData();
+        memcpy(input_data, bin_buf, tensor_data_size);
       }
-      auto input_data = cur_tensor->MutableData();
-      memcpy(input_data, bin_buf, tensor_data_size);
-      delete[](bin_buf);
+      delete[] bin_buf;
     }
   }
   return RET_OK;
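The benchmark change routes kObjectTypeString inputs through StringsToMSTensor instead of the size check plus raw memcpy, since a packed string tensor stores a small header and offsets rather than bare bytes. As an illustration of why the memcpy path cannot work for strings, here is a sketch of a TFLite-style packing (count, offsets, payload); the exact layout StringsToMSTensor produces is an assumption here, and PackStrings is a hypothetical helper:

    #include <cstdint>
    #include <cstring>
    #include <string>
    #include <vector>

    std::vector<char> PackStrings(const std::vector<std::string> &strs) {
      const int32_t num = static_cast<int32_t>(strs.size());
      const size_t header = sizeof(int32_t) * (num + 2);  // count + (num + 1) offsets
      size_t total = header;
      for (const auto &s : strs) total += s.size();
      std::vector<char> buf(total);
      std::memcpy(buf.data(), &num, sizeof(int32_t));
      int32_t offset = static_cast<int32_t>(header);
      for (int32_t i = 0; i < num; i++) {
        std::memcpy(buf.data() + sizeof(int32_t) * (i + 1), &offset, sizeof(int32_t));  // start of string i
        std::memcpy(buf.data() + offset, strs[i].data(), strs[i].size());               // payload bytes
        offset += static_cast<int32_t>(strs[i].size());
      }
      // final offset marks the end of the last string
      std::memcpy(buf.data() + sizeof(int32_t) * (num + 1), &offset, sizeof(int32_t));
      return buf;
    }

A binary input file for a string tensor therefore holds only the raw text; the packed form, header included, is rebuilt at load time, which is what the new branch does with the file contents.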