| @@ -1,5 +1,6 @@ | |||
| #pragma OPENCL EXTENSION cl_khr_fp16 : enable | |||
| #define INT4 int4 | |||
| #define C4NUM 4 | |||
| __constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; | |||
| #define CHECK_IDX_FOR_STACK \ | |||
| int X = get_global_id(0); \ | |||
| @@ -10,128 +11,95 @@ __constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | | |||
| } \ | |||
| FLT4 result; | |||
| __kernel void stack8inputaxis1(__read_only image2d_t input0, __read_only image2d_t input1, __read_only image2d_t input2, | |||
| __read_only image2d_t input3, __read_only image2d_t input4, __read_only image2d_t input5, | |||
| __read_only image2d_t input6, __read_only image2d_t input7, | |||
| __write_only image2d_t output, int4 input_shape0, int4 input_shape1, int4 input_shape2, | |||
| int4 input_shape3, int4 input_shape4, int4 input_shape5, int4 input_shape6, | |||
| int4 input_shape7, int4 output_shape) { | |||
| // input -1D | |||
| __kernel void stack_2input_3axis_1inshape(__read_only image2d_t input0, __read_only image2d_t input1, | |||
| __write_only image2d_t output, int4 input_shape, int4 output_shape) { | |||
| int X = get_global_id(0); | |||
| int Y = get_global_id(1); | |||
| if (X >= output_shape.x * output_shape.y || Y >= output_shape.z) { | |||
| return; | |||
| } | |||
| int coordinate_x_out = output_shape.w; | |||
| FLT4 result1 = READ_IMAGE(input0, smp_none, (int2)(0, (X))); | |||
| FLT4 result2 = READ_IMAGE(input1, smp_none, (int2)(0, (X))); | |||
| FLT4 result = {result1.x, result2.x, 0, 0}; | |||
| WRITE_IMAGE(output, (int2)(coordinate_x_out, (X)), result); | |||
| } | |||
| // input -2D -axis = 1 | |||
| __kernel void stack_2input_1axis_2inshape(__read_only image2d_t input0, __read_only image2d_t input1, | |||
| __write_only image2d_t output, int4 input_shape, int4 output_shape) { | |||
| CHECK_IDX_FOR_STACK; | |||
| if (X < input_shape0.y) { | |||
| result = READ_IMAGE(input0, smp_none, (int2)((Y)*input_shape0.w + Z, (X))); | |||
| } else if (X < (input_shape0.y + input_shape1.y)) { | |||
| result = READ_IMAGE(input1, smp_none, (int2)((Y)*input_shape1.w + Z, (X - input_shape0.y))); | |||
| } else if (X < (input_shape0.y + input_shape1.y + input_shape2.y)) { | |||
| result = READ_IMAGE(input2, smp_none, (int2)((Y)*input_shape2.w + Z, (X - input_shape0.y - input_shape1.y))); | |||
| } else if (X < (input_shape0.y + input_shape1.y + input_shape2.y + input_shape3.y)) { | |||
| result = READ_IMAGE(input3, smp_none, | |||
| (int2)((Y)*input_shape3.w + Z, (X - input_shape0.y - input_shape1.y - input_shape2.y))); | |||
| } else if (X < (input_shape0.y + input_shape1.y + input_shape2.y + input_shape3.y + input_shape4.y)) { | |||
| result = READ_IMAGE( | |||
| input4, smp_none, | |||
| (int2)((Y)*input_shape4.w + Z, (X - input_shape0.y - input_shape1.y - input_shape2.y - input_shape3.y))); | |||
| } else if (X < | |||
| (input_shape0.y + input_shape1.y + input_shape2.y + input_shape3.y + input_shape4.y + input_shape5.y)) { | |||
| result = READ_IMAGE(input5, smp_none, | |||
| (int2)((Y)*input_shape5.w + Z, (X - input_shape0.y - input_shape1.y - input_shape2.y - | |||
| input_shape3.y - input_shape4.y))); | |||
| } else if (X < (input_shape0.y + input_shape1.y + input_shape2.y + input_shape3.y + input_shape4.y + input_shape5.y + | |||
| input_shape6.y)) { | |||
| result = READ_IMAGE(input6, smp_none, | |||
| (int2)((Y)*input_shape6.w + Z, (X - input_shape0.y - input_shape1.y - input_shape2.y - | |||
| input_shape3.y - input_shape4.y - input_shape5.y))); | |||
| int IN = X / output_shape.y; | |||
| int IH = X % output_shape.y; | |||
| int boundary0 = input_shape.z; | |||
| if (Y < boundary0) { | |||
| int coordinate_x = Y * input_shape.w + Z; | |||
| int coordinate_y = IN * input_shape.y + IH; | |||
| result = READ_IMAGE(input0, smp_none, (int2)(coordinate_x, coordinate_y)); | |||
| } else { | |||
| result = | |||
| READ_IMAGE(input7, smp_none, | |||
| (int2)((Y)*input_shape7.w + Z, (X - input_shape0.y - input_shape1.y - input_shape2.y - input_shape3.y - | |||
| input_shape4.y - input_shape5.y - input_shape6.y))); | |||
| int coordinate_x = (Y - boundary0) * input_shape.w + Z; | |||
| int coordinate_y = IN * input_shape.y + IH; | |||
| result = READ_IMAGE(input1, smp_none, (int2)(coordinate_x, coordinate_y)); | |||
| } | |||
| WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result); | |||
| WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (IN * output_shape.y + IH)), result); | |||
| } | |||
| __kernel void stack8inputaxis2(__read_only image2d_t input0, __read_only image2d_t input1, __read_only image2d_t input2, | |||
| __read_only image2d_t input3, __read_only image2d_t input4, __read_only image2d_t input5, | |||
| __read_only image2d_t input6, __read_only image2d_t input7, | |||
| __write_only image2d_t output, int4 input_shape0, int4 input_shape1, int4 input_shape2, | |||
| int4 input_shape3, int4 input_shape4, int4 input_shape5, int4 input_shape6, | |||
| int4 input_shape7, int4 output_shape) { | |||
| // input -3D -axis = 1 | |||
| __kernel void stack_2input_1axis_3inshape(__read_only image2d_t input0, __read_only image2d_t input1, | |||
| __write_only image2d_t output, int4 input_shape, int4 output_shape) { | |||
| CHECK_IDX_FOR_STACK; | |||
| if (Y < input_shape0.z) { | |||
| result = READ_IMAGE(input0, smp_none, (int2)((Y)*input_shape0.w + Z, (X))); | |||
| } else if (Y < (input_shape0.z + input_shape1.z)) { | |||
| result = READ_IMAGE(input1, smp_none, (int2)((Y - input_shape0.z) * input_shape1.w + Z, (X))); | |||
| } else if (Y < (input_shape0.z + input_shape1.z + input_shape2.z)) { | |||
| result = READ_IMAGE(input2, smp_none, (int2)((Y - input_shape0.z - input_shape1.z) * input_shape2.w + Z, (X))); | |||
| } else if (Y < (input_shape0.z + input_shape1.z + input_shape2.z + input_shape3.z)) { | |||
| result = READ_IMAGE(input3, smp_none, | |||
| (int2)((Y - input_shape0.z - input_shape1.z - input_shape2.z) * input_shape3.w + Z, (X))); | |||
| } else if (Y < (input_shape0.z + input_shape1.z + input_shape2.z + input_shape3.z + input_shape4.z)) { | |||
| result = READ_IMAGE( | |||
| input4, smp_none, | |||
| (int2)((Y - input_shape0.z - input_shape1.z - input_shape2.z - input_shape3.z) * input_shape4.w + Z, (X))); | |||
| } else if (Y < | |||
| (input_shape0.z + input_shape1.z + input_shape2.z + input_shape3.z + input_shape4.z + input_shape5.z)) { | |||
| result = READ_IMAGE( | |||
| input5, smp_none, | |||
| (int2)( | |||
| (Y - input_shape0.z - input_shape1.z - input_shape2.z - input_shape3.z - input_shape4.z) * input_shape5.w + Z, | |||
| (X))); | |||
| } else if (Y < (input_shape0.z + input_shape1.z + input_shape2.z + input_shape3.z + input_shape4.z + input_shape5.z + | |||
| input_shape6.z)) { | |||
| result = READ_IMAGE( | |||
| input6, smp_none, | |||
| (int2)((Y - input_shape0.z - input_shape1.z - input_shape2.z - input_shape3.z - input_shape4.z - input_shape5.z) * | |||
| input_shape6.w + | |||
| Z, | |||
| (X))); | |||
| int IN = X / output_shape.y; | |||
| int IH = X % output_shape.y; | |||
| int boundary0 = input_shape.y; | |||
| if (IH < boundary0) { | |||
| int coordinate_x = Y * input_shape.w + Z; | |||
| int coordinate_y = IN * input_shape.y + IH; | |||
| result = READ_IMAGE(input0, smp_none, (int2)(coordinate_x, coordinate_y)); | |||
| } else { | |||
| result = READ_IMAGE(input7, smp_none, | |||
| (int2)((Y - input_shape0.z - input_shape1.z - input_shape2.z - input_shape3.z - input_shape4.z - | |||
| input_shape5.z - input_shape6.z) * | |||
| input_shape7.w + | |||
| Z, | |||
| (X))); | |||
| int coordinate_x = Y * input_shape.w + Z; | |||
| int coordinate_y = IN * input_shape.y + IH - boundary0; | |||
| result = READ_IMAGE(input1, smp_none, (int2)(coordinate_x, coordinate_y)); | |||
| } | |||
| WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result); | |||
| WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (IN * output_shape.y + IH)), result); | |||
| } | |||
| __kernel void stack8inputaxis3(__read_only image2d_t input0, __read_only image2d_t input1, __read_only image2d_t input2, | |||
| __read_only image2d_t input3, __read_only image2d_t input4, __read_only image2d_t input5, | |||
| __read_only image2d_t input6, __read_only image2d_t input7, | |||
| __write_only image2d_t output, int4 input_shape0, int4 input_shape1, int4 input_shape2, | |||
| int4 input_shape3, int4 input_shape4, int4 input_shape5, int4 input_shape6, | |||
| int4 input_shape7, int4 output_shape) { | |||
| // input -3D -axis = 2 | |||
| __kernel void stack_2input_2axis_3inshape(__read_only image2d_t input0, __read_only image2d_t input1, | |||
| __write_only image2d_t output, int4 input_shape, int4 output_shape) { | |||
| CHECK_IDX_FOR_STACK; | |||
| if (Z < input_shape0.w) { | |||
| result = READ_IMAGE(input0, smp_none, (int2)((Y)*input_shape0.w + Z, (X))); | |||
| } else if (Z < (input_shape0.w + input_shape1.w)) { | |||
| result = READ_IMAGE(input1, smp_none, (int2)((Y)*input_shape1.w + Z - input_shape0.w, (X))); | |||
| } else if (Z < (input_shape0.w + input_shape1.w + input_shape2.w)) { | |||
| result = READ_IMAGE(input2, smp_none, (int2)((Y)*input_shape2.w + Z - input_shape0.w - input_shape1.w, (X))); | |||
| } else if (Z < (input_shape0.w + input_shape1.w + input_shape2.w + input_shape3.w)) { | |||
| result = READ_IMAGE(input3, smp_none, | |||
| (int2)((Y)*input_shape3.w + Z - input_shape0.w - input_shape1.w - input_shape2.w, (X))); | |||
| } else if (Z < (input_shape0.w + input_shape1.w + input_shape2.w + input_shape3.w + input_shape4.w)) { | |||
| result = READ_IMAGE( | |||
| input4, smp_none, | |||
| (int2)((Y)*input_shape4.w + Z - input_shape0.w - input_shape1.w - input_shape2.w - input_shape3.w, (X))); | |||
| } else if (Z < | |||
| (input_shape0.w + input_shape1.w + input_shape2.w + input_shape3.w + input_shape4.w + input_shape5.w)) { | |||
| result = READ_IMAGE(input5, smp_none, | |||
| (int2)((Y)*input_shape5.w + Z - input_shape0.w - input_shape1.w - input_shape2.w - | |||
| input_shape3.w - input_shape4.w, | |||
| (X))); | |||
| } else if (Z < (input_shape0.w + input_shape1.w + input_shape2.w + input_shape3.w + input_shape4.w + input_shape5.w + | |||
| input_shape6.w)) { | |||
| result = READ_IMAGE(input6, smp_none, | |||
| (int2)((Y)*input_shape6.w + Z - input_shape0.w - input_shape1.w - input_shape2.w - | |||
| input_shape3.w - input_shape4.w - input_shape5.w, | |||
| (X))); | |||
| int boundary0 = input_shape.y; | |||
| int IN = X / output_shape.y; | |||
| int IW = X % output_shape.y; | |||
| int IC = Z; | |||
| int coordinate_x = IW * input_shape.w + IC; | |||
| int coordinate_y = IN * input_shape.y; | |||
| if (Y < boundary0) { | |||
| result = READ_IMAGE(input0, smp_none, (int2)(coordinate_x, coordinate_y)); | |||
| } else { | |||
| result = READ_IMAGE(input7, smp_none, | |||
| (int2)((Y)*input_shape7.w + Z - input_shape0.w - input_shape1.w - input_shape2.w - | |||
| input_shape3.w - input_shape4.w - input_shape5.w - input_shape6.w, | |||
| (X))); | |||
| result = READ_IMAGE(input1, smp_none, (int2)(coordinate_x, coordinate_y)); | |||
| } | |||
| WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, IN * output_shape.y + IW), result); | |||
| } | |||
| // input -3D -axis = 3 and input -2D -axis = 2 boundary stack | |||
| __kernel void stack_2input_boundary(__global float *input0, __global float *input1, __global float *output, | |||
| int4 input_shape, int4 output_shape, int2 stride_w) { | |||
| int X = get_global_id(0); // N | |||
| int Y = get_global_id(1); // H | |||
| if (X >= output_shape.x || Y >= output_shape.y) { | |||
| return; | |||
| } | |||
| int IW = output_shape.z; | |||
| int Align_out = output_shape.w * C4NUM; | |||
| int Align_in = input_shape.w * C4NUM; | |||
| int index_out = X * output_shape.y * stride_w.x + Y * stride_w.x; | |||
| int index_in = X * input_shape.y * stride_w.y + Y * Align_in; | |||
| for (int iw = 0; iw < IW; iw++) { | |||
| int index_out_tmp = index_out + iw * Align_out; | |||
| int index_in_tmp = index_in + iw; | |||
| output[index_out_tmp] = input0[index_in_tmp]; | |||
| index_out_tmp++; | |||
| output[index_out_tmp] = input1[index_in_tmp]; | |||
| } | |||
| WRITE_IMAGE(output, (int2)((Y)*output_shape.w + Z, (X)), result); | |||
| } | |||
| @@ -47,40 +47,6 @@ int StackOpenCLKernel::RunAxis0() { | |||
| return RET_OK; | |||
| } | |||
| int StackOpenCLKernel::Init() { | |||
| if (in_tensors_[0]->shape().size() > 4 || in_tensors_[0]->shape().size() <= 0) { | |||
| MS_LOG(ERROR) << " only support dim <= 4 "; | |||
| return RET_ERROR; | |||
| } | |||
| auto param = reinterpret_cast<StackParameter *>(this->op_parameter_); | |||
| axis_ = param->axis_; | |||
| axis_ = axis_ < 0 ? axis_ + in_tensors_[0]->shape().size() + 1 : axis_; | |||
| if (in_tensors_[0]->shape().size() != 4) { | |||
| if (in_tensors_[0]->shape().size() == 2) { | |||
| axis_ = axis_ + 2; | |||
| } | |||
| } | |||
| if (param->axis_ < -3 || param->axis_ > 3) { | |||
| MS_LOG(ERROR) << " only support axis >= -3 and axis <= 3 "; | |||
| return RET_ERROR; | |||
| } | |||
| std::string kernel_name = "stack"; | |||
| if (in_tensors_.size() == 8) { | |||
| kernel_name += "8inputaxis" + std::to_string(axis_); | |||
| } else { | |||
| MS_LOG(ERROR) << " input must be 8"; | |||
| return RET_ERROR; | |||
| } | |||
| MS_LOG(DEBUG) << "kernel_name=: " << kernel_name; | |||
| std::string source = stack_source; | |||
| std::string program_name = "stack"; | |||
| ocl_runtime_->LoadSource(program_name, source); | |||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name); | |||
| return RET_OK; | |||
| } | |||
| int StackOpenCLKernel::ReSize() { return RET_OK; } | |||
| void StackGetWorkGroup(const std::vector<size_t> &global, std::vector<size_t> *local, int max_size) { | |||
| @@ -97,114 +63,123 @@ void StackGetWorkGroup(const std::vector<size_t> &global, std::vector<size_t> *l | |||
| local->push_back(z); | |||
| } | |||
| int StackOpenCLKernel::InferInTensorShapeTo4D(int *arg_cn) { | |||
| if (in_tensors_.size() == 8) { | |||
| int size = in_tensors_[0]->shape().size(); | |||
| switch (size) { | |||
| case 1: | |||
| for (int i = 0; i < in_tensors_.size(); ++i) { | |||
| ocl_runtime_->SetKernelArg(kernel_, (*arg_cn)++, in_tensors_[i]->data_c()); | |||
| } | |||
| ocl_runtime_->SetKernelArg(kernel_, (*arg_cn)++, out_tensors_[0]->data_c()); | |||
| for (int i = 0; i < in_tensors_.size(); ++i) { | |||
| cl_int4 temp = {in_tensors_[i]->shape()[0], 1, 1, 1}; | |||
| ocl_runtime_->SetKernelArg(kernel_, (*arg_cn)++, temp); | |||
| } | |||
| break; | |||
| case 2: | |||
| for (int i = 0; i < in_tensors_.size(); ++i) { | |||
| ocl_runtime_->SetKernelArg(kernel_, (*arg_cn)++, in_tensors_[i]->data_c()); | |||
| } | |||
| ocl_runtime_->SetKernelArg(kernel_, (*arg_cn)++, out_tensors_[0]->data_c()); | |||
| for (int i = 0; i < in_tensors_.size(); ++i) { | |||
| cl_int4 temp = {in_tensors_[i]->shape()[0], 1, 1, UP_DIV(in_tensors_[i]->shape()[1], C4NUM)}; | |||
| ocl_runtime_->SetKernelArg(kernel_, (*arg_cn)++, temp); | |||
| } | |||
| break; | |||
| case 3: | |||
| for (int i = 0; i < in_tensors_.size(); ++i) { | |||
| ocl_runtime_->SetKernelArg(kernel_, (*arg_cn)++, in_tensors_[i]->data_c()); | |||
| } | |||
| ocl_runtime_->SetKernelArg(kernel_, (*arg_cn)++, out_tensors_[0]->data_c()); | |||
| for (int i = 0; i < in_tensors_.size(); ++i) { | |||
| cl_int4 temp = {in_tensors_[i]->shape()[0], 1, in_tensors_[i]->shape()[1], | |||
| UP_DIV(in_tensors_[i]->shape()[2], C4NUM)}; | |||
| ocl_runtime_->SetKernelArg(kernel_, (*arg_cn)++, temp); | |||
| } | |||
| break; | |||
| default: | |||
| MS_LOG(ERROR) << "unsupported input size > 3 or size <= 0 :" << in_tensors_.size(); | |||
| return RET_ERROR; | |||
| } | |||
| } else { | |||
| MS_LOG(ERROR) << "unsupported input size :" << in_tensors_.size(); | |||
| int StackOpenCLKernel::CheckSpecs() { | |||
| if (in_tensors_[0]->shape().size() > 2 && (axis_ != 0)) { | |||
| MS_LOG(ERROR) << " only support input size = 2 "; | |||
| return RET_ERROR; | |||
| } | |||
| if (in_tensors_[0]->shape().size() > 4 || in_tensors_[0]->shape().size() <= 0) { | |||
| MS_LOG(ERROR) << " only support dim <= 4 "; | |||
| return RET_ERROR; | |||
| } | |||
| auto param = reinterpret_cast<StackParameter *>(this->op_parameter_); | |||
| axis_ = param->axis_; | |||
| axis_ = axis_ < 0 ? axis_ + in_tensors_[0]->shape().size() : axis_; | |||
| if (axis_ > 3) { | |||
| MS_LOG(ERROR) << " only support axis <= 3 "; | |||
| return RET_ERROR; | |||
| } | |||
| if (axis_ > in_tensors_[0]->shape().size()) { | |||
| MS_LOG(ERROR) << " stack axis must been <= in_tensors_[0]->shape().size() "; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int StackOpenCLKernel::InferOutTensorShapeTo4D(cl_int4 *output_shape) { | |||
| std::vector<int> out_shape = out_tensors_[0]->shape(); | |||
| if (out_shape.size() == 3) { | |||
| N_ = out_shape[0]; | |||
| C_ = out_shape[1] * UP_DIV(out_shape[2], C4NUM); | |||
| } else if (out_shape.size() == 4) { | |||
| if (axis_ == 1) { | |||
| N_ = out_shape[0]; | |||
| H_ = out_shape[1]; | |||
| W_ = out_shape[2]; | |||
| C_ = UP_DIV(out_shape[3], C4NUM); | |||
| } else { | |||
| MS_LOG(ERROR) << "Unsupported out_shape.size=: " << out_shape.size() << " axis=: " << axis_; | |||
| return RET_ERROR; | |||
| } | |||
| void StackOpenCLKernel::SetConstArgs() { | |||
| int arg_cn = in_tensors_.size() + 1; | |||
| cl_int4 inshape_tmp = {}, outshape_tmp = {}; | |||
| for (int i = 0; i < in_tensors_[0]->shape().size(); ++i) { | |||
| inshape_tmp.s[i] = in_tensors_[0]->shape()[i]; | |||
| } | |||
| Broadcast2GpuShape(in_shape_.s, inshape_tmp.s, in_tensors_[0]->shape().size(), 1); | |||
| for (int i = 0; i < out_tensors_[0]->shape().size(); ++i) { | |||
| outshape_tmp.s[i] = out_tensors_[0]->shape()[i]; | |||
| } | |||
| Broadcast2GpuShape(out_shape_.s, outshape_tmp.s, out_tensors_[0]->shape().size(), 1); | |||
| in_shape_.s[3] = UP_DIV(in_shape_.s[3], C4NUM); | |||
| out_shape_.s[3] = UP_DIV(out_shape_.s[3], C4NUM); | |||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_); | |||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_); | |||
| if (buffer_button_) { | |||
| GpuTensorInfo img_info_out(out_tensors_[0]); | |||
| GpuTensorInfo img_info_in(in_tensors_[0]); | |||
| size_t dtype = enable_fp16_ ? sizeof(cl_half) : sizeof(cl_float); | |||
| stride_w_out = img_info_out.RowPitch() / dtype; | |||
| stride_w_in = img_info_in.RowPitch() / dtype; | |||
| cl_int2 stride_w = {stride_w_out, stride_w_in}; | |||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w); | |||
| } | |||
| OH_ = N_ * H_; | |||
| OW_ = W_; | |||
| OC_ = C_; | |||
| output_shape->s[0] = N_; | |||
| output_shape->s[1] = H_; | |||
| output_shape->s[2] = W_; | |||
| output_shape->s[3] = C_; | |||
| return RET_OK; | |||
| } | |||
| int StackOpenCLKernel::Run() { | |||
| MS_LOG(DEBUG) << this->name() << " Running! "; | |||
| if (axis_ == 0) { | |||
| return RunAxis0(); | |||
| void StackOpenCLKernel::SetGlobalLocal() { | |||
| if (((in_tensors_[0]->shape().size() == 2 || in_tensors_[0]->shape().size() == 3) && axis_ == 1) || | |||
| (in_tensors_[0]->shape().size() == 3 && axis_ == 2)) { | |||
| OH_ = out_shape_.s[0] * out_shape_.s[1]; | |||
| OW_ = out_shape_.s[2]; | |||
| OC_ = out_shape_.s[3]; | |||
| } else if (in_tensors_[0]->shape().size() == 1) { | |||
| OH_ = out_shape_.s[0] * out_shape_.s[1]; | |||
| OW_ = out_shape_.s[2]; | |||
| } else { | |||
| OH_ = out_shape_.s[0]; | |||
| OW_ = out_shape_.s[1]; | |||
| } | |||
| cl_int4 output_shape = {1, 1, 1, 1}; | |||
| const std::vector<size_t> &max_global = ocl_runtime_->GetWorkItemSize(); | |||
| std::vector<size_t> local = {1, 1, 1}; | |||
| int arg_cn = 0; | |||
| InferInTensorShapeTo4D(&arg_cn); | |||
| InferOutTensorShapeTo4D(&output_shape); | |||
| std::vector<size_t> global = {OH_, OW_, OC_}; | |||
| StackGetWorkGroup(global, &local, max_global[0]); | |||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape); | |||
| ocl_runtime_->RunKernel(kernel_, global, local); | |||
| OpenCLKernel::AlignGlobalLocal(global, local); | |||
| } | |||
| int StackOpenCLKernel::Prepare() { | |||
| enable_fp16_ = ocl_runtime_->GetFp16Enable(); | |||
| if (in_tensors_[0]->shape().size() == 1 && axis_ == 1) { | |||
| axis_ += 2; | |||
| } else if (in_tensors_[0]->shape().size() == axis_) { | |||
| buffer_button_ = true; // boundary stack judge | |||
| } | |||
| std::string kernel_name = "stack_"; | |||
| if (!buffer_button_) { | |||
| kernel_name += std::to_string(in_tensors_.size()) + "input_" + std::to_string(axis_) + "axis_" + | |||
| std::to_string(in_tensors_[0]->shape().size()) + "inshape"; | |||
| } else { | |||
| kernel_name += std::to_string(in_tensors_.size()) + "input_" + "boundary"; | |||
| } | |||
| MS_LOG(DEBUG) << "kernel_name=: " << kernel_name; | |||
| std::string source = stack_source; | |||
| std::string program_name = "stack"; | |||
| ocl_runtime_->LoadSource(program_name, source); | |||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name); | |||
| SetConstArgs(); | |||
| SetGlobalLocal(); | |||
| return RET_OK; | |||
| } | |||
| kernel::LiteKernel *OpenCLStackKernelCreator(const std::vector<lite::Tensor *> &inputs, | |||
| const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter, | |||
| const lite::InnerContext *ctx, const kernel::KernelKey &desc, | |||
| const mindspore::lite::PrimitiveC *primitive) { | |||
| auto *kernel = new (std::nothrow) StackOpenCLKernel(opParameter, inputs, outputs); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << " new StackOpenCLKernel failed "; | |||
| return nullptr; | |||
| int StackOpenCLKernel::Run() { | |||
| MS_LOG(DEBUG) << this->name() << " Running! "; | |||
| if (axis_ == 0) { | |||
| return RunAxis0(); | |||
| } | |||
| auto ret = kernel->Init(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << " Init kernel failed, name: Stack "; | |||
| delete kernel; | |||
| return nullptr; | |||
| int arg_cn = 0; | |||
| if (buffer_button_) { | |||
| for (int i = 0; i < in_tensors_.size(); ++i) { | |||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[i]->data_c(), lite::opencl::MemType::BUF); | |||
| } | |||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF); | |||
| } else { | |||
| for (int i = 0; i < in_tensors_.size(); ++i) { | |||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[i]->data_c()); | |||
| } | |||
| ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); | |||
| } | |||
| return kernel; | |||
| ocl_runtime_->RunKernel(kernel_, global_range_, local_range_); | |||
| return RET_OK; | |||
| } | |||
| REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Stack, OpenCLKernelCreator<StackOpenCLKernel>); | |||
| REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Stack, OpenCLKernelCreator<StackOpenCLKernel>); | |||
| REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Stack, OpenCLStackKernelCreator); | |||
| REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Stack, OpenCLStackKernelCreator); | |||
| } // namespace mindspore::kernel | |||
| @@ -30,8 +30,10 @@ class StackOpenCLKernel : public OpenCLKernel { | |||
| : OpenCLKernel(parameter, inputs, outputs) {} | |||
| ~StackOpenCLKernel() override{}; | |||
| int Init() override; | |||
| int Prepare() override; | |||
| int CheckSpecs() override; | |||
| void SetConstArgs() override; | |||
| void SetGlobalLocal() override; | |||
| int ReSize() override; | |||
| @@ -40,18 +42,17 @@ class StackOpenCLKernel : public OpenCLKernel { | |||
| private: | |||
| int RunAxis0(); | |||
| int InferInTensorShapeTo4D(int *arg_cn); | |||
| int InferOutTensorShapeTo4D(cl_int4 *output_shape); | |||
| cl::Kernel kernel_; | |||
| int axis_{0}; | |||
| size_t N_{1}; | |||
| size_t H_{1}; | |||
| size_t W_{1}; | |||
| size_t C_{1}; | |||
| size_t OH_{1}; | |||
| size_t OW_{1}; | |||
| size_t OC_{1}; | |||
| bool buffer_button_{false}; | |||
| bool enable_fp16_{false}; | |||
| cl_int stride_w_in{1}; | |||
| cl_int stride_w_out{1}; | |||
| cl_int4 in_shape_ = {}; | |||
| cl_int4 out_shape_ = {}; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -29,34 +29,155 @@ OpParameter *CreateParameter(int axis) { | |||
| } | |||
| } // namespace | |||
| TEST_F(TestOpenCL_Stack, input8_ndim3_axis0) { | |||
| constexpr int INPUT_NUM = 8; | |||
| int axis = 0; | |||
| std::vector<int> input_shapes[INPUT_NUM] = {{1, 1, 8}, {1, 1, 8}, {1, 1, 8}, {1, 1, 8}, | |||
| {1, 1, 8}, {1, 1, 8}, {1, 1, 8}, {1, 1, 8}}; | |||
| std::vector<int> output_shape = {8, 1, 1, 8}; | |||
| float input_datas[INPUT_NUM][8] = { | |||
| {0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.03, 0.37}, {0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.47}, | |||
| {0.31, 0.63, 0.84, 0.43, 0.56, 0.79, 0.12, 0.57}, {0.35, 0.26, 0.17, 0.33, 0.66, 0.89, 0.93, 0.77}, | |||
| {0.57, 0.6, 0.84, 0.83, 0.48, 0.78, 0.63, 0.87}, {0.66, 0.56, 0.64, 0.63, 0.56, 0.59, 0.73, 0.37}, | |||
| {0.35, 0.26, 0.54, 0.33, 0.76, 0.59, 0.73, 0.34}, {0.15, 0.36, 0.44, 0.73, 0.56, 0.49, 0.93, 0.37}}; | |||
| float output_data[] = {0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.03, 0.37, 0.5, 0.6, 0.74, 0.23, 0.46, | |||
| 0.69, 0.13, 0.47, 0.31, 0.63, 0.84, 0.43, 0.56, 0.79, 0.12, 0.57, 0.35, 0.26, | |||
| 0.17, 0.33, 0.66, 0.89, 0.93, 0.77, 0.57, 0.6, 0.84, 0.83, 0.48, 0.78, 0.63, | |||
| 0.87, 0.66, 0.56, 0.64, 0.63, 0.56, 0.59, 0.73, 0.37, 0.35, 0.26, 0.54, 0.33, | |||
| 0.76, 0.59, 0.73, 0.34, 0.15, 0.36, 0.44, 0.73, 0.56, 0.49, 0.93, 0.37}; | |||
| // stack test cases | |||
| TEST_F(TestOpenCL_Stack, input2_ndim1_axis1) { | |||
| constexpr int INPUT_NUM = 2; | |||
| int axis = 1; | |||
| std::vector<int> input_shapes[INPUT_NUM] = {{8}, {8}}; | |||
| std::vector<int> output_shape = {8, 2}; | |||
| float input_datas[INPUT_NUM][8] = {{0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.03, 0.37}, | |||
| {0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.47}}; | |||
| float output_data[] = {0.75, 0.5, 0.06, 0.6, 0.74, 0.74, 0.30, 0.23, 0.9, 0.46, 0.59, 0.69, 0.03, 0.13, 0.37, 0.47}; | |||
| for (auto fp16_enable : {true}) { | |||
| auto *param = CreateParameter(axis); | |||
| TestMain({{input_shapes[0], input_datas[0], VAR}, {input_shapes[1], input_datas[1], VAR}}, | |||
| {output_shape, output_data}, param, fp16_enable, fp16_enable ? 1e-3 : 1e-9); | |||
| } | |||
| } | |||
| TEST_F(TestOpenCL_Stack, input2_ndim2_axis1) { | |||
| constexpr int INPUT_NUM = 2; | |||
| int axis = 1; | |||
| std::vector<int> input_shapes[INPUT_NUM] = {{3, 4}, {3, 4}}; | |||
| std::vector<int> output_shape = {3, 2, 4}; | |||
| float input_datas[INPUT_NUM][12] = { | |||
| {1.317, -2.094, -1.892, -0.4612, -0.884, -0.524, 0.4504, 0.0284, 3.227, -0.4673, -1.115, -0.1572}, | |||
| {-0.0677, -1.289, 0.0685, 0.889, 0.8145, 1.6455, 0.6587, -0.236, 0.3625, 0.7393, -1.393, 0.2534}}; | |||
| float output_data[] = {1.317, -2.094, -1.892, -0.4612, -0.0677, -1.289, 0.0685, 0.889, | |||
| -0.884, -0.524, 0.4504, 0.0284, 0.8145, 1.6455, 0.6587, -0.236, | |||
| 3.227, -0.4673, -1.115, -0.1572, 0.3625, 0.7393, -1.393, 0.2534}; | |||
| for (auto fp16_enable : {true}) { | |||
| auto *param = CreateParameter(axis); | |||
| TestMain({{input_shapes[0], input_datas[0], VAR}, {input_shapes[1], input_datas[1], VAR}}, | |||
| {output_shape, output_data}, param, fp16_enable, fp16_enable ? 1e-3 : 1e-9); | |||
| } | |||
| } | |||
| TEST_F(TestOpenCL_Stack, input2_ndim3_axis1) { | |||
| constexpr int INPUT_NUM = 2; | |||
| int axis = 1; | |||
| std::vector<int> input_shapes[INPUT_NUM] = {{3, 4, 5}, {3, 4, 5}}; | |||
| std::vector<int> output_shape = {3, 2, 4, 5}; | |||
| size_t input1_size, input2_size, output_size; | |||
| std::string input1Ppath = "./test_data/stackfp32_input1.bin"; | |||
| std::string input2Ppath = "./test_data/stackfp32_input2.bin"; | |||
| std::string correctOutputPath = "./test_data/stackfp32_output.bin"; | |||
| auto input_data1 = reinterpret_cast<float *>(mindspore::lite::ReadFile(input1Ppath.c_str(), &input1_size)); | |||
| auto input_data2 = reinterpret_cast<float *>(mindspore::lite::ReadFile(input2Ppath.c_str(), &input2_size)); | |||
| auto output_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(correctOutputPath.c_str(), &output_size)); | |||
| for (auto fp16_enable : {false}) { | |||
| auto *param = CreateParameter(axis); | |||
| TestMain({{input_shapes[0], input_data1, VAR}, {input_shapes[1], input_data2, VAR}}, {output_shape, output_data}, | |||
| param, fp16_enable, fp16_enable ? 1e-3 : 1e-9); | |||
| } | |||
| } | |||
| TEST_F(TestOpenCL_Stack, input2_ndim3_axis2) { | |||
| constexpr int INPUT_NUM = 2; | |||
| int axis = 2; | |||
| std::vector<int> input_shapes[INPUT_NUM] = {{3, 4, 5}, {3, 4, 5}}; | |||
| std::vector<int> output_shape = {3, 4, 2, 5}; | |||
| size_t input1_size, input2_size, output_size; | |||
| std::string input1Ppath = "./test_data/stackfp32_input1.bin"; | |||
| std::string input2Ppath = "./test_data/stackfp32_input2.bin"; | |||
| std::string correctOutputPath = "./test_data/stackfp32_output.bin"; | |||
| auto input_data1 = reinterpret_cast<float *>(mindspore::lite::ReadFile(input1Ppath.c_str(), &input1_size)); | |||
| auto input_data2 = reinterpret_cast<float *>(mindspore::lite::ReadFile(input2Ppath.c_str(), &input2_size)); | |||
| auto output_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(correctOutputPath.c_str(), &output_size)); | |||
| for (auto fp16_enable : {false}) { | |||
| auto *param = CreateParameter(axis); | |||
| TestMain({{input_shapes[0], input_datas[0], VAR}, | |||
| {input_shapes[1], input_datas[1], VAR}, | |||
| {input_shapes[2], input_datas[2], VAR}, | |||
| {input_shapes[3], input_datas[3], VAR}, | |||
| {input_shapes[4], input_datas[4], VAR}, | |||
| {input_shapes[5], input_datas[5], VAR}, | |||
| {input_shapes[6], input_datas[6], VAR}, | |||
| {input_shapes[7], input_datas[7], VAR}}, | |||
| {output_shape, output_data}, param, fp16_enable); | |||
| TestMain({{input_shapes[0], input_data1, VAR}, {input_shapes[1], input_data2, VAR}}, {output_shape, output_data}, | |||
| param, fp16_enable, fp16_enable ? 1e-3 : 1e-9); | |||
| } | |||
| } | |||
| TEST_F(TestOpenCL_Stack, input2_ndim2_axis2) { | |||
| constexpr int INPUT_NUM = 2; | |||
| int axis = 2; | |||
| std::vector<int> input_shapes[INPUT_NUM] = {{1, 96}, {1, 96}}; | |||
| std::vector<int> output_shape = {1, 96, 2}; | |||
| size_t input1_size, input2_size, output_size; | |||
| std::string input1Ppath = "./test_data/stackfp32_input1.bin"; | |||
| std::string input2Ppath = "./test_data/stackfp32_input2.bin"; | |||
| std::string correctOutputPath = "./test_data/stackfp32_output.bin"; | |||
| auto input_data1 = reinterpret_cast<float *>(mindspore::lite::ReadFile(input1Ppath.c_str(), &input1_size)); | |||
| auto input_data2 = reinterpret_cast<float *>(mindspore::lite::ReadFile(input2Ppath.c_str(), &input2_size)); | |||
| auto output_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(correctOutputPath.c_str(), &output_size)); | |||
| for (auto fp16_enable : {false}) { | |||
| auto *param = CreateParameter(axis); | |||
| TestMain({{input_shapes[0], input_data1, VAR}, {input_shapes[1], input_data2, VAR}}, {output_shape, output_data}, | |||
| param, fp16_enable, fp16_enable ? 1e-3 : 1e-9); | |||
| } | |||
| } | |||
| TEST_F(TestOpenCL_Stack, input2_ndim3_axis3) { | |||
| constexpr int INPUT_NUM = 2; | |||
| int axis = 3; | |||
| std::vector<int> input_shapes[INPUT_NUM] = {{3, 4, 6}, {3, 4, 6}}; | |||
| std::vector<int> output_shape = {3, 4, 6, 2}; | |||
| size_t input1_size, input2_size, output_size; | |||
| std::string input1Ppath = "./test_data/stackfp32_input1.bin"; | |||
| std::string input2Ppath = "./test_data/stackfp32_input2.bin"; | |||
| std::string correctOutputPath = "./test_data/stackfp32_output.bin"; | |||
| auto input_data1 = reinterpret_cast<float *>(mindspore::lite::ReadFile(input1Ppath.c_str(), &input1_size)); | |||
| auto input_data2 = reinterpret_cast<float *>(mindspore::lite::ReadFile(input2Ppath.c_str(), &input2_size)); | |||
| auto output_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(correctOutputPath.c_str(), &output_size)); | |||
| for (auto fp16_enable : {false}) { | |||
| auto *param = CreateParameter(axis); | |||
| TestMain({{input_shapes[0], input_data1, VAR}, {input_shapes[1], input_data2, VAR}}, {output_shape, output_data}, | |||
| param, fp16_enable, fp16_enable ? 1e-3 : 1e-9); | |||
| } | |||
| } | |||
| TEST_F(TestOpenCL_Stack, input6_ndim3_axis0) { | |||
| constexpr int INPUT_NUM = 8; | |||
| int axis = 0; | |||
| std::vector<int> input_shapes[INPUT_NUM] = {{1, 17, 18}, {1, 17, 18}, {1, 17, 18}, {1, 17, 18}, | |||
| {1, 17, 18}, {1, 17, 18}, {1, 17, 18}, {1, 17, 18}}; | |||
| std::vector<int> output_shape = {8, 1, 17, 18}; | |||
| size_t input1_size, input2_size, input3_size, input4_size, input5_size, input6_size, input7_size, input8_size, | |||
| output_size; | |||
| std::string input1Ppath = "./test_data/stackfp32_input1.bin"; | |||
| std::string input2Ppath = "./test_data/stackfp32_input2.bin"; | |||
| std::string input3Ppath = "./test_data/stackfp32_input3.bin"; | |||
| std::string input4Ppath = "./test_data/stackfp32_input4.bin"; | |||
| std::string input5Ppath = "./test_data/stackfp32_input5.bin"; | |||
| std::string input6Ppath = "./test_data/stackfp32_input6.bin"; | |||
| std::string input7Ppath = "./test_data/stackfp32_input7.bin"; | |||
| std::string input8Ppath = "./test_data/stackfp32_input8.bin"; | |||
| std::string correctOutputPath = "./test_data/stackfp32_output.bin"; | |||
| auto input_data1 = reinterpret_cast<float *>(mindspore::lite::ReadFile(input1Ppath.c_str(), &input1_size)); | |||
| auto input_data2 = reinterpret_cast<float *>(mindspore::lite::ReadFile(input2Ppath.c_str(), &input2_size)); | |||
| auto input_data3 = reinterpret_cast<float *>(mindspore::lite::ReadFile(input3Ppath.c_str(), &input3_size)); | |||
| auto input_data4 = reinterpret_cast<float *>(mindspore::lite::ReadFile(input4Ppath.c_str(), &input4_size)); | |||
| auto input_data5 = reinterpret_cast<float *>(mindspore::lite::ReadFile(input5Ppath.c_str(), &input5_size)); | |||
| auto input_data6 = reinterpret_cast<float *>(mindspore::lite::ReadFile(input6Ppath.c_str(), &input6_size)); | |||
| auto input_data7 = reinterpret_cast<float *>(mindspore::lite::ReadFile(input7Ppath.c_str(), &input7_size)); | |||
| auto input_data8 = reinterpret_cast<float *>(mindspore::lite::ReadFile(input8Ppath.c_str(), &input8_size)); | |||
| auto output_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(correctOutputPath.c_str(), &output_size)); | |||
| for (auto fp16_enable : {true}) { | |||
| auto *param = CreateParameter(axis); | |||
| TestMain({{input_shapes[0], input_data1, VAR}, | |||
| {input_shapes[1], input_data2, VAR}, | |||
| {input_shapes[2], input_data3, VAR}, | |||
| {input_shapes[3], input_data4, VAR}, | |||
| {input_shapes[4], input_data5, VAR}, | |||
| {input_shapes[5], input_data6, VAR}, | |||
| {input_shapes[6], input_data7, VAR}, | |||
| {input_shapes[7], input_data8, VAR}}, | |||
| {output_shape, output_data}, param, fp16_enable, fp16_enable ? 1e-3 : 1e-9); | |||
| } | |||
| } | |||
| @@ -0,0 +1,2 @@ | |||
| ßç¾°=®¿t$¿,Ã0¾ž>‚¿æ5ê>Æ•8=;ÿˆ?…p½¿žâD> B‡?Aȼ;^f?üÔS¿ILz¾P‚“¿·´¾l¡Ó=øåƾÅb”¿Êi ¾iª?våñ>ñŸŒ¿?þé>°Ï›¿ÇÂá?–5Y?@�>‹]>š5¿Ï,s¿"„Ö=Åõ¿X_À?G | |||
| ’¿Q½Ö¿ÄBÀ²CŠ=Pti½9R>ÖZ©?�º–¿>»ÀxA«>g&©?ü´{¿0Ú)?�Š»¿E¾M^ÀíÜ><�j=[FÛ¿¬ß¼?uù¾a—¿JȘ¿"^½æüß? | |||
| @@ -0,0 +1,2 @@ | |||
| ÎKþ¼úÞ%><´?¦ªO¿aVâ>X2G¿‹,¿ýs”=|\¿|lø>(ÀÀ×c(?�Œ¿Êï¤<¬%¿îª?f9õ¿úŸ£¿ XG½ìîŒ?—¡>�RL¿�]Ò½ê£?b' @î7¿,é>Al&@Rg†?¹Þ¾~^³¿ôRŠ?9+6ÀëÿB?ßÙ)>Äç„?÷è…¿�¯=(p¿¥íø¿h\†?•bü?ÑLF>Ç�?ˆÜ½e׿Í'@{(“>Þ‡n?¢F‡¾±î~=y{ç½c¯Ï¾h™ì=Žë½¼V¿ »K?Ìã俈 =åô¿¿\3"À£?—»EÁ¾Œâ¿S¹¾ë)M¿BË´?mÊ? £ì¿¿ | |||
| s©?¬!¿ | |||
| @@ -0,0 +1,3 @@ | |||
| _T?°Ë¿`?;”q¿åR¿š©?ò^ª?†œ¬¿®¢¾î”=£Ãœ¾¾¬>{;B¿Ã<ì¾£¡Ó>³/?q’½ë¯[?Öô ½ù1¿ A¾_í?a¥J?qþŸ¿.ÀÊ?·3r¾‡.'¿4;·¿pk‚¿•×Ñ>L2?¾Æ>X6ÿ¾·Ú=q8±¿ZWú?‘¨¢¿9Ž>ñ#?·–;=7Òv¾Õ�¿/[‚¿íYý>lɽèÁ‰¿K?®¾Sù¿÷^?¶ç�¿ñ$�¿n’ ?Ò°K¿êº=A¡s>â | |||
| ?ëÀ¿y™)¿¾~>Úôg¾ÒiÌ?fáI¿¹ö¿ ÑÀ 4#?_ÂÁ¾ÑµÀÑÖ®>Ìà? | |||
| D¿—@ü=!g>? | |||
| @@ -0,0 +1,4 @@ | |||
| ÎKþ¼_T?úÞ%>°Ë¿<´?`?¦ªO¿;”q¿aVâ>åR¿X2G¿š©?‹,¿ò^ª?ýs”=†œ¬¿|\¿®¢¾|lø>î”=(ÀÀ£Ãœ¾×c(?¾¬>�Œ¿{;B¿Êï¤<Ã<쾬%¿£¡Ó>îª?³/?f9õ¿q’½úŸ£¿ë¯[? XG½Öô ½ìîŒ?ù1¿—¡> A¾�RL¿_í?�]Ò½a¥J?ê£?qþŸ¿b' @.ÀÊ?î7¿·3r¾,é>‡.'¿Al&@4;·¿Rg†?pk‚¿¹Þ¾•×Ñ>~^³¿L2?ôRŠ?¾Æ>9+6ÀX6ÿ¾ëÿB?·Ú=ßÙ)>q8±¿Äç„?ZWú?÷è…¿‘¨¢¿�¯=9Ž>(p¿ñ#?¥íø¿·–;=h\†?7Òv¾•bü?Õ�¿ÑLF>/[‚¿Ç�?íYý>ˆÜ½lɽe׿èÁ‰¿Í'@K?®¾{(“>Sù¿Þ‡n?÷^?¢F‡¾¶ç�¿±î~=ñ$�¿y{ç½n’ ?c¯Ï¾Ò°K¿h™ì=êº=Žë½A¡s>¼V¿â | |||
| ? »K?ëÀ¿Ìãä¿y™)¿ˆ =¾~>åô¿¿Úôg¾\3"ÀÒiÌ?£?—»fáI¿EÁ¾¹ö¿Œâ¿ ÑÀS¹¾ 4#?ë)M¿_ÂÁ¾BË´?ѵÀmÊ?ÑÖ®> £ì¿Ìà?¿ | |||
| D¿ | |||
| s©?—@ü=¬!¿!g>? | |||