| @@ -82,7 +82,7 @@ class MS_API CPUDeviceInfo : public DeviceInfoContext { | |||
| public: | |||
| enum DeviceType GetDeviceType() const override { return DeviceType::kCPU; }  // CPU context always reports kCPU | |||
| /// \brief Set the thread affinity of CPU cores. | |||
| /// \brief Set the thread affinity to CPU cores. | |||
| /// | |||
| /// \param mode: 0: no affinities, 1: big cores first, 2: little cores first | |||
| void SetThreadAffinity(int mode); | |||
| @@ -53,7 +53,7 @@ class MS_API MSTensor { | |||
| virtual Vector<int> shape() const = 0; | |||
| /// \brief Set the shape of MSTensor. | |||
| virtual void set_shape(const Vector<int> &name) = 0; | |||
| virtual void set_shape(const Vector<int> &shape) = 0; | |||
| /// \brief Get the number of elements in MSTensor. | |||
| /// | |||
| @@ -71,7 +71,7 @@ class MS_API MSTensor { | |||
| virtual String tensor_name() const = 0; | |||
| /// \brief Set the name of MSTensor. | |||
| virtual void set_tensor_name(const String name) = 0; | |||
| virtual void set_tensor_name(const String &name) = 0; | |||
| /// \brief Get the pointer of data in MSTensor. | |||
| /// | |||
| @@ -458,115 +458,91 @@ LoopRow4: | |||
| b WriteEnd | |||
| Write2: | |||
| add x2, x2, #8 | |||
| str d9, [x11] | |||
| st1 {v9.2s}, [x11], x8 | |||
| cmp x6, #1 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d11, [x11] | |||
| st1 {v11.2s}, [x11], x8 | |||
| cmp x6, #2 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d13, [x11] | |||
| st1 {v13.2s}, [x11], x8 | |||
| cmp x6, #3 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d15, [x11] | |||
| st1 {v15.2s}, [x11], x8 | |||
| cmp x6, #4 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d17, [x11] | |||
| st1 {v17.2s}, [x11], x8 | |||
| cmp x6, #5 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d19, [x11] | |||
| st1 {v19.2s}, [x11], x8 | |||
| cmp x6, #6 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d21, [x11] | |||
| st1 {v21.2s}, [x11], x8 | |||
| cmp x6, #7 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d23, [x11] | |||
| st1 {v23.2s}, [x11], x8 | |||
| cmp x6, #8 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d25, [x11] | |||
| st1 {v25.2s}, [x11], x8 | |||
| cmp x6, #9 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d27, [x11] | |||
| st1 {v27.2s}, [x11], x8 | |||
| cmp x6, #10 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d29, [x11] | |||
| st1 {v29.2s}, [x11], x8 | |||
| cmp x6, #11 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d31, [x11] | |||
| add x11, x11, x8 | |||
| st1 {v31.2s}, [x11], x8 | |||
| add x11, x11, #8 | |||
| b WriteEnd | |||
| Write3: | |||
| add x2, x2, #12 | |||
| add x19, x11, #8 | |||
| str d9, [x11] | |||
| st1 {v9.2s}, [x11], x8 | |||
| st1 {v9.s}[2], [x19], x8 | |||
| cmp x6, #1 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d11, [x11] | |||
| st1 {v11.2s}, [x11], x8 | |||
| st1 {v11.s}[2], [x19], x8 | |||
| cmp x6, #2 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d13, [x11] | |||
| st1 {v13.2s}, [x11], x8 | |||
| st1 {v13.s}[2], [x19], x8 | |||
| cmp x6, #3 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d15, [x11] | |||
| st1 {v15.2s}, [x11], x8 | |||
| st1 {v15.s}[2], [x19], x8 | |||
| cmp x6, #4 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d17, [x11] | |||
| st1 {v17.2s}, [x11], x8 | |||
| st1 {v17.s}[2], [x19], x8 | |||
| cmp x6, #5 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d19, [x11] | |||
| st1 {v19.2s}, [x11], x8 | |||
| st1 {v19.s}[2], [x19], x8 | |||
| cmp x6, #6 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d21, [x11] | |||
| st1 {v21.2s}, [x11], x8 | |||
| st1 {v21.s}[2], [x19], x8 | |||
| cmp x6, #7 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d23, [x11] | |||
| st1 {v23.2s}, [x11], x8 | |||
| st1 {v23.s}[2], [x19], x8 | |||
| cmp x6, #8 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d25, [x11] | |||
| st1 {v25.2s}, [x11], x8 | |||
| st1 {v25.s}[2], [x19], x8 | |||
| cmp x6, #9 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d27, [x11] | |||
| st1 {v27.2s}, [x11], x8 | |||
| st1 {v27.s}[2], [x19], x8 | |||
| cmp x6, #10 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d29, [x11] | |||
| st1 {v29.2s}, [x11], x8 | |||
| st1 {v29.s}[2], [x19], x8 | |||
| cmp x6, #11 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d31, [x11] | |||
| st1 {v31.2s}, [x11], x8 | |||
| st1 {v31.s}[2], [x19] | |||
| add x11, x11, x8 | |||
| add x11, x11, #12 | |||
| b WriteEnd | |||
| Write4: | |||
| @@ -129,7 +129,7 @@ asm_function ConvDwFp32Indirect3x3 | |||
| tbnz w11, #1, Write2 | |||
| tbnz w11, #0, Write1 | |||
| Write2: | |||
| str d29, [x0], #8 | |||
| st1 {v29.2s}, [x0], #8 | |||
| ext v29.16b, v29.16b, v29.16b, #8 | |||
| tbz w11, #0, NextPixel | |||
| Write1: | |||
| @@ -260,7 +260,7 @@ asm_function ConvDwFp32Indirect5x5 | |||
| tbnz w2, #1, Write2 | |||
| tbnz w2, #0, Write1 | |||
| Write2: | |||
| str d29, [x0], #8 | |||
| st1 {v29.2s}, [x0], #8 | |||
| ext v29.16b, v29.16b, v29.16b, #8 | |||
| tbz w2, #0, NextPixel | |||
| Write1: | |||
| @@ -740,115 +740,91 @@ LoopRow4: | |||
| b WriteEnd | |||
| Write2: | |||
| add x2, x2, #8 | |||
| str d8, [x11] | |||
| st1 {v8.2s}, [x11], x8 | |||
| cmp x6, #1 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d10, [x11] | |||
| st1 {v10.2s}, [x11], x8 | |||
| cmp x6, #2 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d12, [x11] | |||
| st1 {v12.2s}, [x11], x8 | |||
| cmp x6, #3 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d14, [x11] | |||
| st1 {v14.2s}, [x11], x8 | |||
| cmp x6, #4 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d16, [x11] | |||
| st1 {v16.2s}, [x11], x8 | |||
| cmp x6, #5 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d18, [x11] | |||
| st1 {v18.2s}, [x11], x8 | |||
| cmp x6, #6 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d20, [x11] | |||
| st1 {v20.2s}, [x11], x8 | |||
| cmp x6, #7 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d22, [x11] | |||
| st1 {v22.2s}, [x11], x8 | |||
| cmp x6, #8 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d24, [x11] | |||
| st1 {v24.2s}, [x11], x8 | |||
| cmp x6, #9 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d26, [x11] | |||
| st1 {v26.2s}, [x11], x8 | |||
| cmp x6, #10 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d28, [x11] | |||
| st1 {v28.2s}, [x11], x8 | |||
| cmp x6, #11 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d30, [x11] | |||
| add x11, x11, x8 | |||
| st1 {v30.2s}, [x11], x8 | |||
| add x11, x11, #8 | |||
| b WriteEnd | |||
| Write3: | |||
| add x2, x2, #12 | |||
| add x19, x11, #8 | |||
| str d8, [x11] | |||
| st1 {v8.2s}, [x11], x8 | |||
| st1 {v8.s}[2], [x19], x8 | |||
| cmp x6, #1 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d10, [x11] | |||
| st1 {v10.2s}, [x11], x8 | |||
| st1 {v10.s}[2], [x19], x8 | |||
| cmp x6, #2 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d12, [x11] | |||
| st1 {v12.2s}, [x11], x8 | |||
| st1 {v12.s}[2], [x19], x8 | |||
| cmp x6, #3 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d14, [x11] | |||
| st1 {v14.2s}, [x11], x8 | |||
| st1 {v14.s}[2], [x19], x8 | |||
| cmp x6, #4 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d16, [x11] | |||
| st1 {v16.2s}, [x11], x8 | |||
| st1 {v16.s}[2], [x19], x8 | |||
| cmp x6, #5 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d18, [x11] | |||
| st1 {v18.2s}, [x11], x8 | |||
| st1 {v18.s}[2], [x19], x8 | |||
| cmp x6, #6 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d20, [x11] | |||
| st1 {v20.2s}, [x11], x8 | |||
| st1 {v20.s}[2], [x19], x8 | |||
| cmp x6, #7 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d22, [x11] | |||
| st1 {v22.2s}, [x11], x8 | |||
| st1 {v22.s}[2], [x19], x8 | |||
| cmp x6, #8 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d24, [x11] | |||
| st1 {v24.2s}, [x11], x8 | |||
| st1 {v24.s}[2], [x19], x8 | |||
| cmp x6, #9 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d26, [x11] | |||
| st1 {v26.2s}, [x11], x8 | |||
| st1 {v26.s}[2], [x19], x8 | |||
| cmp x6, #10 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d28, [x11] | |||
| st1 {v28.2s}, [x11], x8 | |||
| st1 {v28.s}[2], [x19], x8 | |||
| cmp x6, #11 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d30, [x11] | |||
| st1 {v30.2s}, [x11], x8 | |||
| st1 {v30.s}[2], [x19] | |||
| add x11, x11, x8 | |||
| add x11, x11, #12 | |||
| b WriteEnd | |||
| Write4: | |||
| @@ -955,62 +931,51 @@ LoopRow4: | |||
| add x2, x2, #24 | |||
| add x19, x11, #16 | |||
| st1 {v8.4s}, [x11], x8 | |||
| str d9, [x19] | |||
| st1 {v9.2s}, [x19], x8 | |||
| cmp x6, #1 | |||
| beq WriteEnd | |||
| add x19, x19, x8 | |||
| st1 {v10.4s}, [x11], x8 | |||
| str d11, [x19] | |||
| st1 {v11.2s}, [x19], x8 | |||
| cmp x6, #2 | |||
| beq WriteEnd | |||
| add x19, x19, x8 | |||
| st1 {v12.4s}, [x11], x8 | |||
| str d13, [x19] | |||
| st1 {v13.2s}, [x19], x8 | |||
| cmp x6, #3 | |||
| beq WriteEnd | |||
| add x19, x19, x8 | |||
| st1 {v14.4s}, [x11], x8 | |||
| str d15, [x19] | |||
| st1 {v15.2s}, [x19], x8 | |||
| cmp x6, #4 | |||
| beq WriteEnd | |||
| add x19, x19, x8 | |||
| st1 {v16.4s}, [x11], x8 | |||
| str d17, [x19] | |||
| st1 {v17.2s}, [x19], x8 | |||
| cmp x6, #5 | |||
| beq WriteEnd | |||
| add x19, x19, x8 | |||
| st1 {v18.4s}, [x11], x8 | |||
| str d19, [x19] | |||
| st1 {v19.2s}, [x19], x8 | |||
| cmp x6, #6 | |||
| beq WriteEnd | |||
| add x19, x19, x8 | |||
| st1 {v20.4s}, [x11], x8 | |||
| str d21, [x19] | |||
| st1 {v21.2s}, [x19], x8 | |||
| cmp x6, #7 | |||
| beq WriteEnd | |||
| add x19, x19, x8 | |||
| st1 {v22.4s}, [x11], x8 | |||
| str d23, [x19] | |||
| st1 {v23.2s}, [x19], x8 | |||
| cmp x6, #8 | |||
| beq WriteEnd | |||
| add x19, x19, x8 | |||
| st1 {v24.4s}, [x11], x8 | |||
| str d25, [x19] | |||
| st1 {v25.2s}, [x19], x8 | |||
| cmp x6, #9 | |||
| beq WriteEnd | |||
| add x19, x19, x8 | |||
| st1 {v26.4s}, [x11], x8 | |||
| str d27, [x19] | |||
| st1 {v27.2s}, [x19], x8 | |||
| cmp x6, #10 | |||
| beq WriteEnd | |||
| add x19, x19, x8 | |||
| st1 {v28.4s}, [x11], x8 | |||
| str d29, [x19] | |||
| st1 {v29.2s}, [x19], x8 | |||
| cmp x6, #11 | |||
| beq WriteEnd | |||
| add x19, x19, x8 | |||
| st1 {v30.4s}, [x11], x8 | |||
| str d31, [x19] | |||
| st1 {v31.2s}, [x19] | |||
| add x11, x11, #24 | |||
| b WriteEnd | |||
| Write7: | |||
| @@ -1018,75 +983,63 @@ LoopRow4: | |||
| add x19, x11, #16 | |||
| add x20, x11, #24 | |||
| st1 {v8.4s}, [x11], x8 | |||
| str d9, [x19] | |||
| st1 {v9.2s}, [x19], x8 | |||
| st1 {v9.s}[2], [x20], x8 | |||
| cmp x6, #1 | |||
| beq WriteEnd | |||
| add x19, x19, x8 | |||
| st1 {v10.4s}, [x11], x8 | |||
| str d11, [x19] | |||
| st1 {v11.2s}, [x19], x8 | |||
| st1 {v11.s}[2], [x20], x8 | |||
| cmp x6, #2 | |||
| beq WriteEnd | |||
| add x19, x19, x8 | |||
| st1 {v12.4s}, [x11], x8 | |||
| str d13, [x19] | |||
| st1 {v13.2s}, [x19], x8 | |||
| st1 {v13.s}[2], [x20], x8 | |||
| cmp x6, #3 | |||
| beq WriteEnd | |||
| add x19, x19, x8 | |||
| st1 {v14.4s}, [x11], x8 | |||
| str d15, [x19] | |||
| st1 {v15.2s}, [x19], x8 | |||
| st1 {v15.s}[2], [x20], x8 | |||
| cmp x6, #4 | |||
| beq WriteEnd | |||
| add x19, x19, x8 | |||
| st1 {v16.4s}, [x11], x8 | |||
| str d17, [x19] | |||
| st1 {v17.2s}, [x19], x8 | |||
| st1 {v17.s}[2], [x20], x8 | |||
| cmp x6, #5 | |||
| beq WriteEnd | |||
| add x19, x19, x8 | |||
| st1 {v18.4s}, [x11], x8 | |||
| str d19, [x19] | |||
| st1 {v19.2s}, [x19], x8 | |||
| st1 {v19.s}[2], [x20], x8 | |||
| cmp x6, #6 | |||
| beq WriteEnd | |||
| add x19, x19, x8 | |||
| st1 {v20.4s}, [x11], x8 | |||
| str d21, [x19] | |||
| st1 {v21.2s}, [x19], x8 | |||
| st1 {v21.s}[2], [x20], x8 | |||
| cmp x6, #7 | |||
| beq WriteEnd | |||
| add x19, x19, x8 | |||
| st1 {v22.4s}, [x11], x8 | |||
| str d23, [x19] | |||
| st1 {v23.2s}, [x19], x8 | |||
| st1 {v23.s}[2], [x20], x8 | |||
| cmp x6, #8 | |||
| beq WriteEnd | |||
| add x19, x19, x8 | |||
| st1 {v24.4s}, [x11], x8 | |||
| str d25, [x19] | |||
| st1 {v25.2s}, [x19], x8 | |||
| st1 {v25.s}[2], [x20], x8 | |||
| cmp x6, #9 | |||
| beq WriteEnd | |||
| add x19, x19, x8 | |||
| st1 {v26.4s}, [x11], x8 | |||
| str d27, [x19] | |||
| st1 {v27.2s}, [x19], x8 | |||
| st1 {v27.s}[2], [x20], x8 | |||
| cmp x6, #10 | |||
| beq WriteEnd | |||
| add x19, x19, x8 | |||
| st1 {v28.4s}, [x11], x8 | |||
| str d29, [x19] | |||
| st1 {v29.2s}, [x19], x8 | |||
| st1 {v29.s}[2], [x20], x8 | |||
| cmp x6, #11 | |||
| beq WriteEnd | |||
| add x19, x19, x8 | |||
| st1 {v30.4s}, [x11], x8 | |||
| str d31, [x19] | |||
| add x19, x19, x8 | |||
| st1 {v31.s}[2], [x20], x8 | |||
| st1 {v31.2s}, [x19] | |||
| st1 {v31.s}[2], [x20] | |||
| add x11, x11, #28 | |||
| b WriteEnd | |||
| WriteC8: | |||
| @@ -334,353 +334,301 @@ IndirectGemmStart: | |||
| add x0, x0, #2 | |||
| b WriteEnd | |||
| Write2: | |||
| str s16, [x15] | |||
| add x15, x15, x7 | |||
| str s17, [x15] | |||
| add x15, x15, x7 | |||
| str s18, [x15] | |||
| add x15, x15, x7 | |||
| str s19, [x15] | |||
| add x15, x15, x7 | |||
| str s20, [x15] | |||
| add x15, x15, x7 | |||
| str s21, [x15] | |||
| add x15, x15, x7 | |||
| str s22, [x15] | |||
| add x15, x15, x7 | |||
| str s23, [x15] | |||
| add x15, x15, x7 | |||
| str s24, [x15] | |||
| add x15, x15, x7 | |||
| str s25, [x15] | |||
| add x15, x15, x7 | |||
| str s26, [x15] | |||
| add x15, x15, x7 | |||
| str s27, [x15] | |||
| add x15, x15, x7 | |||
| str s28, [x15] | |||
| add x15, x15, x7 | |||
| str s29, [x15] | |||
| add x15, x15, x7 | |||
| str s30, [x15] | |||
| add x15, x15, x7 | |||
| str s31, [x15] | |||
| add x17, x15, #2 | |||
| st1 {v16.h}[0], [x15], x7 | |||
| st1 {v16.h}[1], [x17], x7 | |||
| st1 {v17.h}[0], [x15], x7 | |||
| st1 {v17.h}[1], [x17], x7 | |||
| st1 {v18.h}[0], [x15], x7 | |||
| st1 {v18.h}[1], [x17], x7 | |||
| st1 {v19.h}[0], [x15], x7 | |||
| st1 {v19.h}[1], [x17], x7 | |||
| st1 {v20.h}[0], [x15], x7 | |||
| st1 {v20.h}[1], [x17], x7 | |||
| st1 {v21.h}[0], [x15], x7 | |||
| st1 {v21.h}[1], [x17], x7 | |||
| st1 {v22.h}[0], [x15], x7 | |||
| st1 {v22.h}[1], [x17], x7 | |||
| st1 {v23.h}[0], [x15], x7 | |||
| st1 {v23.h}[1], [x17], x7 | |||
| st1 {v24.h}[0], [x15], x7 | |||
| st1 {v24.h}[1], [x17], x7 | |||
| st1 {v25.h}[0], [x15], x7 | |||
| st1 {v25.h}[1], [x17], x7 | |||
| st1 {v26.h}[0], [x15], x7 | |||
| st1 {v26.h}[1], [x17], x7 | |||
| st1 {v27.h}[0], [x15], x7 | |||
| st1 {v27.h}[1], [x17], x7 | |||
| st1 {v28.h}[0], [x15], x7 | |||
| st1 {v28.h}[1], [x17], x7 | |||
| st1 {v29.h}[0], [x15], x7 | |||
| st1 {v29.h}[1], [x17], x7 | |||
| st1 {v30.h}[0], [x15], x7 | |||
| st1 {v30.h}[1], [x17], x7 | |||
| st1 {v31.h}[0], [x15] | |||
| st1 {v31.h}[1], [x17] | |||
| add x0, x0, #4 | |||
| b WriteEnd | |||
| Write3: | |||
| add x17, x15, #4 | |||
| str s16, [x15] | |||
| add x15, x15, x7 | |||
| add x16, x15, #2 | |||
| st1 {v16.h}[0], [x15], x7 | |||
| st1 {v16.h}[1], [x16], x7 | |||
| st1 {v16.h}[2], [x17], x7 | |||
| str s17, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v17.h}[0], [x15], x7 | |||
| st1 {v17.h}[1], [x16], x7 | |||
| st1 {v17.h}[2], [x17], x7 | |||
| str s18, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v18.h}[0], [x15], x7 | |||
| st1 {v18.h}[1], [x16], x7 | |||
| st1 {v18.h}[2], [x17], x7 | |||
| str s19, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v19.h}[0], [x15], x7 | |||
| st1 {v19.h}[1], [x16], x7 | |||
| st1 {v19.h}[2], [x17], x7 | |||
| str s20, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v20.h}[0], [x15], x7 | |||
| st1 {v20.h}[1], [x16], x7 | |||
| st1 {v20.h}[2], [x17], x7 | |||
| str s21, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v21.h}[0], [x15], x7 | |||
| st1 {v21.h}[1], [x16], x7 | |||
| st1 {v21.h}[2], [x17], x7 | |||
| str s22, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v22.h}[0], [x15], x7 | |||
| st1 {v22.h}[1], [x16], x7 | |||
| st1 {v22.h}[2], [x17], x7 | |||
| str s23, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v23.h}[0], [x15], x7 | |||
| st1 {v23.h}[1], [x16], x7 | |||
| st1 {v23.h}[2], [x17], x7 | |||
| str s24, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v24.h}[0], [x15], x7 | |||
| st1 {v24.h}[1], [x16], x7 | |||
| st1 {v24.h}[2], [x17], x7 | |||
| str s25, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v25.h}[0], [x15], x7 | |||
| st1 {v25.h}[1], [x16], x7 | |||
| st1 {v25.h}[2], [x17], x7 | |||
| str s26, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v26.h}[0], [x15], x7 | |||
| st1 {v26.h}[1], [x16], x7 | |||
| st1 {v26.h}[2], [x17], x7 | |||
| str s27, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v27.h}[0], [x15], x7 | |||
| st1 {v27.h}[1], [x16], x7 | |||
| st1 {v27.h}[2], [x17], x7 | |||
| str s28, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v28.h}[0], [x15], x7 | |||
| st1 {v28.h}[1], [x16], x7 | |||
| st1 {v28.h}[2], [x17], x7 | |||
| str s29, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v29.h}[0], [x15], x7 | |||
| st1 {v29.h}[1], [x16], x7 | |||
| st1 {v29.h}[2], [x17], x7 | |||
| str s30, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v30.h}[0], [x15], x7 | |||
| st1 {v30.h}[1], [x16], x7 | |||
| st1 {v30.h}[2], [x17], x7 | |||
| str s31, [x15] | |||
| st1 {v31.h}[0], [x15] | |||
| st1 {v31.h}[1], [x16] | |||
| st1 {v31.h}[2], [x17] | |||
| add x0, x0, #6 | |||
| b WriteEnd | |||
| Write4: | |||
| str d16, [x15] | |||
| add x15, x15, x7 | |||
| str d17, [x15] | |||
| add x15, x15, x7 | |||
| str d18, [x15] | |||
| add x15, x15, x7 | |||
| str d19, [x15] | |||
| add x15, x15, x7 | |||
| str d20, [x15] | |||
| add x15, x15, x7 | |||
| str d21, [x15] | |||
| add x15, x15, x7 | |||
| str d22, [x15] | |||
| add x15, x15, x7 | |||
| str d23, [x15] | |||
| add x15, x15, x7 | |||
| str d24, [x15] | |||
| add x15, x15, x7 | |||
| str d25, [x15] | |||
| add x15, x15, x7 | |||
| str d26, [x15] | |||
| add x15, x15, x7 | |||
| str d27, [x15] | |||
| add x15, x15, x7 | |||
| str d28, [x15] | |||
| add x15, x15, x7 | |||
| str d29, [x15] | |||
| add x15, x15, x7 | |||
| str d30, [x15] | |||
| add x15, x15, x7 | |||
| str d31, [x15] | |||
| st1 {v16.4h}, [x15], x7 | |||
| st1 {v17.4h}, [x15], x7 | |||
| st1 {v18.4h}, [x15], x7 | |||
| st1 {v19.4h}, [x15], x7 | |||
| st1 {v20.4h}, [x15], x7 | |||
| st1 {v21.4h}, [x15], x7 | |||
| st1 {v22.4h}, [x15], x7 | |||
| st1 {v23.4h}, [x15], x7 | |||
| st1 {v24.4h}, [x15], x7 | |||
| st1 {v25.4h}, [x15], x7 | |||
| st1 {v26.4h}, [x15], x7 | |||
| st1 {v27.4h}, [x15], x7 | |||
| st1 {v28.4h}, [x15], x7 | |||
| st1 {v29.4h}, [x15], x7 | |||
| st1 {v30.4h}, [x15], x7 | |||
| st1 {v31.4h}, [x15] | |||
| add x0, x0, #8 | |||
| b WriteEnd | |||
| Write5: | |||
| add x17, x15, #8 | |||
| str d16, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v16.4h}, [x15], x7 | |||
| st1 {v16.h}[4], [x17], x7 | |||
| str d17, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v17.4h}, [x15], x7 | |||
| st1 {v17.h}[4], [x17], x7 | |||
| str d18, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v18.4h}, [x15], x7 | |||
| st1 {v18.h}[4], [x17], x7 | |||
| str d19, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v19.4h}, [x15], x7 | |||
| st1 {v19.h}[4], [x17], x7 | |||
| str d20, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v20.4h}, [x15], x7 | |||
| st1 {v20.h}[4], [x17], x7 | |||
| str d21, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v21.4h}, [x15], x7 | |||
| st1 {v21.h}[4], [x17], x7 | |||
| str d22, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v22.4h}, [x15], x7 | |||
| st1 {v22.h}[4], [x17], x7 | |||
| str d23, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v23.4h}, [x15], x7 | |||
| st1 {v23.h}[4], [x17], x7 | |||
| str d24, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v24.4h}, [x15], x7 | |||
| st1 {v24.h}[4], [x17], x7 | |||
| str d25, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v25.4h}, [x15], x7 | |||
| st1 {v25.h}[4], [x17], x7 | |||
| str d26, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v26.4h}, [x15], x7 | |||
| st1 {v26.h}[4], [x17], x7 | |||
| str d27, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v27.4h}, [x15], x7 | |||
| st1 {v27.h}[4], [x17], x7 | |||
| str d28, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v28.4h}, [x15], x7 | |||
| st1 {v28.h}[4], [x17], x7 | |||
| str d29, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v29.4h}, [x15], x7 | |||
| st1 {v29.h}[4], [x17], x7 | |||
| str d30, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v30.4h}, [x15], x7 | |||
| st1 {v30.h}[4], [x17], x7 | |||
| str d31, [x15] | |||
| st1 {v31.4h}, [x15] | |||
| st1 {v31.h}[4], [x17] | |||
| add x0, x0, #10 | |||
| b WriteEnd | |||
| Write6: | |||
| add x17, x15, #8 | |||
| str d16, [x15] | |||
| add x15, x15, x7 | |||
| add x16, x15, #10 | |||
| st1 {v16.4h}, [x15], x7 | |||
| ins v0.s[0], v16.s[2] | |||
| str s0, [x17] | |||
| add x17, x17, x7 | |||
| str d17, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v0.h}[0], [x17], x7 | |||
| st1 {v0.h}[1], [x16], x7 | |||
| st1 {v17.4h}, [x15], x7 | |||
| ins v1.s[0], v17.s[2] | |||
| str s1, [x17] | |||
| add x17, x17, x7 | |||
| str d18, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v1.h}[0], [x17], x7 | |||
| st1 {v1.h}[1], [x16], x7 | |||
| st1 {v18.4h}, [x15], x7 | |||
| ins v2.s[0], v18.s[2] | |||
| str s2, [x17] | |||
| add x17, x17, x7 | |||
| str d19, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v2.h}[0], [x17], x7 | |||
| st1 {v2.h}[1], [x16], x7 | |||
| st1 {v19.4h}, [x15], x7 | |||
| ins v3.s[0], v19.s[2] | |||
| str s3, [x17] | |||
| add x17, x17, x7 | |||
| str d20, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v3.h}[0], [x17], x7 | |||
| st1 {v3.h}[1], [x16], x7 | |||
| st1 {v20.4h}, [x15], x7 | |||
| ins v4.s[0], v20.s[2] | |||
| str s4, [x17] | |||
| add x17, x17, x7 | |||
| str d21, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v4.h}[0], [x17], x7 | |||
| st1 {v4.h}[1], [x16], x7 | |||
| st1 {v21.4h}, [x15], x7 | |||
| ins v5.s[0], v21.s[2] | |||
| str s5, [x17] | |||
| add x17, x17, x7 | |||
| str d22, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v5.h}[0], [x17], x7 | |||
| st1 {v5.h}[1], [x16], x7 | |||
| st1 {v22.4h}, [x15], x7 | |||
| ins v6.s[0], v22.s[2] | |||
| str s6, [x17] | |||
| add x17, x17, x7 | |||
| str d23, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v6.h}[0], [x17], x7 | |||
| st1 {v6.h}[1], [x16], x7 | |||
| st1 {v23.4h}, [x15], x7 | |||
| ins v7.s[0], v23.s[2] | |||
| str s7, [x17] | |||
| add x17, x17, x7 | |||
| str d24, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v7.h}[0], [x17], x7 | |||
| st1 {v7.h}[1], [x16], x7 | |||
| st1 {v24.4h}, [x15], x7 | |||
| ins v8.s[0], v24.s[2] | |||
| str s8, [x17] | |||
| add x17, x17, x7 | |||
| str d25, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v8.h}[0], [x17], x7 | |||
| st1 {v8.h}[1], [x16], x7 | |||
| st1 {v25.4h}, [x15], x7 | |||
| ins v9.s[0], v25.s[2] | |||
| str s9, [x17] | |||
| add x17, x17, x7 | |||
| str d26, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v9.h}[0], [x17], x7 | |||
| st1 {v9.h}[1], [x16], x7 | |||
| st1 {v26.4h}, [x15], x7 | |||
| ins v10.s[0], v26.s[2] | |||
| str s10, [x17] | |||
| add x17, x17, x7 | |||
| str d27, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v10.h}[0], [x17], x7 | |||
| st1 {v10.h}[1], [x16], x7 | |||
| st1 {v27.4h}, [x15], x7 | |||
| ins v11.s[0], v27.s[2] | |||
| str s11, [x17] | |||
| add x17, x17, x7 | |||
| str d28, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v11.h}[0], [x17], x7 | |||
| st1 {v11.h}[1], [x16], x7 | |||
| st1 {v28.4h}, [x15], x7 | |||
| ins v12.s[0], v28.s[2] | |||
| str s12, [x17] | |||
| add x17, x17, x7 | |||
| str d29, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v12.h}[0], [x17], x7 | |||
| st1 {v12.h}[1], [x16], x7 | |||
| st1 {v29.4h}, [x15], x7 | |||
| ins v13.s[0], v29.s[2] | |||
| str s13, [x17] | |||
| add x17, x17, x7 | |||
| str d30, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v13.h}[0], [x17], x7 | |||
| st1 {v13.h}[1], [x16], x7 | |||
| st1 {v30.4h}, [x15], x7 | |||
| ins v14.s[0], v30.s[2] | |||
| str s14, [x17] | |||
| add x17, x17, x7 | |||
| str d31, [x15] | |||
| st1 {v14.h}[0], [x17], x7 | |||
| st1 {v14.h}[1], [x16], x7 | |||
| st1 {v31.4h}, [x15]  // last row: halfwords 0-3 of v31 | |||
| ins v15.s[0], v31.s[2]  // v15.h[0..1] = v31.h[4..5] | |||
| str s15, [x17] | |||
| st1 {v15.h}[0], [x17]  // halfword 4 of v31 (was v14: stored the previous row's data) | |||
| st1 {v15.h}[1], [x16]  // halfword 5 of v31 | |||
| add x0, x0, #12 | |||
| b WriteEnd | |||
| Write7: | |||
| add x17, x15, #8 | |||
| add x18, x15, #10 | |||
| add x16, x15, #12 | |||
| str d16, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v16.4h}, [x15], x7 | |||
| ins v0.s[0], v16.s[2] | |||
| str s0, [x17] | |||
| add x17, x17, x7 | |||
| st1 {v0.h}[0], [x17], x7 | |||
| st1 {v0.h}[1], [x18], x7 | |||
| st1 {v16.h}[6], [x16], x7 | |||
| str d17, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v17.4h}, [x15], x7 | |||
| ins v1.s[0], v17.s[2] | |||
| str s1, [x17] | |||
| add x17, x17, x7 | |||
| st1 {v1.h}[0], [x17], x7 | |||
| st1 {v1.h}[1], [x18], x7 | |||
| st1 {v17.h}[6], [x16], x7 | |||
| str d18, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v18.4h}, [x15], x7 | |||
| ins v2.s[0], v18.s[2] | |||
| str s2, [x17] | |||
| add x17, x17, x7 | |||
| st1 {v2.h}[0], [x17], x7 | |||
| st1 {v2.h}[1], [x18], x7 | |||
| st1 {v18.h}[6], [x16], x7 | |||
| str d19, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v19.4h}, [x15], x7 | |||
| ins v3.s[0], v19.s[2] | |||
| str s3, [x17] | |||
| add x17, x17, x7 | |||
| st1 {v3.h}[0], [x17], x7 | |||
| st1 {v3.h}[1], [x18], x7 | |||
| st1 {v19.h}[6], [x16], x7 | |||
| str d20, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v20.4h}, [x15], x7 | |||
| ins v4.s[0], v20.s[2] | |||
| str s4, [x17] | |||
| add x17, x17, x7 | |||
| st1 {v4.h}[0], [x17], x7 | |||
| st1 {v4.h}[1], [x18], x7 | |||
| st1 {v20.h}[6], [x16], x7 | |||
| str d21, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v21.4h}, [x15], x7 | |||
| ins v5.s[0], v21.s[2] | |||
| str s5, [x17] | |||
| add x17, x17, x7 | |||
| st1 {v5.h}[0], [x17], x7 | |||
| st1 {v5.h}[1], [x18], x7 | |||
| st1 {v21.h}[6], [x16], x7 | |||
| str d22, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v22.4h}, [x15], x7 | |||
| ins v6.s[0], v22.s[2] | |||
| str s6, [x17] | |||
| add x17, x17, x7 | |||
| st1 {v6.h}[0], [x17], x7 | |||
| st1 {v6.h}[1], [x18], x7 | |||
| st1 {v22.h}[6], [x16], x7 | |||
| str d23, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v23.4h}, [x15], x7 | |||
| ins v7.s[0], v23.s[2] | |||
| str s7, [x17] | |||
| add x17, x17, x7 | |||
| st1 {v7.h}[0], [x17], x7 | |||
| st1 {v7.h}[1], [x18], x7 | |||
| st1 {v23.h}[6], [x16], x7 | |||
| str d24, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v24.4h}, [x15], x7 | |||
| ins v8.s[0], v24.s[2] | |||
| str s8, [x17] | |||
| add x17, x17, x7 | |||
| st1 {v8.h}[0], [x17], x7 | |||
| st1 {v8.h}[1], [x18], x7 | |||
| st1 {v24.h}[6], [x16], x7 | |||
| str d25, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v25.4h}, [x15], x7 | |||
| ins v9.s[0], v25.s[2] | |||
| str s9, [x17] | |||
| add x17, x17, x7 | |||
| st1 {v9.h}[0], [x17], x7 | |||
| st1 {v9.h}[1], [x18], x7 | |||
| st1 {v25.h}[6], [x16], x7 | |||
| str d26, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v26.4h}, [x15], x7 | |||
| ins v10.s[0], v26.s[2] | |||
| str s10, [x17] | |||
| add x17, x17, x7 | |||
| st1 {v10.h}[0], [x17], x7 | |||
| st1 {v10.h}[1], [x18], x7 | |||
| st1 {v26.h}[6], [x16], x7 | |||
| str d27, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v27.4h}, [x15], x7 | |||
| ins v11.s[0], v27.s[2] | |||
| str s11, [x17] | |||
| add x17, x17, x7 | |||
| st1 {v11.h}[0], [x17], x7 | |||
| st1 {v11.h}[1], [x18], x7 | |||
| st1 {v27.h}[6], [x16], x7 | |||
| str d28, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v28.4h}, [x15], x7 | |||
| ins v12.s[0], v28.s[2] | |||
| str s12, [x17] | |||
| add x17, x17, x7 | |||
| st1 {v12.h}[0], [x17], x7 | |||
| st1 {v12.h}[1], [x18], x7 | |||
| st1 {v28.h}[6], [x16], x7 | |||
| str d29, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v29.4h}, [x15], x7 | |||
| ins v13.s[0], v29.s[2] | |||
| str s13, [x17] | |||
| add x17, x17, x7 | |||
| st1 {v13.h}[0], [x17], x7 | |||
| st1 {v13.h}[1], [x18], x7 | |||
| st1 {v29.h}[6], [x16], x7 | |||
| str d30, [x15] | |||
| add x15, x15, x7 | |||
| st1 {v30.4h}, [x15], x7 | |||
| ins v14.s[0], v30.s[2] | |||
| str s14, [x17] | |||
| add x17, x17, x7 | |||
| st1 {v14.h}[0], [x17], x7 | |||
| st1 {v14.h}[1], [x18], x7 | |||
| st1 {v30.h}[6], [x16], x7 | |||
| str d31, [x15] | |||
| st1 {v31.4h}, [x15] | |||
| ins v15.s[0], v31.s[2] | |||
| str s15, [x17] | |||
| st1 {v15.h}[0], [x17] | |||
| st1 {v15.h}[1], [x18] | |||
| st1 {v31.h}[6], [x16] | |||
| add x0, x0, #14 | |||
| b WriteEnd | |||
| @@ -677,400 +677,354 @@ LoopRow: | |||
| b WriteEnd | |||
| Write2: | |||
| add x2, x2, #4 | |||
| str s16, [x11] | |||
| add x19, x11, #2 | |||
| st1 {v16.h}[0], [x11], x8 | |||
| st1 {v16.h}[1], [x19], x8 | |||
| cmp x6, #1 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str s17, [x11] | |||
| st1 {v17.h}[0], [x11], x8 | |||
| st1 {v17.h}[1], [x19], x8 | |||
| cmp x6, #2 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str s18, [x11] | |||
| st1 {v18.h}[0], [x11], x8 | |||
| st1 {v18.h}[1], [x19], x8 | |||
| cmp x6, #3 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str s19, [x11] | |||
| st1 {v19.h}[0], [x11], x8 | |||
| st1 {v19.h}[1], [x19], x8 | |||
| cmp x6, #4 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str s20, [x11] | |||
| st1 {v20.h}[0], [x11], x8 | |||
| st1 {v20.h}[1], [x19], x8 | |||
| cmp x6, #5 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str s21, [x11] | |||
| st1 {v21.h}[0], [x11], x8 | |||
| st1 {v21.h}[1], [x19], x8 | |||
| cmp x6, #6 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str s22, [x11] | |||
| st1 {v22.h}[0], [x11], x8 | |||
| st1 {v22.h}[1], [x19], x8 | |||
| cmp x6, #7 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str s23, [x11] | |||
| st1 {v23.h}[0], [x11], x8 | |||
| st1 {v23.h}[1], [x19], x8 | |||
| cmp x6, #8 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str s24, [x11] | |||
| st1 {v24.h}[0], [x11], x8 | |||
| st1 {v24.h}[1], [x19], x8 | |||
| cmp x6, #9 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str s25, [x11] | |||
| st1 {v25.h}[0], [x11], x8 | |||
| st1 {v25.h}[1], [x19], x8 | |||
| cmp x6, #10 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str s26, [x11] | |||
| st1 {v26.h}[0], [x11], x8 | |||
| st1 {v26.h}[1], [x19], x8 | |||
| cmp x6, #11 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str s27, [x11] | |||
| st1 {v27.h}[0], [x11], x8 | |||
| st1 {v27.h}[1], [x19], x8 | |||
| cmp x6, #12 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str s28, [x11] | |||
| st1 {v28.h}[0], [x11], x8 | |||
| st1 {v28.h}[1], [x19], x8 | |||
| cmp x6, #13 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str s29, [x11] | |||
| st1 {v29.h}[0], [x11], x8 | |||
| st1 {v29.h}[1], [x19], x8 | |||
| cmp x6, #14 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str s30, [x11] | |||
| st1 {v30.h}[0], [x11], x8 | |||
| st1 {v30.h}[1], [x19], x8 | |||
| cmp x6, #15 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str s31, [x11] | |||
| add x11, x11, x8 | |||
| st1 {v31.h}[0], [x11], x8 | |||
| st1 {v31.h}[1], [x19] | |||
| add x11, x11, #4 | |||
| b WriteEnd | |||
| Write3: | |||
| add x2, x2, #6 | |||
| add x19, x11, #4 | |||
| str s16, [x11] | |||
| add x20, x11, #2 | |||
| st1 {v16.h}[0], [x11], x8 | |||
| st1 {v16.h}[1], [x20], x8 | |||
| st1 {v16.h}[2], [x19], x8 | |||
| cmp x6, #1 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str s17, [x11] | |||
| st1 {v17.h}[0], [x11], x8 | |||
| st1 {v17.h}[1], [x20], x8 | |||
| st1 {v17.h}[2], [x19], x8 | |||
| cmp x6, #2 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str s18, [x11] | |||
| st1 {v18.h}[0], [x11], x8 | |||
| st1 {v18.h}[1], [x20], x8 | |||
| st1 {v18.h}[2], [x19], x8 | |||
| cmp x6, #3 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str s19, [x11] | |||
| st1 {v19.h}[0], [x11], x8 | |||
| st1 {v19.h}[1], [x20], x8 | |||
| st1 {v19.h}[2], [x19], x8 | |||
| cmp x6, #4 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str s20, [x11] | |||
| st1 {v20.h}[0], [x11], x8 | |||
| st1 {v20.h}[1], [x20], x8 | |||
| st1 {v20.h}[2], [x19], x8 | |||
| cmp x6, #5 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str s21, [x11] | |||
| st1 {v21.h}[0], [x11], x8 | |||
| st1 {v21.h}[1], [x20], x8 | |||
| st1 {v21.h}[2], [x19], x8 | |||
| cmp x6, #6 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str s22, [x11] | |||
| st1 {v22.h}[0], [x11], x8 | |||
| st1 {v22.h}[1], [x20], x8 | |||
| st1 {v22.h}[2], [x19], x8 | |||
| cmp x6, #7 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str s23, [x11] | |||
| st1 {v23.h}[0], [x11], x8 | |||
| st1 {v23.h}[1], [x20], x8 | |||
| st1 {v23.h}[2], [x19], x8 | |||
| cmp x6, #8 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str s24, [x11] | |||
| st1 {v24.h}[0], [x11], x8 | |||
| st1 {v24.h}[1], [x20], x8 | |||
| st1 {v24.h}[2], [x19], x8 | |||
| cmp x6, #9 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str s25, [x11] | |||
| st1 {v25.h}[0], [x11], x8 | |||
| st1 {v25.h}[1], [x20], x8 | |||
| st1 {v25.h}[2], [x19], x8 | |||
| cmp x6, #10 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str s26, [x11] | |||
| st1 {v26.h}[0], [x11], x8 | |||
| st1 {v26.h}[1], [x20], x8 | |||
| st1 {v26.h}[2], [x19], x8 | |||
| cmp x6, #11 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str s27, [x11] | |||
| st1 {v27.h}[0], [x11], x8 | |||
| st1 {v27.h}[1], [x20], x8 | |||
| st1 {v27.h}[2], [x19], x8 | |||
| cmp x6, #12 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str s28, [x11] | |||
| st1 {v28.h}[0], [x11], x8 | |||
| st1 {v28.h}[1], [x20], x8 | |||
| st1 {v28.h}[2], [x19], x8 | |||
| cmp x6, #13 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str s29, [x11] | |||
| st1 {v29.h}[0], [x11], x8 | |||
| st1 {v29.h}[1], [x20], x8 | |||
| st1 {v29.h}[2], [x19], x8 | |||
| cmp x6, #14 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str s30, [x11] | |||
| st1 {v30.h}[0], [x11], x8 | |||
| st1 {v30.h}[1], [x20], x8 | |||
| st1 {v30.h}[2], [x19], x8 | |||
| cmp x6, #15 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str s31, [x11] | |||
| st1 {v31.h}[0], [x11], x8 | |||
| st1 {v31.h}[1], [x20] | |||
| st1 {v31.h}[2], [x19] | |||
| add x11, x11, x8 | |||
| add x11, x11, #6 | |||
| b WriteEnd | |||
| Write4: | |||
| add x2, x2, #8 | |||
| str d16, [x11] | |||
| st1 {v16.4h}, [x11], x8 | |||
| cmp x6, #1 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d17, [x11] | |||
| st1 {v17.4h}, [x11], x8 | |||
| cmp x6, #2 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d18, [x11] | |||
| st1 {v18.4h}, [x11], x8 | |||
| cmp x6, #3 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d19, [x11] | |||
| st1 {v19.4h}, [x11], x8 | |||
| cmp x6, #4 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d20, [x11] | |||
| st1 {v20.4h}, [x11], x8 | |||
| cmp x6, #5 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d21, [x11] | |||
| st1 {v21.4h}, [x11], x8 | |||
| cmp x6, #6 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d22, [x11] | |||
| st1 {v22.4h}, [x11], x8 | |||
| cmp x6, #7 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d23, [x11] | |||
| st1 {v23.4h}, [x11], x8 | |||
| cmp x6, #8 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d24, [x11] | |||
| st1 {v24.4h}, [x11], x8 | |||
| cmp x6, #9 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d25, [x11] | |||
| st1 {v25.4h}, [x11], x8 | |||
| cmp x6, #10 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d26, [x11] | |||
| st1 {v26.4h}, [x11], x8 | |||
| cmp x6, #11 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d27, [x11] | |||
| st1 {v27.4h}, [x11], x8 | |||
| cmp x6, #12 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d28, [x11] | |||
| st1 {v28.4h}, [x11], x8 | |||
| cmp x6, #13 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d29, [x11] | |||
| st1 {v29.4h}, [x11], x8 | |||
| cmp x6, #14 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d30, [x11] | |||
| st1 {v30.4h}, [x11], x8 | |||
| cmp x6, #15 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d31, [x11] | |||
| add x11, x11, x8 | |||
| st1 {v31.4h}, [x11], x8 | |||
| add x11, x11, #8 | |||
| b WriteEnd | |||
| Write5: | |||
| add x2, x2, #10 | |||
| add x19, x11, #8 | |||
| str d16, [x11] | |||
| st1 {v16.4h}, [x11], x8 | |||
| st1 {v16.h}[4], [x19], x8 | |||
| cmp x6, #1 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d17, [x11] | |||
| st1 {v17.4h}, [x11], x8 | |||
| st1 {v17.h}[4], [x19], x8 | |||
| cmp x6, #2 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d18, [x11] | |||
| st1 {v18.4h}, [x11], x8 | |||
| st1 {v18.h}[4], [x19], x8 | |||
| cmp x6, #3 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d19, [x11] | |||
| st1 {v19.4h}, [x11], x8 | |||
| st1 {v19.h}[4], [x19], x8 | |||
| cmp x6, #4 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d20, [x11] | |||
| st1 {v20.4h}, [x11], x8 | |||
| st1 {v20.h}[4], [x19], x8 | |||
| cmp x6, #5 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d21, [x11] | |||
| st1 {v21.4h}, [x11], x8 | |||
| st1 {v21.h}[4], [x19], x8 | |||
| cmp x6, #6 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d22, [x11] | |||
| st1 {v22.4h}, [x11], x8 | |||
| st1 {v22.h}[4], [x19], x8 | |||
| cmp x6, #7 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d23, [x11] | |||
| st1 {v23.4h}, [x11], x8 | |||
| st1 {v23.h}[4], [x19], x8 | |||
| cmp x6, #8 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d24, [x11] | |||
| st1 {v24.4h}, [x11], x8 | |||
| st1 {v24.h}[4], [x19], x8 | |||
| cmp x6, #9 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d25, [x11] | |||
| st1 {v25.4h}, [x11], x8 | |||
| st1 {v25.h}[4], [x19], x8 | |||
| cmp x6, #10 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d26, [x11] | |||
| st1 {v26.4h}, [x11], x8 | |||
| st1 {v26.h}[4], [x19], x8 | |||
| cmp x6, #11 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d27, [x11] | |||
| st1 {v27.4h}, [x11], x8 | |||
| st1 {v27.h}[4], [x19], x8 | |||
| cmp x6, #12 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d28, [x11] | |||
| st1 {v28.4h}, [x11], x8 | |||
| st1 {v28.h}[4], [x19], x8 | |||
| cmp x6, #13 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d29, [x11] | |||
| st1 {v29.4h}, [x11], x8 | |||
| st1 {v29.h}[4], [x19], x8 | |||
| cmp x6, #14 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d30, [x11] | |||
| st1 {v30.4h}, [x11], x8 | |||
| st1 {v30.h}[4], [x19], x8 | |||
| cmp x6, #15 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d31, [x11] | |||
| st1 {v31.4h}, [x11], x8 | |||
| st1 {v31.h}[4], [x19] | |||
| add x11, x11, x8 | |||
| add x11, x11, #10 | |||
| b WriteEnd | |||
| Write6: | |||
| add x2, x2, #12 | |||
| add x19, x11, #8 | |||
| add x20, x11, #10 | |||
| str d16, [x11] | |||
| st1 {v16.4h}, [x11], x8 | |||
| st1 {v16.h}[4], [x19], x8 | |||
| st1 {v16.h}[5], [x20], x8 | |||
| cmp x6, #1 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d17, [x11] | |||
| st1 {v17.4h}, [x11], x8 | |||
| st1 {v17.h}[4], [x19], x8 | |||
| st1 {v17.h}[5], [x20], x8 | |||
| cmp x6, #2 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d18, [x11] | |||
| st1 {v18.4h}, [x11], x8 | |||
| st1 {v18.h}[4], [x19], x8 | |||
| st1 {v18.h}[5], [x20], x8 | |||
| cmp x6, #3 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d19, [x11] | |||
| st1 {v19.4h}, [x11], x8 | |||
| st1 {v19.h}[4], [x19], x8 | |||
| st1 {v19.h}[5], [x20], x8 | |||
| cmp x6, #4 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d20, [x11] | |||
| st1 {v20.4h}, [x11], x8 | |||
| st1 {v20.h}[4], [x19], x8 | |||
| st1 {v20.h}[5], [x20], x8 | |||
| cmp x6, #5 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d21, [x11] | |||
| st1 {v21.4h}, [x11], x8 | |||
| st1 {v21.h}[4], [x19], x8 | |||
| st1 {v21.h}[5], [x20], x8 | |||
| cmp x6, #6 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d22, [x11] | |||
| st1 {v22.4h}, [x11], x8 | |||
| st1 {v22.h}[4], [x19], x8 | |||
| st1 {v22.h}[5], [x20], x8 | |||
| cmp x6, #7 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d23, [x11] | |||
| st1 {v23.4h}, [x11], x8 | |||
| st1 {v23.h}[4], [x19], x8 | |||
| st1 {v23.h}[5], [x20], x8 | |||
| cmp x6, #8 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d24, [x11] | |||
| st1 {v24.4h}, [x11], x8 | |||
| st1 {v24.h}[4], [x19], x8 | |||
| st1 {v24.h}[5], [x20], x8 | |||
| cmp x6, #9 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d25, [x11] | |||
| st1 {v25.4h}, [x11], x8 | |||
| st1 {v25.h}[4], [x19], x8 | |||
| st1 {v25.h}[5], [x20], x8 | |||
| cmp x6, #10 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d26, [x11] | |||
| st1 {v26.4h}, [x11], x8 | |||
| st1 {v26.h}[4], [x19], x8 | |||
| st1 {v26.h}[5], [x20], x8 | |||
| cmp x6, #11 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d27, [x11] | |||
| st1 {v27.4h}, [x11], x8 | |||
| st1 {v27.h}[4], [x19], x8 | |||
| st1 {v27.h}[5], [x20], x8 | |||
| cmp x6, #12 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d28, [x11] | |||
| st1 {v28.4h}, [x11], x8 | |||
| st1 {v28.h}[4], [x19], x8 | |||
| st1 {v28.h}[5], [x20], x8 | |||
| cmp x6, #13 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d29, [x11] | |||
| st1 {v29.4h}, [x11], x8 | |||
| st1 {v29.h}[4], [x19], x8 | |||
| st1 {v29.h}[5], [x20], x8 | |||
| cmp x6, #14 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d30, [x11] | |||
| st1 {v30.4h}, [x11], x8 | |||
| st1 {v30.h}[4], [x19], x8 | |||
| st1 {v30.h}[5], [x20], x8 | |||
| cmp x6, #15 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d31, [x11] | |||
| st1 {v31.4h}, [x11], x8 | |||
| st1 {v31.h}[4], [x19] | |||
| st1 {v31.h}[5], [x20] | |||
| add x11, x11, x8 | |||
| add x11, x11, #12 | |||
| b WriteEnd | |||
| Write7: | |||
| @@ -1078,116 +1032,100 @@ LoopRow: | |||
| add x19, x11, #8 | |||
| add x20, x11, #10 | |||
| add x10, x11, #12 | |||
| str d16, [x11] | |||
| st1 {v16.4h}, [x11], x8 | |||
| st1 {v16.h}[4], [x19], x8 | |||
| st1 {v16.h}[5], [x20], x8 | |||
| st1 {v16.h}[6], [x10], x8 | |||
| cmp x6, #1 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d17, [x11] | |||
| st1 {v17.4h}, [x11], x8 | |||
| st1 {v17.h}[4], [x19], x8 | |||
| st1 {v17.h}[5], [x20], x8 | |||
| st1 {v17.h}[6], [x10], x8 | |||
| cmp x6, #2 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d18, [x11] | |||
| st1 {v18.4h}, [x11], x8 | |||
| st1 {v18.h}[4], [x19], x8 | |||
| st1 {v18.h}[5], [x20], x8 | |||
| st1 {v18.h}[6], [x10], x8 | |||
| cmp x6, #3 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d19, [x11] | |||
| st1 {v19.4h}, [x11], x8 | |||
| st1 {v19.h}[4], [x19], x8 | |||
| st1 {v19.h}[5], [x20], x8 | |||
| st1 {v19.h}[6], [x10], x8 | |||
| cmp x6, #4 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d20, [x11] | |||
| st1 {v20.4h}, [x11], x8 | |||
| st1 {v20.h}[4], [x19], x8 | |||
| st1 {v20.h}[5], [x20], x8 | |||
| st1 {v20.h}[6], [x10], x8 | |||
| cmp x6, #5 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d21, [x11] | |||
| st1 {v21.4h}, [x11], x8 | |||
| st1 {v21.h}[4], [x19], x8 | |||
| st1 {v21.h}[5], [x20], x8 | |||
| st1 {v21.h}[6], [x10], x8 | |||
| cmp x6, #6 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d22, [x11] | |||
| st1 {v22.4h}, [x11], x8 | |||
| st1 {v22.h}[4], [x19], x8 | |||
| st1 {v22.h}[5], [x20], x8 | |||
| st1 {v22.h}[6], [x10], x8 | |||
| cmp x6, #7 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d23, [x11] | |||
| st1 {v23.4h}, [x11], x8 | |||
| st1 {v23.h}[4], [x19], x8 | |||
| st1 {v23.h}[5], [x20], x8 | |||
| st1 {v23.h}[6], [x10], x8 | |||
| cmp x6, #8 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d24, [x11] | |||
| st1 {v24.4h}, [x11], x8 | |||
| st1 {v24.h}[4], [x19], x8 | |||
| st1 {v24.h}[5], [x20], x8 | |||
| st1 {v24.h}[6], [x10], x8 | |||
| cmp x6, #9 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d25, [x11] | |||
| st1 {v25.4h}, [x11], x8 | |||
| st1 {v25.h}[4], [x19], x8 | |||
| st1 {v25.h}[5], [x20], x8 | |||
| st1 {v25.h}[6], [x10], x8 | |||
| cmp x6, #10 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d26, [x11] | |||
| st1 {v26.4h}, [x11], x8 | |||
| st1 {v26.h}[4], [x19], x8 | |||
| st1 {v26.h}[5], [x20], x8 | |||
| st1 {v26.h}[6], [x10], x8 | |||
| cmp x6, #11 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d27, [x11] | |||
| st1 {v27.4h}, [x11], x8 | |||
| st1 {v27.h}[4], [x19], x8 | |||
| st1 {v27.h}[5], [x20], x8 | |||
| st1 {v27.h}[6], [x10], x8 | |||
| cmp x6, #12 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d28, [x11] | |||
| st1 {v28.4h}, [x11], x8 | |||
| st1 {v28.h}[4], [x19], x8 | |||
| st1 {v28.h}[5], [x20], x8 | |||
| st1 {v28.h}[6], [x10], x8 | |||
| cmp x6, #13 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d29, [x11] | |||
| st1 {v29.4h}, [x11], x8 | |||
| st1 {v29.h}[4], [x19], x8 | |||
| st1 {v29.h}[5], [x20], x8 | |||
| st1 {v29.h}[6], [x10], x8 | |||
| cmp x6, #14 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d30, [x11] | |||
| st1 {v30.4h}, [x11], x8 | |||
| st1 {v30.h}[4], [x19], x8 | |||
| st1 {v30.h}[5], [x20], x8 | |||
| st1 {v30.h}[6], [x10], x8 | |||
| cmp x6, #15 | |||
| beq WriteEnd | |||
| add x11, x11, x8 | |||
| str d31, [x11] | |||
| st1 {v31.4h}, [x11], x8 | |||
| st1 {v31.h}[4], [x19] | |||
| st1 {v31.h}[5], [x20] | |||
| st1 {v31.h}[6], [x10] | |||
| add x11, x11, x8 | |||
| add x11, x11, #14 | |||
| b WriteEnd | |||
| WriteC8: | |||
| @@ -30,8 +30,8 @@ constexpr auto kModelOptionKirinNpuFrequency = "mindspore.option.kirin_npu.frequ | |||
| struct Context::Data { | |||
| std::vector<std::shared_ptr<DeviceInfoContext>> device_info_list; | |||
| int32_t thread_num; | |||
| std::shared_ptr<Allocator> allocator; | |||
| int32_t thread_num = 2; | |||
| std::shared_ptr<Allocator> allocator = nullptr; | |||
| }; | |||
| struct DeviceInfoContext::Data { | |||
| @@ -74,7 +74,7 @@ class Tensor : public mindspore::tensor::MSTensor { | |||
| virtual bool operator==(const Tensor &tensor); | |||
| void set_tensor_name(std::string name) override { tensor_name_ = name; } | |||
| void set_tensor_name(const std::string &name) override { tensor_name_ = name; } | |||
| std::string tensor_name() const override { return tensor_name_; } | |||