| @@ -453,6 +453,16 @@ if(MSVC OR WIN32) | |||||
| # FIXME: fix MegRay on windows | # FIXME: fix MegRay on windows | ||||
| message(STATUS "Disable distributed build on windows host build...") | message(STATUS "Disable distributed build on windows host build...") | ||||
| set(MGE_WITH_DISTRIBUTED OFF) | set(MGE_WITH_DISTRIBUTED OFF) | ||||
| if("${MGE_ARCH}" STREQUAL "i386" AND "${CMAKE_BUILD_TYPE}" STREQUAL "Debug") | |||||
| # 32-bit (i386) Debug build on a Windows host: use the /Z7 debug information format. | |||||
| # https://docs.microsoft.com/en-us/cpp/build/reference/z7-zi-zi-debug-information-format?view=msvc-170 | |||||
| # Workaround for error LNK1318 | |||||
| # Both expansions above are quoted on purpose: an empty MGE_ARCH or | |||||
| # CMAKE_BUILD_TYPE then compares as an empty string instead of vanishing | |||||
| # from the if() argument list and breaking the configure step. | |||||
| # NOTE(review): multi-config generators leave CMAKE_BUILD_TYPE empty, so this | |||||
| # branch is not taken there - confirm whether those builds need /Z7 as well. | |||||
| message( | |||||
| STATUS | |||||
| "force use full symbolic debugging with build for 32bit for Windows with Debug mode" | |||||
| ) | |||||
| # NOTE(review): set() replaces the previous contents of these variables, so any | |||||
| # flags they held before this point are dropped - confirm that is intended. | |||||
| set(CMAKE_C_FLAGS_DEBUG "/Z7") | |||||
| set(CMAKE_CXX_FLAGS_DEBUG "/Z7") | |||||
| endif() | |||||
| else() | else() | ||||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra") | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra") | ||||
| @@ -290,6 +290,13 @@ struct InputTransform5X4 { | |||||
| CONCAT(s, 4).mla(m1addm2, 0.0625f).add(m3addm4).mla(m5addm6, 16.0f); \ | CONCAT(s, 4).mla(m1addm2, 0.0625f).add(m3addm4).mla(m5addm6, 16.0f); \ | ||||
| } while (0) | } while (0) | ||||
| // When __GNUC__ is defined and neither __llvm__ nor _MSC_VER is, and the | |||||
| // compiler version is below 8.0.0: save the current GCC optimization options | |||||
| // and compile the code that follows (OutputTransform5X4) at O0. | |||||
| // NOTE(review): the reason for disabling optimization is not recorded here - | |||||
| // presumably a problem with these older GCC versions; confirm and document it. | |||||
| #if defined(__GNUC__) && !defined(__llvm__) && !defined(_MSC_VER) | |||||
| // Encodes major/minor/patch as one integer, so 80000 corresponds to 8.0.0. | |||||
| #define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) | |||||
| #if GCC_VERSION < 80000 | |||||
| #pragma GCC push_options | |||||
| #pragma GCC optimize("O0") | |||||
| #endif | |||||
| #endif | |||||
| template <BiasMode bmode, typename Op> | template <BiasMode bmode, typename Op> | ||||
| struct OutputTransform5X4 { | struct OutputTransform5X4 { | ||||
| static void transform( | static void transform( | ||||
| @@ -393,6 +400,12 @@ struct OutputTransform5X4 { | |||||
| } | } | ||||
| } | } | ||||
| }; | }; | ||||
| // Same compiler/version condition as the guard placed before OutputTransform5X4: | |||||
| // restore the optimization options that were saved by `#pragma GCC push_options`. | |||||
| #if defined(__GNUC__) && !defined(__llvm__) && !defined(_MSC_VER) | |||||
| // Encodes major/minor/patch as one integer, so 80000 corresponds to 8.0.0. | |||||
| #define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) | |||||
| #if GCC_VERSION < 80000 | |||||
| #pragma GCC pop_options | |||||
| #endif | |||||
| #endif | |||||
| #undef OUTPUT_TRANSFORM | #undef OUTPUT_TRANSFORM | ||||
| #undef GET_VECTOR_HIGH_ELEM | #undef GET_VECTOR_HIGH_ELEM | ||||
| #undef GET_VECTOR_LOW_ELEM | #undef GET_VECTOR_LOW_ELEM | ||||
| @@ -207,6 +207,13 @@ struct InputTransform6X3 { | |||||
| CONCAT(s, 5).mla(m3subm4, 32.f).add(m5subm6).add(m##7); \ | CONCAT(s, 5).mla(m3subm4, 32.f).add(m5subm6).add(m##7); \ | ||||
| } while (0); | } while (0); | ||||
| // When __GNUC__ is defined and neither __llvm__ nor _MSC_VER is, and the | |||||
| // compiler version is below 8.0.0: save the current GCC optimization options | |||||
| // and compile the code that follows (OutputTransform6X3) at O0. | |||||
| // NOTE(review): the reason for disabling optimization is not recorded here - | |||||
| // presumably a problem with these older GCC versions; confirm and document it. | |||||
| #if defined(__GNUC__) && !defined(__llvm__) && !defined(_MSC_VER) | |||||
| // Encodes major/minor/patch as one integer, so 80000 corresponds to 8.0.0. | |||||
| #define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) | |||||
| #if GCC_VERSION < 80000 | |||||
| #pragma GCC push_options | |||||
| #pragma GCC optimize("O0") | |||||
| #endif | |||||
| #endif | |||||
| template <BiasMode bmode, typename Op> | template <BiasMode bmode, typename Op> | ||||
| struct OutputTransform6X3 { | struct OutputTransform6X3 { | ||||
| static void transform( | static void transform( | ||||
| @@ -318,6 +325,12 @@ struct OutputTransform6X3 { | |||||
| } | } | ||||
| } | } | ||||
| }; | }; | ||||
| // Same compiler/version condition as the guard placed before OutputTransform6X3: | |||||
| // restore the optimization options that were saved by `#pragma GCC push_options`. | |||||
| #if defined(__GNUC__) && !defined(__llvm__) && !defined(_MSC_VER) | |||||
| // Encodes major/minor/patch as one integer, so 80000 corresponds to 8.0.0. | |||||
| #define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) | |||||
| #if GCC_VERSION < 80000 | |||||
| #pragma GCC pop_options | |||||
| #endif | |||||
| #endif | |||||
| #undef GET_VECTOR_HIGH_ELEM | #undef GET_VECTOR_HIGH_ELEM | ||||
| #undef GET_VECTOR_LOW_ELEM | #undef GET_VECTOR_LOW_ELEM | ||||
| @@ -905,7 +905,7 @@ TEST_F(FALLBACK, GiMultiplyAddFloat32) { | |||||
| naive.push_back(s1[i] * s2[i] + s0[i]); | naive.push_back(s1[i] * s2[i] + s0[i]); | ||||
| } | } | ||||
| assert_eq((float*)&ret, naive); | |||||
| assert_lt((float*)&ret, naive, 1e-3); | |||||
| } | } | ||||
| TEST_F(FALLBACK, GiMultiplyAddScalarFloat32) { | TEST_F(FALLBACK, GiMultiplyAddScalarFloat32) { | ||||
| @@ -2793,7 +2793,7 @@ TEST_F(FALLBACK, GiMaximumInt8) { | |||||
| for (size_t i = 0; i < SIMD_LEN_8; i++) { | for (size_t i = 0; i < SIMD_LEN_8; i++) { | ||||
| s2.push_back(s1[i] < s0[i] ? 0xFF : 0); | s2.push_back(s1[i] < s0[i] ? 0xFF : 0); | ||||
| } | } | ||||
| s2.resize(SIMD_LEN); | |||||
| s2.resize(SIMD_LEN_8); | |||||
| init((int8_t*)&src2, s2, SIMD_LEN_8); | init((int8_t*)&src2, s2, SIMD_LEN_8); | ||||
| ret = GiMaximumInt8(src0, src1); | ret = GiMaximumInt8(src0, src1); | ||||
| @@ -2853,7 +2853,7 @@ TEST_F(FALLBACK, GiMinimumInt8) { | |||||
| for (size_t i = 0; i < SIMD_LEN_8; i++) { | for (size_t i = 0; i < SIMD_LEN_8; i++) { | ||||
| s2.push_back(s1[i] > s0[i] ? 0xFF : 0); | s2.push_back(s1[i] > s0[i] ? 0xFF : 0); | ||||
| } | } | ||||
| s2.resize(SIMD_LEN); | |||||
| s2.resize(SIMD_LEN_8); | |||||
| init((int8_t*)&src2, s2, SIMD_LEN_8); | init((int8_t*)&src2, s2, SIMD_LEN_8); | ||||
| ret = GiMinimumInt8(src0, src1); | ret = GiMinimumInt8(src0, src1); | ||||