Browse Source

Fix NCNN_SIMPLESTL compatibility and improve bit shift safety

- Add conditional header includes so that uint64_t is available in all build modes
- Include <stdint.h> in SIMPLESTL mode, <cstdint> in normal mode
- Move standard library headers to conditional compilation blocks
- Fix unsafe bit shift operations that could cause undefined behavior (`1ULL << (sizeof(ULONG_PTR) * 8)` shifts by 64 bits when ULONG_PTR is 8 bytes wide)
- Ensure >64 CPU support works correctly in both SIMPLESTL and normal modes
- Tested successfully in NCNN_SIMPLESTL=ON mode
pull/6185/head
KRT nihui 11 months ago
parent
commit
252f30680e
3 changed files with 49 additions and 24 deletions
  1. +44
    -23
      src/cpu.cpp
  2. +0
    -1
      src/cpu.h
  3. +5
    -0
      src/platform.h.in

+ 44
- 23
src/cpu.cpp View File

@@ -5,7 +5,6 @@


#include "platform.h" #include "platform.h"


#include <cstdint>
#include <limits.h> #include <limits.h>
#ifndef __wasi__ #ifndef __wasi__
#include <setjmp.h> #include <setjmp.h>
@@ -14,9 +13,13 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>

#if !NCNN_SIMPLESTL
#include <algorithm> #include <algorithm>
#include <cstdint>
#include <utility> #include <utility>
#include <vector> #include <vector>
#endif


#ifdef _OPENMP #ifdef _OPENMP
#if NCNN_SIMPLEOMP #if NCNN_SIMPLEOMP
@@ -2422,41 +2425,41 @@ namespace ncnn {


// New unified CpuSet implementation supporting >64 CPUs // New unified CpuSet implementation supporting >64 CPUs
CpuSet::CpuSet() CpuSet::CpuSet()
: fast_mask(0), extended_mask(nullptr), extended_capacity(0), use_extended(false)
: fast_mask(0)
, extended_mask(nullptr)
, extended_capacity(0)
, use_extended(false)
#if defined _WIN32 #if defined _WIN32
,
legacy_mask_cache(0),
legacy_mask_valid(false)
, legacy_mask_cache(0)
, legacy_mask_valid(false)
#endif #endif
#if defined __ANDROID__ || defined __linux__ #if defined __ANDROID__ || defined __linux__
,
cpu_set_cache(nullptr),
cpu_set_valid(false)
, cpu_set_cache(nullptr)
, cpu_set_valid(false)
#endif #endif
#if __APPLE__ #if __APPLE__
,
legacy_policy_cache(0),
legacy_policy_valid(false)
, legacy_policy_cache(0)
, legacy_policy_valid(false)
#endif #endif
{ {
} }


CpuSet::CpuSet(const CpuSet& other) CpuSet::CpuSet(const CpuSet& other)
: fast_mask(0), extended_mask(nullptr), extended_capacity(0), use_extended(false)
: fast_mask(0)
, extended_mask(nullptr)
, extended_capacity(0)
, use_extended(false)
#if defined _WIN32 #if defined _WIN32
,
legacy_mask_cache(0),
legacy_mask_valid(false)
, legacy_mask_cache(0)
, legacy_mask_valid(false)
#endif #endif
#if defined __ANDROID__ || defined __linux__ #if defined __ANDROID__ || defined __linux__
,
cpu_set_cache(nullptr),
cpu_set_valid(false)
, cpu_set_cache(nullptr)
, cpu_set_valid(false)
#endif #endif
#if __APPLE__ #if __APPLE__
,
legacy_policy_cache(0),
legacy_policy_valid(false)
, legacy_policy_cache(0)
, legacy_policy_valid(false)
#endif #endif
{ {
copy_from(other); copy_from(other);
@@ -2781,12 +2784,30 @@ ULONG_PTR CpuSet::get_legacy_mask() const
if (!use_extended) if (!use_extended)
{ {
// Fast path: directly use fast_mask (truncated to ULONG_PTR size) // Fast path: directly use fast_mask (truncated to ULONG_PTR size)
legacy_mask_cache = (ULONG_PTR)(fast_mask & ((1ULL << (sizeof(ULONG_PTR) * 8)) - 1));
if (sizeof(ULONG_PTR) >= sizeof(uint64_t))
{
legacy_mask_cache = (ULONG_PTR)fast_mask;
}
else
{
// Create mask for ULONG_PTR size without undefined behavior
const uint64_t ptr_mask = (sizeof(ULONG_PTR) == 4) ? 0xFFFFFFFFULL : 0xFFFFFFFFFFFFFFFFULL;
legacy_mask_cache = (ULONG_PTR)(fast_mask & ptr_mask);
}
} }
else if (extended_mask && extended_capacity > 0) else if (extended_mask && extended_capacity > 0)
{ {
// Extended path: use first word, truncated to ULONG_PTR size // Extended path: use first word, truncated to ULONG_PTR size
legacy_mask_cache = (ULONG_PTR)(extended_mask[0] & ((1ULL << (sizeof(ULONG_PTR) * 8)) - 1));
if (sizeof(ULONG_PTR) >= sizeof(uint64_t))
{
legacy_mask_cache = (ULONG_PTR)extended_mask[0];
}
else
{
// Create mask for ULONG_PTR size without undefined behavior
const uint64_t ptr_mask = (sizeof(ULONG_PTR) == 4) ? 0xFFFFFFFFULL : 0xFFFFFFFFFFFFFFFFULL;
legacy_mask_cache = (ULONG_PTR)(extended_mask[0] & ptr_mask);
}
} }


legacy_mask_valid = true; legacy_mask_valid = true;


+ 0
- 1
src/cpu.h View File

@@ -5,7 +5,6 @@
#define NCNN_CPU_H #define NCNN_CPU_H


#include <stddef.h> #include <stddef.h>
#include <cstdint>


#if defined _WIN32 #if defined _WIN32
#define WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN


+ 5
- 0
src/platform.h.in View File

@@ -4,7 +4,12 @@
#ifndef NCNN_PLATFORM_H #ifndef NCNN_PLATFORM_H
#define NCNN_PLATFORM_H #define NCNN_PLATFORM_H


// Ensure basic integer types are available in all modes
#if NCNN_SIMPLESTL
#include <stdint.h>
#else
#include <cstdint> #include <cstdint>
#endif


#cmakedefine01 NCNN_STDIO #cmakedefine01 NCNN_STDIO
#cmakedefine01 NCNN_STRING #cmakedefine01 NCNN_STRING


Loading…
Cancel
Save