Browse Source

Fix NCNN_SIMPLESTL compatibility and improve bit shift safety

- Add conditional header includes for uint64_t in all build modes
- Include <stdint.h> in SIMPLESTL mode, <cstdint> in normal mode
- Move standard library headers to conditional compilation blocks
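- Fix unsafe bit shift operations in CpuSet::get_legacy_mask(): `1ULL << (sizeof(ULONG_PTR) * 8)` shifts a 64-bit value by 64 when ULONG_PTR is 8 bytes, which is undefined behavior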
- Ensure >64 CPU support works correctly in both SIMPLESTL and normal modes
- Tested successfully in NCNN_SIMPLESTL=ON mode
pull/6185/head
KRT nihui 11 months ago
parent
commit
252f30680e
3 changed files with 49 additions and 24 deletions
  1. +44
    -23
      src/cpu.cpp
  2. +0
    -1
      src/cpu.h
  3. +5
    -0
      src/platform.h.in

+ 44
- 23
src/cpu.cpp View File

@@ -5,7 +5,6 @@

#include "platform.h"

#include <cstdint>
#include <limits.h>
#ifndef __wasi__
#include <setjmp.h>
@@ -14,9 +13,13 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#if !NCNN_SIMPLESTL
#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>
#endif

#ifdef _OPENMP
#if NCNN_SIMPLEOMP
@@ -2422,41 +2425,41 @@ namespace ncnn {

// New unified CpuSet implementation supporting >64 CPUs
CpuSet::CpuSet()
: fast_mask(0), extended_mask(nullptr), extended_capacity(0), use_extended(false)
: fast_mask(0)
, extended_mask(nullptr)
, extended_capacity(0)
, use_extended(false)
#if defined _WIN32
,
legacy_mask_cache(0),
legacy_mask_valid(false)
, legacy_mask_cache(0)
, legacy_mask_valid(false)
#endif
#if defined __ANDROID__ || defined __linux__
,
cpu_set_cache(nullptr),
cpu_set_valid(false)
, cpu_set_cache(nullptr)
, cpu_set_valid(false)
#endif
#if __APPLE__
,
legacy_policy_cache(0),
legacy_policy_valid(false)
, legacy_policy_cache(0)
, legacy_policy_valid(false)
#endif
{
}

CpuSet::CpuSet(const CpuSet& other)
: fast_mask(0), extended_mask(nullptr), extended_capacity(0), use_extended(false)
: fast_mask(0)
, extended_mask(nullptr)
, extended_capacity(0)
, use_extended(false)
#if defined _WIN32
,
legacy_mask_cache(0),
legacy_mask_valid(false)
, legacy_mask_cache(0)
, legacy_mask_valid(false)
#endif
#if defined __ANDROID__ || defined __linux__
,
cpu_set_cache(nullptr),
cpu_set_valid(false)
, cpu_set_cache(nullptr)
, cpu_set_valid(false)
#endif
#if __APPLE__
,
legacy_policy_cache(0),
legacy_policy_valid(false)
, legacy_policy_cache(0)
, legacy_policy_valid(false)
#endif
{
copy_from(other);
@@ -2781,12 +2784,30 @@ ULONG_PTR CpuSet::get_legacy_mask() const
if (!use_extended)
{
// Fast path: directly use fast_mask (truncated to ULONG_PTR size)
legacy_mask_cache = (ULONG_PTR)(fast_mask & ((1ULL << (sizeof(ULONG_PTR) * 8)) - 1));
if (sizeof(ULONG_PTR) >= sizeof(uint64_t))
{
legacy_mask_cache = (ULONG_PTR)fast_mask;
}
else
{
// Create mask for ULONG_PTR size without undefined behavior
const uint64_t ptr_mask = (sizeof(ULONG_PTR) == 4) ? 0xFFFFFFFFULL : 0xFFFFFFFFFFFFFFFFULL;
legacy_mask_cache = (ULONG_PTR)(fast_mask & ptr_mask);
}
}
else if (extended_mask && extended_capacity > 0)
{
// Extended path: use first word, truncated to ULONG_PTR size
legacy_mask_cache = (ULONG_PTR)(extended_mask[0] & ((1ULL << (sizeof(ULONG_PTR) * 8)) - 1));
if (sizeof(ULONG_PTR) >= sizeof(uint64_t))
{
legacy_mask_cache = (ULONG_PTR)extended_mask[0];
}
else
{
// Create mask for ULONG_PTR size without undefined behavior
const uint64_t ptr_mask = (sizeof(ULONG_PTR) == 4) ? 0xFFFFFFFFULL : 0xFFFFFFFFFFFFFFFFULL;
legacy_mask_cache = (ULONG_PTR)(extended_mask[0] & ptr_mask);
}
}

legacy_mask_valid = true;


+ 0
- 1
src/cpu.h View File

@@ -5,7 +5,6 @@
#define NCNN_CPU_H

#include <stddef.h>
#include <cstdint>

#if defined _WIN32
#define WIN32_LEAN_AND_MEAN


+ 5
- 0
src/platform.h.in View File

@@ -4,7 +4,12 @@
#ifndef NCNN_PLATFORM_H
#define NCNN_PLATFORM_H

// Ensure basic integer types are available in all modes
#if NCNN_SIMPLESTL
#include <stdint.h>
#else
#include <cstdint>
#endif

#cmakedefine01 NCNN_STDIO
#cmakedefine01 NCNN_STRING


Loading…
Cancel
Save