From: @xiefangqi (tag: v1.2.0-rc1)
@@ -35,26 +35,6 @@ if (MS_BUILD_GRPC)
message(STATUS "Cache is enabled")
endif()
if (${CMAKE_SYSTEM_NAME} MATCHES "Linux")
# Try to find numa header file and its library
FIND_PATH(NUMA_INCLUDE_DIR numa.h)
MESSAGE("Numa include dir is: ${NUMA_INCLUDE_DIR}")
FIND_LIBRARY(NUMA_LIBRARY NAMES libnuma.so)
MESSAGE("Numa library is: ${NUMA_LIBRARY}")
FIND_PACKAGE_HANDLE_STANDARD_ARGS(NUMA DEFAULT_MSG
NUMA_INCLUDE_DIR
NUMA_LIBRARY
)
if (NUMA_FOUND)
ADD_DEFINITIONS(-DNUMA_ENABLED)
MESSAGE("Numa package found")
else()
MESSAGE(FATAL_ERROR "Numa package not found, try 'sudo yum install numactl-devel' or 'sudo apt-get install libnuma-dev'")
endif()
endif ()
# code coverage
# option(ENABLE_COVERAGE "Enable code coverage report" OFF)
# if (ENABLE_COVERAGE)
@@ -269,9 +249,6 @@ else ()
target_link_libraries(_c_dataengine PRIVATE -ldl ${SECUREC_LIBRARY})
endif ()
target_link_libraries(_c_dataengine PUBLIC mindspore::sentencepiece)
if (NUMA_FOUND)
target_link_libraries(_c_dataengine PUBLIC numa)
endif()
endif()
target_link_libraries(_c_dataengine PUBLIC mindspore::jpeg_turbo mindspore::turbojpeg mindspore::opencv_core mindspore::opencv_imgcodecs
@@ -39,6 +39,8 @@ PYBIND_REGISTER(ConfigManager, 0, ([](const py::module *m) {
.def("get_callback_timeout", &ConfigManager::callback_timeout)
.def("get_monitor_sampling_interval", &ConfigManager::monitor_sampling_interval)
.def("get_num_parallel_workers", &ConfigManager::num_parallel_workers)
.def("get_numa_enable", &ConfigManager::numa_enable)
.def("set_numa_enable", &ConfigManager::set_numa_enable)
.def("get_op_connector_size", &ConfigManager::op_connector_size)
.def("get_rows_per_buffer", &ConfigManager::rows_per_buffer)
.def("get_seed", &ConfigManager::seed)
@@ -37,6 +37,7 @@ ConfigManager::ConfigManager()
op_connector_size_(kCfgOpConnectorSize),
rank_id_(kCfgDefaultRankId),
seed_(kCfgDefaultSeed),
numa_enable_(false),
monitor_sampling_interval_(kCfgMonitorSamplingInterval),
callback_timout_(kCfgCallbackTimeout),
cache_host_(kCfgDefaultCacheHost),
@@ -131,6 +132,8 @@ uint32_t ConfigManager::seed() const { return seed_; }
void ConfigManager::set_rank_id(int32_t rank_id) { rank_id_ = rank_id; }
void ConfigManager::set_numa_enable(bool numa_enable) { numa_enable_ = numa_enable; }
void ConfigManager::set_seed(uint32_t seed) { seed_ = seed; }
void ConfigManager::set_monitor_sampling_interval(uint32_t interval) { monitor_sampling_interval_ = interval; }
@@ -143,6 +143,16 @@ class ConfigManager {
/// \param prefetch_size
void set_prefetch_size(int32_t prefetch_size);
/// setter function
/// \param numa_enable
void set_numa_enable(bool numa_enable);
/// getter function
/// Now that the numa link to _c_dataengine is separated out in the CMakeLists,
/// the user chooses whether to turn the numa switch on.
/// @return The current numa switch state.
bool numa_enable() const { return numa_enable_; }
// getter function
// This rank_id is for numa and device_queue; one process works with only one rank_id.
// For the standalone scenario, this rank_id may come from env 'CUDA_VISIBLE_DEVICES',
@@ -217,6 +227,7 @@ class ConfigManager {
std::string cache_host_;
int32_t cache_port_;
int32_t num_connections_;
bool numa_enable_;
int32_t prefetch_size_;
bool auto_num_workers_;
const int32_t num_cpu_threads_;
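Taken together, the ConfigManager hunks give the flag a default of false plus a setter and a getter. A minimal sketch of the round trip, assuming the GlobalContext accessor used in the execution_tree.cc hunk below (header paths are assumptions based on the files patched here):

    #include <memory>
    // Paths are assumptions based on the files patched above.
    #include "minddata/dataset/core/config_manager.h"
    #include "minddata/dataset/core/global_context.h"

    void ExampleNumaFlagRoundTrip() {
      using mindspore::dataset::ConfigManager;
      using mindspore::dataset::GlobalContext;
      std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
      cfg->set_numa_enable(true);   // what the new pybind setter ends up calling
      if (cfg->numa_enable()) {     // what ExecutionTree reads in its constructor
        // numa bind will be attempted at Launch() time
      }
    }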
@@ -6,6 +6,26 @@ ms_build_flatbuffers("de_tensor.fbs" ${CMAKE_CURRENT_SOURCE_DIR} generated_engin
file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
if (${CMAKE_SYSTEM_NAME} MATCHES "Linux")
# Try to find numa header file and its library
FIND_PATH(NUMA_INCLUDE_DIR numa.h)
MESSAGE("Numa include dir is: ${NUMA_INCLUDE_DIR}")
FIND_LIBRARY(NUMA_LIBRARY NAMES libnuma.so)
MESSAGE("Numa library is: ${NUMA_LIBRARY}")
FIND_PACKAGE_HANDLE_STANDARD_ARGS(NUMA DEFAULT_MSG
NUMA_INCLUDE_DIR
NUMA_LIBRARY
)
if (NUMA_FOUND)
ADD_DEFINITIONS(-DNUMA_ENABLED)
MESSAGE("Numa package found")
else()
MESSAGE(FATAL_ERROR "Numa package not found, try 'sudo yum install numactl-devel' or 'sudo apt-get install libnuma-dev'")
endif()
endif ()
if (NUMA_FOUND)
ADD_DEFINITIONS(-DCACHE_LOCAL_CLIENT)
endif ()
@@ -18,13 +18,9 @@
#include <string>
#include <utility>
#include <limits>
#if defined(NUMA_ENABLED) && (defined(ENABLE_GPUQUE) || defined(ENABLE_TDTQUE))
#include <numa.h>
#endif
#include "minddata/dataset/engine/datasetops/dataset_op.h"
#include "minddata/dataset/engine/datasetops/shuffle_op.h"
#include "minddata/dataset/engine/datasetops/device_queue_op.h"
#include "minddata/dataset/util/task_manager.h"
#include "minddata/dataset/engine/opt/pass.h"
#include "minddata/dataset/engine/opt/pre/removal_pass.h"
#ifndef ENABLE_ANDROID
@@ -36,6 +32,10 @@
#include "minddata/dataset/engine/opt/pre/epoch_injection_pass.h"
#include "minddata/dataset/engine/perf/profiling.h"
#include "minddata/dataset/engine/perf/monitor.h"
#if defined(ENABLE_GPUQUE) || defined(ENABLE_TDTQUE)
#include "minddata/dataset/util/numa_interface.h"
#endif
#include "minddata/dataset/util/task_manager.h"
namespace mindspore {
namespace dataset {
@@ -43,19 +43,28 @@ namespace dataset {
ExecutionTree::ExecutionTree() : id_count_(0), tree_state_(kDeTStateInit), prepare_flags_(kDePrepNone) {
tg_ = std::make_unique<TaskGroup>();
profiling_manager_ = std::make_unique<ProfilingManager>(this);
#if defined(NUMA_ENABLED) && (defined(ENABLE_GPUQUE) || defined(ENABLE_TDTQUE))
#if defined(ENABLE_GPUQUE) || defined(ENABLE_TDTQUE)
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
rank_id_ = cfg->rank_id();
numa_enable_ = cfg->numa_enable();
handle_ = nullptr;
#endif
}
// Destructor
ExecutionTree::~ExecutionTree() {
#ifdef ENABLE_TDTQUE
#if defined(ENABLE_GPUQUE) || defined(ENABLE_TDTQUE)
if (numa_enable_) {
if (handle_ != nullptr) {
ReleaseLibrary(handle_);
}
}
#if defined(ENABLE_TDTQUE)
DeviceQueueOp *op = dynamic_cast<DeviceQueueOp *>(root_.get());
if (op != nullptr) {
op->StopWaiting();
}
#endif
#endif
(void)tg_->ServiceStop();
}
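The destructor above releases handle_ manually. A hedged alternative (not what the patch does) is to hold the dlopen handle in a unique_ptr with a custom deleter, so the release cannot be skipped on any exit path:

    #include <dlfcn.h>
    #include <memory>

    struct DlCloser {
      void operator()(void *h) const {
        if (h != nullptr) (void)dlclose(h);  // mirrors ReleaseLibrary
      }
    };
    using DlHandle = std::unique_ptr<void, DlCloser>;

    DlHandle OpenNuma() {
      // Same flags the patch's LoadLibrary uses.
      return DlHandle(dlopen("libnuma.so", RTLD_LAZY | RTLD_LOCAL));
    }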
@@ -140,30 +149,26 @@ void ExecutionTree::PrintNode(std::ostream &out, const std::shared_ptr<DatasetOp
// Start the execution of the tree
Status ExecutionTree::Launch() {
// limit how many threads opencv uses
#ifndef ENABLE_ANDROID
#if !defined(_WIN32) && !defined(_WIN64) && !defined(__APPLE__)
#if defined(NUMA_ENABLED) && (defined(ENABLE_GPUQUE) || defined(ENABLE_TDTQUE))
#if !defined(_WIN32) && !defined(_WIN64) && !defined(__APPLE__) && !defined(ENABLE_ANDROID)
#if defined(ENABLE_GPUQUE) || defined(ENABLE_TDTQUE)
// Here we do numa bind for performance optimization. Our tests show that
// binding when get_dataset_size launches a tree performs better than
// binding only when _To_Device launches a tree. The numa bind is a
// process-level bind, covering both cpu and memory, and the numa node is
// chosen with a polling logic:
// numa_bind_id = rank_id_ % (numa_max_node() + 1)
// So far this has only been validated in the GPU scenario; the D scenario
// is untested, so we don't suggest enabling the numa feature there.
int numa_node_max_id = numa_max_node();
if (numa_node_max_id < 0) {
RETURN_STATUS_UNEXPECTED("Get numa max node failed.");
}
if (rank_id_ >= 0) {
uint32_t numa_bind_id = static_cast<uint32_t>(rank_id_ % (numa_node_max_id + 1));
auto bm = numa_allocate_nodemask();
numa_bitmask_clearall(bm);
numa_bitmask_setbit(bm, numa_bind_id);
numa_bind(bm);
numa_bitmask_free(bm);
} else {
MS_LOG(INFO) << "Numa bind feature doesn't work now.";
// Currently only the GPU scenario and the single-process Ascend scenario
// are supported. The target_link of numa with _c_dataengine is removed,
// and the user controls the numa feature through a config api.
if (numa_enable_ && rank_id_ >= 0) {
if (handle_ == nullptr) {
handle_ = GetNumaAdapterHandle();
if (handle_ == nullptr) {
RETURN_STATUS_UNEXPECTED("Numa package not found.");
}
}
RETURN_IF_NOT_OK(NumaBind(handle_, rank_id_));
MS_LOG(INFO) << "Numa bind memory and cpu successful.";
}
#endif
int32_t thread_num = get_nprocs();
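To make the polling logic concrete, here is a tiny standalone sketch of the numa_bind_id computation, with an assumed two-node machine (numa_max_node() == 1); the values are illustrative only:

    #include <cstdint>
    #include <cstdio>

    int main() {
      const int numa_node_max_id = 1;  // assumed: numa_max_node() returned 1
      for (int32_t rank_id = 0; rank_id < 4; ++rank_id) {
        // Same formula as the patch: ranks round-robin across numa nodes.
        uint32_t numa_bind_id = static_cast<uint32_t>(rank_id % (numa_node_max_id + 1));
        std::printf("rank %d -> numa node %u\n", rank_id, numa_bind_id);
        // prints: rank 0 -> 0, rank 1 -> 1, rank 2 -> 0, rank 3 -> 1
      }
      return 0;
    }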
@@ -176,7 +181,7 @@ Status ExecutionTree::Launch() {
else
cv::setNumThreads(thread_num);
#endif
#endif
// Tree must be built and prepared before it can be launched!
if (tree_state_ != kDeTStateReady) {
std::string err_msg =
@@ -257,11 +257,13 @@ class ExecutionTree {
int32_t num_epochs_; // Total number of epochs to run for this tree
std::unique_ptr<ProfilingManager> profiling_manager_; // Profiling manager
bool partially_prepare_; // Temp: during migration to IR, if true, run remaining passes.
#if defined(NUMA_ENABLED) && (defined(ENABLE_GPUQUE) || defined(ENABLE_TDTQUE))
#if defined(ENABLE_GPUQUE) || defined(ENABLE_TDTQUE)
// This rank_id is for numa and device_queue; one process works with only one rank_id.
// For the standalone scenario, this rank_id may come from env 'CUDA_VISIBLE_DEVICES',
// but for the distributed scenario, it comes from _get_global_rank() in python.
int32_t rank_id_;
bool numa_enable_;
void *handle_;
#endif
};
} // namespace dataset
@@ -1,21 +1,6 @@
file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
add_library(utils OBJECT
arena.cc
buddy.cc
circular_pool.cc
data_helper.cc
memory_pool.cc
cond_var.cc
intrp_service.cc
task.cc
task_manager.cc
service.cc
services.cc
lock.cc
semaphore.cc
status.cc
slice.cc
path.cc
wait_post.cc
sig_handler.cc)
if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Linux")
LIST(REMOVE_ITEM _CURRENT_SRC_FILES numa_interface.cc)
endif()
add_library(utils OBJECT ${_CURRENT_SRC_FILES})
@@ -0,0 +1,100 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "minddata/dataset/util/numa_interface.h"
#include <dlfcn.h>
namespace mindspore {
namespace dataset {
inline void *LoadLibrary(const char *name) {
auto handle = dlopen(name, RTLD_LAZY | RTLD_LOCAL);
return handle;
}
inline void *GetNumaAdapterFunc(void *handle, const char *name) {
void *func = dlsym(handle, name);
return func;
}
void ReleaseLibrary(void *handle) {
if (handle != nullptr) {
(void)dlclose(handle);
}
}
void *GetNumaAdapterHandle() {
void *handle = LoadLibrary("libnuma.so");
return handle;
}
typedef int (*GetNumaMaxNodeFunc)(void);
typedef struct bitmask *(*NumaAllocateNodemaskFunc)(void);
typedef struct bitmask *(*NumaBitmaskClearallFunc)(struct bitmask *);
typedef struct bitmask *(*NumaBitmaskSetbitFunc)(struct bitmask *, unsigned int);
typedef void (*NumaBindFunc)(struct bitmask *);
typedef void (*NumaBitmaskFreeFunc)(struct bitmask *);
Status NumaBind(void *handle, const int32_t &rank_id) {
if (handle == nullptr) {
RETURN_STATUS_UNEXPECTED("Numa package not found.");
}
auto numa_max_node_func_pointer = GetNumaAdapterFunc(handle, "numa_max_node");
if (numa_max_node_func_pointer == nullptr) {
RETURN_STATUS_UNEXPECTED("Numa api: numa_max_node not found.");
}
auto numa_allocate_nodemask_func_pointer = GetNumaAdapterFunc(handle, "numa_allocate_nodemask");
if (numa_allocate_nodemask_func_pointer == nullptr) {
RETURN_STATUS_UNEXPECTED("Numa api: numa_allocate_nodemask not found.");
}
auto numa_bitmask_clearall_func_pointer = GetNumaAdapterFunc(handle, "numa_bitmask_clearall");
if (numa_bitmask_clearall_func_pointer == nullptr) {
RETURN_STATUS_UNEXPECTED("Numa api: numa_bitmask_clearall not found.");
}
auto numa_bitmask_setbit_func_pointer = GetNumaAdapterFunc(handle, "numa_bitmask_setbit");
if (numa_bitmask_setbit_func_pointer == nullptr) {
RETURN_STATUS_UNEXPECTED("Numa api: numa_bitmask_setbit not found.");
}
auto numa_bind_func_pointer = GetNumaAdapterFunc(handle, "numa_bind");
if (numa_bind_func_pointer == nullptr) {
RETURN_STATUS_UNEXPECTED("Numa api: numa_bind not found.");
}
auto numa_bitmask_free_func_pointer = GetNumaAdapterFunc(handle, "numa_bitmask_free");
if (numa_bitmask_free_func_pointer == nullptr) {
RETURN_STATUS_UNEXPECTED("Numa api: numa_bitmask_free not found.");
}
auto numa_max_node_func = reinterpret_cast<GetNumaMaxNodeFunc>(numa_max_node_func_pointer);
auto numa_allocate_nodemask_func = reinterpret_cast<NumaAllocateNodemaskFunc>(numa_allocate_nodemask_func_pointer);
auto numa_bitmask_clearall_func = reinterpret_cast<NumaBitmaskClearallFunc>(numa_bitmask_clearall_func_pointer);
auto numa_bitmask_setbit_func = reinterpret_cast<NumaBitmaskSetbitFunc>(numa_bitmask_setbit_func_pointer);
auto numa_bind_func = reinterpret_cast<NumaBindFunc>(numa_bind_func_pointer);
auto numa_bitmask_free_func = reinterpret_cast<NumaBitmaskFreeFunc>(numa_bitmask_free_func_pointer);
int numa_node_max_id = numa_max_node_func();
if (numa_node_max_id < 0) {
RETURN_STATUS_UNEXPECTED("Get numa max node failed.");
}
if (rank_id >= 0) {
uint32_t numa_bind_id = static_cast<uint32_t>(rank_id % (numa_node_max_id + 1));
auto bm = numa_allocate_nodemask_func();
numa_bitmask_clearall_func(bm);
numa_bitmask_setbit_func(bm, numa_bind_id);
numa_bind_func(bm);
numa_bitmask_free_func(bm);
} else {
RETURN_STATUS_UNEXPECTED("Value error, rank_id is a negative value.");
}
return Status::OK();
}
} // namespace dataset
} // namespace mindspore
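A minimal sketch of the intended call sequence for this adapter. In the patch itself, ExecutionTree keeps handle_ alive and its destructor calls ReleaseLibrary; releasing immediately after the bind, as below, is a simplification (numa_bind sets a process-level policy, so the policy outlives the handle):

    #include <cstdint>
    #include "minddata/dataset/util/numa_interface.h"

    mindspore::dataset::Status BindThisProcess(int32_t rank_id) {
      void *handle = mindspore::dataset::GetNumaAdapterHandle();
      if (handle == nullptr) {
        RETURN_STATUS_UNEXPECTED("Numa package not found.");
      }
      mindspore::dataset::Status rc = mindspore::dataset::NumaBind(handle, rank_id);
      mindspore::dataset::ReleaseLibrary(handle);  // the patch defers this to ~ExecutionTree
      return rc;
    }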
@@ -0,0 +1,44 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_NUMA_INTERFACE_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_NUMA_INTERFACE_H_
#include "minddata/dataset/util/status.h"
namespace mindspore {
namespace dataset {
struct bitmask {
uint64_t size;
uint64_t *maskp;
};
// Now we separate the link between _c_dataengine and numa,
// and use dlopen("libnuma") instead. This function returns a
// handle that can be passed to NumaBind and ReleaseLibrary.
void *GetNumaAdapterHandle();
// In total, this function will:
// 1. Get the function pointers of the numa api
// 2. Do numa_bind
Status NumaBind(void *handle, const int32_t &rank_id);
// Release the numa handle to avoid a memory leak; the handle
// must not be nullptr when passed in.
void ReleaseLibrary(void *handle);
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_NUMA_INTERFACE_H_
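For readers unfamiliar with the pattern this header wraps, here is a self-contained dlopen/dlsym sketch resolving a single symbol by hand; the dlerror() reporting is an addition, not something the patch does:

    #include <dlfcn.h>
    #include <cstdio>

    typedef int (*GetNumaMaxNodeFunc)(void);

    int main() {
      // Same library name and flags as GetNumaAdapterHandle/LoadLibrary.
      void *handle = dlopen("libnuma.so", RTLD_LAZY | RTLD_LOCAL);
      if (handle == nullptr) {
        std::fprintf(stderr, "dlopen failed: %s\n", dlerror());
        return 1;
      }
      auto func = reinterpret_cast<GetNumaMaxNodeFunc>(dlsym(handle, "numa_max_node"));
      if (func != nullptr) {
        std::printf("numa_max_node() = %d\n", func());
      }
      (void)dlclose(handle);
      return 0;
    }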
@@ -62,6 +62,10 @@ def _init_device_info():
rank_id = int(env_rank_id.strip())
if rank_size > 1:
_config.set_rank_id(rank_id)
# A single process under Ascend mode doesn't support numa bind, for performance reasons.
if _config.get_numa_enable() is True and rank_size == 1:
raise ValueError("single process under Ascend mode doesn't support numa bind for "
"performance consideration.")
def set_seed(seed):
@@ -163,6 +167,37 @@ def get_num_parallel_workers():
return _config.get_num_parallel_workers()
def set_numa_enable(numa_enable):
"""
Set the default state of numa enabled.
Args:
numa_enable (bool): Whether to use the numa bind feature.
Raises:
TypeError: If numa_enable is not a boolean data type.
Examples:
>>> # Set a new global configuration value for the state of numa enabled.
>>> # Now parallel dataset operators will run with the numa bind function.
>>> ds.config.set_numa_enable(True)
"""
if not isinstance(numa_enable, bool):
raise TypeError("numa_enable must be a boolean value.")
_config.set_numa_enable(numa_enable)
def get_numa_enable():
"""
Get the default state of numa enabled.
This is the DEFAULT numa enabled value used for all processes.
Returns:
boolean, the default state of numa enabled.
"""
return _config.get_numa_enable()
def set_monitor_sampling_interval(interval):
"""
Set the default interval (in milliseconds) for monitor sampling.
@@ -254,6 +254,10 @@ if (BUILD_MINDDATA STREQUAL "full")
list(REMOVE_ITEM MINDDATA_KERNELS_DATA_SRC_FILES
"${MINDDATA_DIR}/kernels/data/unique_op.cc"
)
list(REMOVE_ITEM MINDDATA_UTIL_SRC_FILES
"${MINDDATA_DIR}/util/numa_interface.cc"
)
include_directories("${CMAKE_BINARY_DIR}/minddata/dataset/engine/cache")
if (BUILD_MINDDATA_EXAMPLE AND (PLATFORM_ARM32 OR PLATFORM_ARM64))
@@ -89,6 +89,8 @@ def create_dataset_imagenet(dataset_path, batch_size=32, repeat_num=1, training=
if device_num == 1:
num_parallel_workers = 48
ds.config.set_prefetch_size(8)
else:
ds.config.set_numa_enable(True)
data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=4,
shuffle=shuffle, sampler=sampler, class_indexing=class_indexing,
num_shards=device_num, shard_id=rank_id)
@@ -94,6 +94,7 @@ class MyTimeMonitor(Callback):
def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target="GPU", dtype="fp16",
device_num=1):
ds.config.set_numa_enable(True)
if device_num == 1:
data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=4, shuffle=True)
else: