diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel_factory.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel_factory.cc index a2750f3243..ee720c6e0a 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel_factory.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel_factory.cc @@ -74,6 +74,38 @@ std::string GpuKernelFactory::SupportedTypeList(const std::string &kernel_name) return type_lists; } +bool GpuKernelFactory::ReducePrecision( + const std::string &kernel_name, std::shared_ptr<KernelBuildInfo::KernelBuildInfoBuilder> builder) { + auto kernel_info = builder->Build(); + auto iter = map_kernel_name_to_creater_.find(kernel_name); + if (map_kernel_name_to_creater_.end() == iter) { + MS_LOG(INFO) << "Not registered GPU kernel: op[" << kernel_name << "]!"; + return false; + } + reduce_flag_.first.clear(); + for (size_t attr_index = 0; attr_index < (iter->second).size(); ++attr_index) { + auto attr_size = (&(iter->second))->at(attr_index).first.GetInputSize(); + for (size_t input_index = 0; input_index < kernel_info->GetInputNum(); input_index++) { + if (kernel_info->GetInputDeviceType(input_index) == kNumberTypeInt64 && + (iter->second)[attr_index].first.GetInputAttr(input_index % attr_size).first == kNumberTypeInt32) { + builder->SetInputDeviceType(kNumberTypeInt32, input_index); + reduce_flag_.first.push_back(input_index); + MS_LOG(WARNING) << "Kernel [" << kernel_name << "] does not support int64, cast input " << input_index + << " to int32."; + } + } + for (size_t output_index = 0; output_index < kernel_info->GetOutputNum(); output_index++) { + if (kernel_info->GetOutputDeviceType(output_index) == kNumberTypeInt64 && + (iter->second)[attr_index].first.GetOutputAttr(output_index % attr_size).first == kNumberTypeInt32) { + builder->SetOutputDeviceType(kNumberTypeInt32, output_index); + MS_LOG(WARNING) << "Kernel [" << kernel_name << "] does not support int64, cast output " << output_index + << " to int32."; + } + } + } + return 
GpuKernelFactory::SearchRegistered(kernel_name, builder->Build()); +} + std::pair<bool, size_t> GpuKernelAttrCheck(const std::string &kernel_name, const KernelBuildInfo *kernel_info) { auto iter = map_kernel_name_to_creater_.find(kernel_name); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel_factory.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel_factory.h index 967f143aa2..c4667c56c8 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel_factory.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel_factory.h @@ -21,6 +21,7 @@ #include <string> #include <utility> #include <vector> +#include <memory> #include "backend/kernel_compiler/gpu/gpu_kernel.h" #include "runtime/device/gpu/kernel_info_setter.h" #include "backend/kernel_compiler/kernel_build_info.h" @@ -43,6 +44,11 @@ class GpuKernelFactory { std::string SupportedTypeList(const std::string &kernel_name); + bool ReducePrecision(const std::string &kernel_name, + std::shared_ptr<KernelBuildInfo::KernelBuildInfoBuilder> builder); + + std::pair<std::vector<size_t>, TypeId> reduce_flag_{{}, kNumberTypeInt64}; + private: GpuKernelFactory() = default;
+ */ +#include "backend/optimizer/gpu/reduce_precision_fusion.h" + +#include <memory> +#include <string> +#include <vector> + +#include "backend/session/anf_runtime_algorithm.h" +#include "ir/primitive.h" +#include "utils/utils.h" +#include "backend/optimizer/common/helper.h" + +namespace mindspore { +namespace opt { +namespace { +void ReducePrecision(const FuncGraphPtr &graph, const AnfNodePtr &node, size_t i, const TypeId &src_type, + const TypeId &cast_type) { + auto prim = std::make_shared<Primitive>(prim::kPrimCast->name()); + MS_EXCEPTION_IF_NULL(prim); + std::vector<AnfNodePtr> inputs = {NewValueNode(prim), AnfAlgo::GetInputNode(utils::cast<CNodePtr>(node), i)}; + auto cast = graph->NewCNode(inputs); + auto cast_shape = {AnfAlgo::GetInputDeviceShape(node, i)}; + AnfAlgo::SetOutputInferTypeAndShape({cast_type}, cast_shape, cast.get()); + FuncGraphManagerPtr manager = graph->manager(); + MS_EXCEPTION_IF_NULL(manager); + manager->SetEdge(node, i + 1, cast); + kernel::KernelBuildInfo::KernelBuildInfoBuilder builder; + builder.SetInputsFormat({kOpFormat_DEFAULT}); + builder.SetOutputsFormat({kOpFormat_DEFAULT}); + builder.SetInputsDeviceType({src_type}); + builder.SetOutputsDeviceType({cast_type}); + builder.SetKernelType(AKG_KERNEL); + builder.SetProcessor(kernel::Processor::CUDA); + AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), cast.get()); +} + +} // namespace +bool ReducePrecisionFusion::Run(const FuncGraphPtr &graph) { + MS_EXCEPTION_IF_NULL(graph); + std::vector<AnfNodePtr> node_list = TopoSort(graph->get_return()); + for (auto node : node_list) { + if (node != nullptr && node->isa<CNode>() && AnfAlgo::IsRealKernel(node)) { + size_t input_num = AnfAlgo::GetInputTensorNum(node); + size_t output_num = AnfAlgo::GetOutputTensorNum(node); + for (size_t i = 0; i < input_num; i++) { + auto inferType = AnfAlgo::GetPrevNodeOutputInferDataType(node, i); + auto deviceType = AnfAlgo::GetInputDeviceDataType(node, i); + if (inferType == kNumberTypeInt64 && deviceType == kNumberTypeInt32) { + ReducePrecision(graph, node, i, inferType, 
deviceType); + MS_LOG(WARNING) << "Reduce precision for [" << AnfAlgo::GetCNodeName(utils::cast<CNodePtr>(node)) + << "] input " << i; + } + } + for (size_t i = 0; i < output_num; i++) { + auto inferType = AnfAlgo::GetOutputInferDataType(node, i); + auto deviceType = AnfAlgo::GetOutputDeviceDataType(node, i); + if (inferType == kNumberTypeInt64 && deviceType == kNumberTypeInt32) { + auto used_node_list = GetRealNodeUsedListByOutputIdx(graph, node, i); + for (size_t j = 0; j < used_node_list->size(); j++) { + auto used_node = used_node_list->at(j).first; + auto used_node_index = used_node_list->at(j).second - 1; + ReducePrecision(graph, used_node, used_node_index, deviceType, inferType); + } + } + } + } + } + return true; +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/optimizer/gpu/reduce_precision_fusion.h b/mindspore/ccsrc/backend/optimizer/gpu/reduce_precision_fusion.h new file mode 100644 index 0000000000..fa4506bd65 --- /dev/null +++ b/mindspore/ccsrc/backend/optimizer/gpu/reduce_precision_fusion.h @@ -0,0 +1,34 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_REDUCE_PRECISION_FUSION_H_ +#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_REDUCE_PRECISION_FUSION_H_ + +#include <memory> +#include <string> +#include <vector> +#include "backend/optimizer/common/optimizer.h" + +namespace mindspore { +namespace opt { +class ReducePrecisionFusion : public Pass { + public: + explicit ReducePrecisionFusion(const std::string &name) : Pass("reduce_precision") {} + ~ReducePrecisionFusion() override = default; + bool Run(const FuncGraphPtr &graph) override; +}; +} // namespace opt +} // namespace mindspore +#endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_REDUCE_PRECISION_FUSION_H_ diff --git a/mindspore/ccsrc/backend/session/gpu_session.cc b/mindspore/ccsrc/backend/session/gpu_session.cc index 551561a234..9c62c1a5f6 100644 --- a/mindspore/ccsrc/backend/session/gpu_session.cc +++ b/mindspore/ccsrc/backend/session/gpu_session.cc @@ -38,6 +38,7 @@ #include "backend/optimizer/gpu/remove_format_transform_pair.h" #include "backend/optimizer/gpu/remove_redundant_format_transform.h" #include "backend/optimizer/gpu/cudnn_inplace_fusion.h" +#include "backend/optimizer/gpu/reduce_precision_fusion.h" #include "backend/optimizer/graph_kernel/value_graph_binder.h" #include "backend/optimizer/graph_kernel/graph_kernel_splitter.h" #include "backend/optimizer/graph_kernel/graph_kernel_expander.h" @@ -101,6 +102,7 @@ void GPUSession::HardwareOptimize(const std::shared_ptr<KernelGraph> &kernel_gra pm->AddPass(std::make_shared<opt::RemoveFormatTransformPair>()); pm->AddPass(std::make_shared<opt::RemoveRedundantFormatTransform>()); pm->AddPass(std::make_shared<opt::CudnnInplaceAggregate>()); + pm->AddPass(std::make_shared<opt::ReducePrecisionFusion>("reduce_precision")); optimizer->AddPassManager(pm); (void)optimizer->Optimize(kernel_graph); kernel_graph->SetExecOrderByDefault(); diff --git a/mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.cc b/mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.cc index 8d73a33329..5fd88d166e 100644 --- a/mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.cc +++ b/mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.cc @@ 
-371,6 +371,9 @@ void SetKernelInfo(const CNodePtr &kernel_node, KernelType kernel_type) { if (kernel_type == UNKNOWN_KERNEL_TYPE) { result = kernel::GpuKernelFactory::GetInstance().SearchRegistered(AnfAlgo::GetCNodeName(kernel_node), builder->Build()); + if (!result) { + result = kernel::GpuKernelFactory::GetInstance().ReducePrecision(AnfAlgo::GetCNodeName(kernel_node), builder); + } if (!result) { result = SelectAkgKernel(kernel_node, builder->Build()); kernel_type = AKG_KERNEL;