You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number; they can include dashes ('-') and can be up to 35 characters long.

default_schedule.py 2.1 kB

5 years ago
1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. """default schedule function for GPU"""
  15. from queue import Queue
  16. import akg.tvm as tvm
  17. DEFAULT_GPU_THREAD = 1024
  18. def default_schedule(outs):
  19. """
  20. default schedule function.
  21. Args:
  22. outs (Union[tvm.tensor.Tensor, list[tvm.tensor.Tensor]]): outputs of compute.
  23. Returns:
  24. sch (schedule.Schedule): The created schedule.
  25. """
  26. if not isinstance(outs, tvm.tensor.Tensor) and not isinstance(outs, list):
  27. raise ValueError("outs should be list of akg.tvm.tensor.Tensor or akg.tvm.tensor.Tensor")
  28. device = 'cuda'
  29. ctx = tvm.context(device, 0)
  30. if not ctx.exist:
  31. raise SystemError("Skip because %s is not enabled" % device)
  32. outs_list = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
  33. with tvm.target.create(device):
  34. sch = tvm.create_schedule(outs_list[0].op)
  35. outputs_tensor = Queue()
  36. outputs_tensor.put(outs_list[0])
  37. op_list = []
  38. while not outputs_tensor.empty():
  39. out = outputs_tensor.get()
  40. if out.op not in op_list and isinstance(out.op, tvm.tensor.ComputeOp):
  41. op_list.append(out.op)
  42. for input_tensor in out.op.input_tensors:
  43. outputs_tensor.put(input_tensor)
  44. for op in op_list:
  45. stage = sch[op.output(0)]
  46. bx, tx = stage.split(op.axis[0], factor=DEFAULT_GPU_THREAD)
  47. stage.bind(bx, tvm.thread_axis("blockIdx.x"))
  48. stage.bind(tx, tvm.thread_axis("threadIdx.x"))
  49. return sch