You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

test_rl_buffer_op.py 5.9 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147
  1. # Copyright 2021 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. import numpy as np
  16. import pytest
  17. import mindspore.context as context
  18. import mindspore.nn as nn
  19. from mindspore import Tensor
  20. from mindspore.common.api import ms_function
  21. from mindspore.common.parameter import Parameter
  22. from mindspore.ops import operations as P
  23. import mindspore as ms
  class RLBufferAppend(nn.Cell):
      """Cell that wraps ``P.BufferAppend`` and owns its ``count``/``head`` state.

      Args:
          capcity: Capacity forwarded to ``P.BufferAppend``. The misspelled
              name is kept because callers pass it by keyword.
          shapes: Forwarded unchanged to ``P.BufferAppend``.
          types: Forwarded unchanged to ``P.BufferAppend``.
      """

      def __init__(self, capcity, shapes, types):
          super().__init__()
          self._capacity = capcity
          # Both counters are int32 scalars initialised to zero.
          self.count = Parameter(Tensor(0, ms.int32), name="count")
          self.head = Parameter(Tensor(0, ms.int32), name="head")
          self.buffer_append = P.BufferAppend(capcity, shapes, types)

      @ms_function
      def construct(self, buffer, exps):
          """Run the append op on ``buffer`` and ``exps`` with the stored counters."""
          result = self.buffer_append(buffer, exps, self.count, self.head)
          return result
  class RLBufferGet(nn.Cell):
      """Cell that wraps ``P.BufferGetItem`` and owns its ``count``/``head`` state.

      Args:
          capcity: Capacity forwarded to ``P.BufferGetItem``. The misspelled
              name is kept because callers pass it by keyword.
          shapes: Forwarded unchanged to ``P.BufferGetItem``.
          types: Forwarded unchanged to ``P.BufferGetItem``.
      """

      def __init__(self, capcity, shapes, types):
          super().__init__()
          self._capacity = capcity
          # ``count`` starts at 5 and ``head`` at 0 (both int32 scalars).
          self.count = Parameter(Tensor(5, ms.int32), name="count")
          self.head = Parameter(Tensor(0, ms.int32), name="head")
          self.buffer_get = P.BufferGetItem(capcity, shapes, types)

      @ms_function
      def construct(self, buffer, index):
          """Run the get-item op on ``buffer`` at ``index`` with the stored counters."""
          item = self.buffer_get(buffer, self.count, self.head, index)
          return item
  class RLBufferSample(nn.Cell):
      """Cell that wraps ``P.BufferSample`` and owns its ``count``/``head`` state.

      Args:
          capcity: Capacity forwarded to ``P.BufferSample``. The misspelled
              name is kept because callers pass it by keyword.
          batch_size: Forwarded unchanged to ``P.BufferSample``.
          shapes: Forwarded unchanged to ``P.BufferSample``.
          types: Forwarded unchanged to ``P.BufferSample``.
      """

      def __init__(self, capcity, batch_size, shapes, types):
          super().__init__()
          self._capacity = capcity
          # ``count`` starts at 5 and ``head`` at 0 (both int32 scalars).
          self.count = Parameter(Tensor(5, ms.int32), name="count")
          self.head = Parameter(Tensor(0, ms.int32), name="head")
          self.buffer_sample = P.BufferSample(capcity, batch_size, shapes, types)

      @ms_function
      def construct(self, buffer):
          """Run the sample op on ``buffer`` with the stored counters."""
          sampled = self.buffer_sample(buffer, self.count, self.head)
          return sampled
  # Pre-filled buffer shared by the tests: four tensors with five rows each.
  states = Tensor(np.arange(20).reshape(5, 4).astype(np.float32) / 10.0)
  actions = Tensor(np.arange(10).reshape(5, 2).astype(np.int32))
  rewards = Tensor(np.ones((5, 1)).astype(np.int32))
  states_ = Tensor(np.arange(20).reshape(5, 4).astype(np.float32))
  b = [states, actions, rewards, states_]

  # Single-row experiences; ``exp1`` is ``exp`` with the two state fields swapped.
  s = Tensor(np.array([2, 2, 2, 2]), ms.float32)
  a = Tensor(np.array([0, 0]), ms.int32)
  r = Tensor(np.array([0]), ms.int32)
  s_ = Tensor(np.array([3, 3, 3, 3]), ms.float32)
  exp = [s, a, r, s_]
  exp1 = [s_, a, r, s]

  # Two-row experience batch in which every element is 6.
  c = [
      Tensor(np.full((2, 4), 6), ms.float32),
      Tensor(np.full((2, 2), 6), ms.int32),
      Tensor(np.full((2, 1), 6), ms.int32),
      Tensor(np.full((2, 4), 6), ms.float32),
  ]
  @pytest.mark.level0
  @pytest.mark.platform_x86_gpu_training
  @pytest.mark.env_onecard
  def test_BufferSample():
      """Sample a batch from the shared buffer ``b`` and check the output shapes.

      The sampled rows themselves are not asserted; they are only printed.
      What is asserted is that each of the four outputs has ``batch_size``
      rows of the configured per-item shape, so the test can no longer pass
      on arbitrary output.
      """
      context.set_context(mode=context.PYNATIVE_MODE, device_target='GPU')
      batch_size = 3
      shapes = [(4,), (2,), (1,), (4,)]
      buffer_sample = RLBufferSample(
          capcity=5,
          batch_size=batch_size,
          shapes=shapes,
          types=[ms.float32, ms.int32, ms.int32, ms.float32],
      )
      ss, aa, rr, ss_ = buffer_sample(b)
      # Kept for diagnostics when the test is run with ``-s``.
      print(ss, aa, rr, ss_)
      for sampled, item_shape in zip((ss, aa, rr, ss_), shapes):
          assert sampled.asnumpy().shape == (batch_size,) + item_shape
  @pytest.mark.level0
  @pytest.mark.platform_x86_gpu_training
  @pytest.mark.env_onecard
  def test_BufferGet():
      """Fetch the item at index 1 of the shared buffer ``b`` and compare each field."""
      context.set_context(mode=context.PYNATIVE_MODE, device_target='GPU')
      item_shapes = [(4,), (2,), (1,), (4,)]
      item_types = [ms.float32, ms.int32, ms.int32, ms.float32]
      buffer_get = RLBufferGet(capcity=5, shapes=item_shapes, types=item_types)
      fetched = buffer_get(b, 1)
      ss, aa, rr, ss_ = fetched
      # Row 1 of ``states``, ``actions``, ``rewards`` and ``states_`` respectively.
      expected = (
          [0.4, 0.5, 0.6, 0.7],
          [2, 3],
          [1],
          [4, 5, 6, 7],
      )
      for actual, want in zip((ss, aa, rr, ss_), expected):
          np.testing.assert_almost_equal(actual.asnumpy(), want)
  @pytest.mark.level0
  @pytest.mark.platform_x86_gpu_training
  @pytest.mark.env_onecard
  def test_BufferAppend():
      """Append experiences past capacity and check the buffer contents after each phase.

      The append op writes into the tensors it is given, so the test works on
      a private copy of the module-level buffer ``b``. Appending into ``b``
      directly would change the data that the other tests in this file read
      and make their results depend on execution order.
      """
      context.set_context(mode=context.PYNATIVE_MODE, device_target='GPU')
      buffer_append = RLBufferAppend(
          capcity=5,
          shapes=[(4,), (2,), (1,), (4,)],
          types=[ms.float32, ms.int32, ms.int32, ms.float32],
      )
      # Fresh tensors holding the same values (and dtypes) as ``b``.
      buffer = [Tensor(item.asnumpy().copy()) for item in b]

      def check(expected):
          """Compare every tensor in ``buffer`` with its expected contents."""
          for actual, want in zip(buffer, expected):
              np.testing.assert_almost_equal(actual.asnumpy(), want)

      # Phase 1: five ``exp`` appends followed by one ``exp1``; the asserted
      # result has ``exp1`` in row 0 and ``exp`` in rows 1-4.
      for _ in range(5):
          buffer_append(buffer, exp)
      buffer_append(buffer, exp1)
      check([
          [[3, 3, 3, 3], [2, 2, 2, 2], [2, 2, 2, 2], [2, 2, 2, 2], [2, 2, 2, 2]],
          [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0]],
          [[0], [0], [0], [0], [0]],
          [[2, 2, 2, 2], [3, 3, 3, 3], [3, 3, 3, 3], [3, 3, 3, 3], [3, 3, 3, 3]],
      ])

      # Phase 2: one more ``exp1`` and two appends of the two-row batch ``c``;
      # the asserted result keeps ``exp1`` in row 1 and has 6s everywhere else.
      buffer_append(buffer, exp1)
      buffer_append(buffer, c)
      buffer_append(buffer, c)
      check([
          [[6, 6, 6, 6], [3, 3, 3, 3], [6, 6, 6, 6], [6, 6, 6, 6], [6, 6, 6, 6]],
          [[6, 6], [0, 0], [6, 6], [6, 6], [6, 6]],
          [[6], [0], [6], [6], [6]],
          [[6, 6, 6, 6], [2, 2, 2, 2], [6, 6, 6, 6], [6, 6, 6, 6], [6, 6, 6, 6]],
      ])