Merge pull request !1921 from liuxiao/ApplyProximalAdagrad-for-GE
@@ -182,6 +182,7 @@ const char kNameBinaryCrossEntropy[] = "BinaryCrossEntropy";
 const char kNameBinaryCrossEntropyGrad[] = "BinaryCrossEntropyGrad";
 const char kNameSparseApplyAdagrad[] = "SparseApplyAdagrad";
 const char kNameSparseApplyFtrlD[] = "SparseApplyFtrlD";
+const char kNameApplyProximalAdagrad[] = "ApplyProximalAdagrad";
 const char kNameAcosh[] = "Acosh";
 const char kNameAcoshGrad[] = "AcoshGrad";
 const char kNameFloorMod[] = "FloorMod";
@@ -386,6 +387,7 @@ std::unordered_map<std::string, OpAdapterDescPtr> &DfGraphConvertor::get_adpt_ma
     {string(kNameBinaryCrossEntropyGrad), ADPT_DESC(BinaryCrossEntropyGrad)},
     {string(kNameSparseApplyAdagrad), ADPT_DESC(SparseApplyAdagradD)},
     {string(kNameSparseApplyFtrlD), ADPT_DESC(SparseApplyFtrlD)},
+    {string(kNameApplyProximalAdagrad), ADPT_DESC(ApplyProximalAdagrad)},
     {string(kNameAcosh), ADPT_DESC(Acosh)},
     {string(kNameAcoshGrad), ADPT_DESC(AcoshGrad)},
     {string(kNameFloorMod), ADPT_DESC(FloorMod)},
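
For context, this map is a name-keyed registry the converter consults when lowering each front-end op to a GE operator; an op without an entry cannot be converted. A minimal Python sketch of the lookup pattern only — the class and function names below are hypothetical stand-ins, not DfGraphConvertor internals:

class OpAdapterDesc:
    """Describes how one front-end op maps onto a GE operator (illustrative)."""
    def __init__(self, ge_op_type):
        self.ge_op_type = ge_op_type

ADAPTER_MAP = {
    "SparseApplyAdagrad": OpAdapterDesc("SparseApplyAdagradD"),
    "ApplyProximalAdagrad": OpAdapterDesc("ApplyProximalAdagrad"),  # the entry this PR adds
}

def find_adapter(op_name):
    # Lowering fails for any op whose name is not registered.
    desc = ADAPTER_MAP.get(op_name)
    if desc is None:
        raise KeyError("Can not find adapter for op " + op_name)
    return desc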
@@ -1155,6 +1155,12 @@ ATTR_MAP(SparseApplyAdagradD) = {{"lr", ATTR_DESC(lr, AnyTraits<float>())},
                                  {"use_locking", ATTR_DESC(use_locking, AnyTraits<bool>())}};
 OUTPUT_MAP(SparseApplyAdagradD) = {{0, OUTPUT_DESC(var)}};
+
+// ApplyProximalAdagrad
+INPUT_MAP(ApplyProximalAdagrad) = {{1, INPUT_DESC(var)}, {2, INPUT_DESC(accum)}, {3, INPUT_DESC(lr)},
+                                   {4, INPUT_DESC(l1)}, {5, INPUT_DESC(l2)}, {6, INPUT_DESC(grad)}};
+ATTR_MAP(ApplyProximalAdagrad) = {{"use_locking", ATTR_DESC(use_locking, AnyTraits<bool>())}};
+OUTPUT_MAP(ApplyProximalAdagrad) = {{0, OUTPUT_DESC(var)}};
 // SparseApplyFtrlD
 INPUT_MAP(SparseApplyFtrlD) = {{1, INPUT_DESC(var)},
                                {2, INPUT_DESC(accum)},
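
The mapping above wires six inputs (var, accum, lr, l1, l2, grad) into the GE operator and reads var back as the single output. As a sanity reference for what those inputs feed, here is a hedged NumPy sketch of the commonly documented proximal Adagrad update rule — not code from this PR, and the exact kernel may differ:

import numpy as np

def apply_proximal_adagrad(var, accum, lr, l1, l2, grad):
    """Reference sketch of the standard proximal Adagrad update:
    accumulate squared gradients, take an Adagrad step, then apply
    the L1/L2 proximal operator to the result."""
    accum += grad * grad
    adagrad_lr = lr / np.sqrt(accum)      # per-element effective step size
    prox_v = var - adagrad_lr * grad      # plain Adagrad step
    if l1 > 0:
        # soft-thresholding handles the L1 term, shrinkage the L2 term
        var = (np.sign(prox_v) * np.maximum(np.abs(prox_v) - adagrad_lr * l1, 0.0)
               / (1.0 + adagrad_lr * l2))
    else:
        var = prox_v / (1.0 + adagrad_lr * l2)
    return var, accum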
@@ -442,6 +442,8 @@ DECLARE_OP_ADAPTER(BinaryCrossEntropyGrad)
 DECLARE_OP_USE_OUTPUT(BinaryCrossEntropyGrad)
 DECLARE_OP_ADAPTER(SparseApplyAdagradD)
 DECLARE_OP_USE_OUTPUT(SparseApplyAdagradD)
+DECLARE_OP_ADAPTER(ApplyProximalAdagrad)
+DECLARE_OP_USE_OUTPUT(ApplyProximalAdagrad)
 DECLARE_OP_ADAPTER(SpaceToDepth)
 DECLARE_OP_USE_OUTPUT(SpaceToDepth)
 DECLARE_OP_ADAPTER(DepthToSpace)
@@ -3149,7 +3149,7 @@ class SparseApplyFtrl(PrimitiveWithInfer):
         validator.check_value_type("l1", l1, [float], self.name)
         validator.check_value_type("l2", l2, [float], self.name)
         validator.check_value_type("lr_power", lr_power, [float], self.name)
-        self.lr = validator.check_number("lr", lr, 0.0, Rel.GT, self.name)
+        self.lr = validator.check_number_range("lr", lr, 0.0, float("inf"), Rel.INC_LEFT, self.name)
         self.l1 = validator.check_number("l1", l1, 0.0, Rel.GE, self.name)
         self.l2 = validator.check_number("l2", l2, 0.0, Rel.GE, self.name)
         self.lr_power = validator.check_number("lr_power", lr_power, 0, Rel.LE, self.name)
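
This change relaxes SparseApplyFtrl's learning-rate check from strictly positive (Rel.GT against 0.0) to a left-inclusive range check, so lr == 0.0 is now accepted (assuming Rel.INC_LEFT denotes the half-open interval [lower, upper), as its name suggests). A hedged standalone sketch of the two behaviours in plain Python, not the MindSpore validator API:

import math

def check_gt(name, value, lower):
    # old behaviour: value must be strictly greater than lower
    if not value > lower:
        raise ValueError("%s should be > %s, but got %s" % (name, lower, value))
    return value

def check_inc_left_range(name, value, lower, upper):
    # new behaviour: half-open range [lower, upper), left bound inclusive
    if not lower <= value < upper:
        raise ValueError("%s should be in [%s, %s), but got %s" % (name, lower, upper, value))
    return value

check_inc_left_range("lr", 0.0, 0.0, math.inf)   # accepted after this change
# check_gt("lr", 0.0, 0.0)                       # would have raised before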
@@ -244,12 +244,14 @@ class SparseApplyProximalAdagradNet(nn.Cell):
     def __init__(self):
         super(SparseApplyProximalAdagradNet, self).__init__()
         self.sparse_apply_proximal_adagrad = P.SparseApplyProximalAdagrad()
+        self.var = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="var")
+        self.accum = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="accum")
         self.lr = 0.01
         self.l1 = 0.0
         self.l2 = 0.0

-    def construct(self, var, accum, grad, indices):
-        out = self.sparse_apply_proximal_adagrad(var, accum, self.lr, self.l1, self.l2, grad, indices)
+    def construct(self, grad, indices):
+        out = self.sparse_apply_proximal_adagrad(self.var, self.accum, self.lr, self.l1, self.l2, grad, indices)
         return out
@@ -257,12 +259,14 @@ class ApplyProximalAdagradNet(nn.Cell):
     def __init__(self):
         super(ApplyProximalAdagradNet, self).__init__()
         self.apply_proximal_adagrad = P.ApplyProximalAdagrad()
+        self.var = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="var")
+        self.accum = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="accum")
         self.lr = 0.01
         self.l1 = 0.0
         self.l2 = 0.0

-    def construct(self, var, accum, grad):
-        out = self.apply_proximal_adagrad(var, accum, self.lr, self.l1, self.l2, grad)
+    def construct(self, grad):
+        out = self.apply_proximal_adagrad(self.var, self.accum, self.lr, self.l1, self.l2, grad)
         return out
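
With var and accum now held as Parameters, both test nets take only the gradient-side inputs, which is why the desc_inputs lists shrink in the next hunk. A hedged usage sketch, assuming the class definitions and imports (numpy, Tensor) already present in the test file above:

grad = Tensor(np.random.rand(3, 3).astype(np.float32))
indices = Tensor(np.ones((3,), np.int32))

dense_net = ApplyProximalAdagradNet()
out = dense_net(grad)                 # var/accum are supplied by the Parameters

sparse_net = SparseApplyProximalAdagradNet()
out = sparse_net(grad, indices)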
@@ -1061,11 +1065,11 @@ test_case_nn_ops = [
         'skip': ['backward']}),
     ('ApplyProximalAdagrad', {
         'block': ApplyProximalAdagradNet(),
-        'desc_inputs': [[3, 3], [3, 3], [3, 3]],
+        'desc_inputs': [[3, 3]],
         'skip': ['backward']}),
     ('SparseApplyProximalAdagrad', {
         'block': SparseApplyProximalAdagradNet(),
-        'desc_inputs': [[3, 3], [3, 3], [3, 3], Tensor(np.ones((3,), np.int32))],
+        'desc_inputs': [[3, 3], Tensor(np.ones((3,), np.int32))],
         'skip': ['backward']}),
     ('Flatten_1', {
         'block': NetForFlatten(),