From d957309957e4939bfa64814c06fc72c280b600ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=9D=8E=E5=98=89=E7=90=AA?=
Date: Mon, 24 Aug 2020 17:36:11 +0800
Subject: [PATCH] modify comments

---
 mindspore/nn/optim/momentum.py    | 8 ++++----
 mindspore/nn/optim/sgd.py         | 8 ++++----
 mindspore/nn/wrap/cell_wrapper.py | 2 +-
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/mindspore/nn/optim/momentum.py b/mindspore/nn/optim/momentum.py
index 7781e52d57..ac77b6c935 100755
--- a/mindspore/nn/optim/momentum.py
+++ b/mindspore/nn/optim/momentum.py
@@ -56,12 +56,12 @@ class Momentum(Optimizer):
     .. math::
             v_{t} = v_{t-1} \ast u + gradients

-    If use_nesterov is True:
-    .. math::
+    If use_nesterov is True:
+        .. math::
             p_{t} = p_{t-1} - (grad \ast lr + v_{t} \ast u \ast lr)

-    If use_nesterov is Flase:
-    .. math::
+    If use_nesterov is False:
+        .. math::
             p_{t} = p_{t-1} - lr \ast v_{t}

     Here: where grad, lr, p, v and u denote the gradients, learning_rate, params, moments, and momentum respectively.
diff --git a/mindspore/nn/optim/sgd.py b/mindspore/nn/optim/sgd.py
index e684fae22f..d2ad443f0c 100755
--- a/mindspore/nn/optim/sgd.py
+++ b/mindspore/nn/optim/sgd.py
@@ -49,12 +49,12 @@ class SGD(Optimizer):
     .. math::
             v_{t+1} = u \ast v_{t} + gradient \ast (1-dampening)

-    If nesterov is True:
-    .. math::
+    If nesterov is True:
+        .. math::
             p_{t+1} = p_{t} - lr \ast (gradient + u \ast v_{t+1})

-    If nesterov is Flase:
-    .. math::
+    If nesterov is False:
+        .. math::
             p_{t+1} = p_{t} - lr \ast v_{t+1}

     To be noticed, for the first step, v_{t+1} = gradient
diff --git a/mindspore/nn/wrap/cell_wrapper.py b/mindspore/nn/wrap/cell_wrapper.py
index 980585e270..268cc7ff2e 100644
--- a/mindspore/nn/wrap/cell_wrapper.py
+++ b/mindspore/nn/wrap/cell_wrapper.py
@@ -82,7 +82,7 @@ class WithGradCell(Cell):
     Wraps the network with backward cell to compute gradients. A network with a loss function is necessary
     as argument. If loss function in None, the network must be a wrapper of network and loss function. This
-    Cell accepts *inputs as inputs and returns gradients for each trainable parameter.
+    Cell accepts '*inputs' as inputs and returns gradients for each trainable parameter.

     Note:
         Run in PyNative mode.
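
For reviewers checking the formulas, here is a minimal NumPy sketch of the two update rules exactly as the corrected docstrings state them. This is plain Python for illustration, not MindSpore's fused ApplyMomentum/SGD kernels; the function names and the quadratic demo at the end are made up for this sketch.

```python
import numpy as np

def momentum_update(p, v, grad, lr, u, use_nesterov=False):
    # v_t = v_{t-1} * u + gradients
    v = u * v + grad
    if use_nesterov:
        # p_t = p_{t-1} - (grad * lr + v_t * u * lr)
        p = p - (grad * lr + v * u * lr)
    else:
        # p_t = p_{t-1} - lr * v_t
        p = p - lr * v
    return p, v

def sgd_update(p, v, grad, lr, u, dampening=0.0, nesterov=False,
               first_step=False):
    # For the first step v_{t+1} = gradient, per the sgd.py docstring;
    # otherwise v_{t+1} = u * v_t + gradient * (1 - dampening).
    v = grad if first_step else u * v + grad * (1.0 - dampening)
    if nesterov:
        # p_{t+1} = p_t - lr * (gradient + u * v_{t+1})
        p = p - lr * (grad + u * v)
    else:
        # p_{t+1} = p_t - lr * v_{t+1}
        p = p - lr * v
    return p, v

# Tiny demo on f(p) = 0.5 * ||p||^2, whose gradient is p itself.
p, v = np.ones(3), np.zeros(3)
for step in range(5):
    p, v = momentum_update(p, v, grad=p, lr=0.1, u=0.9, use_nesterov=True)
print(p)  # parameters shrink toward zero
```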
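For the WithGradCell docstring touched by the last hunk, a usage sketch follows, assuming MindSpore's public nn API of this era; the toy Dense network, shapes, and loss choice are illustrative and not part of the patch.

```python
import numpy as np
import mindspore.nn as nn
from mindspore import Tensor, context

# Per the Note in the docstring, WithGradCell runs in PyNative mode.
context.set_context(mode=context.PYNATIVE_MODE)

net = nn.Dense(3, 2)  # toy network; any Cell works here
loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=False, reduction='mean')

# The cell accepts '*inputs' and returns one gradient per trainable
# parameter of the wrapped network.
grad_net = nn.WithGradCell(net, loss_fn)

data = Tensor(np.random.rand(4, 3).astype(np.float32))
label = Tensor(np.eye(2, dtype=np.float32)[[0, 1, 0, 1]])  # one-hot labels
grads = grad_net(data, label)  # tuple of gradients, e.g. (d_weight, d_bias)
```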