add history and lambda callbacks

4 years ago · 7afcdfd211
--- a/mindspore/python/mindspore/train/callback/init.py
+++ b/mindspore/python/mindspore/train/callback/init.py
@@ -29,7 +29,9 @@ from ._summary_collector import SummaryCollector
 from ._lr_scheduler_callback import LearningRateScheduler
 from ._landscape import SummaryLandscape
 from ._fl_manager import FederatedLearningManager
 from ._history import History
 from ._lambda_callback import LambdaCallback

 __all__ = ["Callback", "LossMonitor", "TimeMonitor", "ModelCheckpoint",
           "SummaryCollector", "CheckpointConfig", "RunContext", "LearningRateScheduler", "SummaryLandscape",
           "FederatedLearningManager"]
           "FederatedLearningManager", "History", "LambdaCallback"]
--- a/mindspore/python/mindspore/train/callback/_history.py
+++ b/mindspore/python/mindspore/train/callback/_history.py
@@ -0,0 +1,81 @@
 # Copyright 2021 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
 """History Callback class."""

 import numpy as np
 from mindspore.common.tensor import Tensor
 from ._callback import Callback

 class History(Callback):
    """
    Records the first element of network outputs into a `History` object.

    The first element of network outputs is the loss value if not
    customizing the train network or eval network.

    Two attributes are maintained:

    - `epoch` (list): the epoch numbers recorded so far; it is reset to an
      empty list every time `begin` is called.
    - `history` (dict): maps ``"net_output"`` and every metric name to the
      list of values recorded at each epoch end; it is created once in the
      constructor and is not cleared by `begin`.

    Note:
        Normally used in `mindspore.Model.train`.

    Examples:
        >>> from mindspore import Model, nn
        >>> data = {"x": np.float32(np.random.rand(64, 10)), "y": np.random.randint(0, 5, (64,))}
        >>> train_dataset = ds.NumpySlicesDataset(data=data).batch(32)
        >>> net = nn.Dense(10, 5)
        >>> crit = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
        >>> opt = nn.Momentum(net.trainable_params(), 0.01, 0.9)
        >>> history_cb = History()
        >>> model = Model(network=net, optimizer=opt, loss_fn=crit, metrics={"recall"})
        >>> model.train(2, train_dataset, callbacks=[history_cb])
        >>> print(history_cb.epoch)
        >>> print(history_cb.history)
        [1, 2]
        {'net_output': [1.607877, 1.6033841]}
    """
    def __init__(self):
        super(History, self).__init__()
        self.history = {}
        # Also defined here so the attribute exists (and `epoch_end` works)
        # even when `begin` has not been invoked yet.
        self.epoch = []

    def begin(self, run_context):
        """
        Initialize the `epoch` property at the begin of training.

        Args:
            run_context (RunContext): Context of the `mindspore.Model.train/eval`.
        """
        self.epoch = []

    def epoch_end(self, run_context):
        """
        Records the first element of network outputs at the end of epoch.

        The current epoch number is appended to `epoch`. When the network
        output (or the first element of a non-empty tuple/list output) is a
        `Tensor`, its mean value is stored under ``"net_output"``; otherwise
        the raw output is stored unchanged. Metrics found in the callback
        parameters are stored under their own names.

        Args:
            run_context (RunContext): Context of the `mindspore.Model.train/eval`.
        """
        cb_params = run_context.original_args()
        epoch = cb_params.get("cur_epoch_num", 1)
        self.epoch.append(epoch)

        net_output = cb_params.net_outputs
        candidate = net_output
        # Guard against an empty tuple/list, which has no first element.
        if isinstance(net_output, (tuple, list)) and net_output:
            candidate = net_output[0]
        if isinstance(candidate, Tensor):
            # Convert once and reuse the array for both the check and the mean.
            output_array = candidate.asnumpy()
            if isinstance(output_array, np.ndarray):
                net_output = np.mean(output_array)

        metrics = cb_params.get("metrics")
        cur_history = {"net_output": net_output}
        if metrics:
            cur_history.update(metrics)
        for k, v in cur_history.items():
            self.history.setdefault(k, []).append(v)
--- a/mindspore/python/mindspore/train/callback/_lambda_callback.py
+++ b/mindspore/python/mindspore/train/callback/_lambda_callback.py
@@ -0,0 +1,58 @@
 # Copyright 2021 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
 """Lambda Callback class."""

 from ._callback import Callback

 class LambdaCallback(Callback):
    """
    Callback built from plain functions, for simple custom behavior.

    Each argument is a function (typically an anonymous one) that is invoked
    at the matching stage of `mindspore.Model.{train | eval}`.

    Note that every stage function expects one positional argument: `run_context`.
    A stage whose argument is left unset (or is falsy) does nothing.

    Args:
        epoch_begin: called at the beginning of every epoch.
        epoch_end: called at the end of every epoch.
        step_begin: called at the beginning of every batch.
        step_end: called at the end of every batch.
        begin: called at the beginning of model train/eval.
        end: called at the end of model train/eval.

    Examples:
        >>> from mindspore import Model, nn
        >>> data = {"x": np.float32(np.random.rand(64, 10)), "y": np.random.randint(0, 5, (64,))}
        >>> train_dataset = ds.NumpySlicesDataset(data=data).batch(32)
        >>> net = nn.Dense(10, 5)
        >>> crit = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
        >>> opt = nn.Momentum(net.trainable_params(), 0.01, 0.9)
        >>> lambda_callback = LambdaCallback(epoch_end=
        ... lambda run_context: print("loss: ", run_context.original_args().net_outputs))
        >>> model = Model(network=net, optimizer=opt, loss_fn=crit, metrics={"recall"})
        >>> model.train(2, train_dataset, callbacks=[lambda_callback])
        loss: 1.6127687
        loss: 1.6106578
    """
    def __init__(self, epoch_begin=None, epoch_end=None, step_begin=None,
                 step_end=None, begin=None, end=None):
        super(LambdaCallback, self).__init__()
        # Stage name -> user supplied hook, in the order the attributes are set.
        stage_hooks = (
            ("epoch_begin", epoch_begin),
            ("epoch_end", epoch_end),
            ("step_begin", step_begin),
            ("step_end", step_end),
            ("begin", begin),
            ("end", end),
        )
        for stage_name, hook in stage_hooks:
            # A missing (falsy) hook is replaced by a do-nothing function so
            # that every stage can always be called with `run_context`.
            setattr(self, stage_name, hook or (lambda run_context: None))
--- a/mindspore/python/mindspore/train/model.py
+++ b/mindspore/python/mindspore/train/model.py
@@ -27,7 +27,7 @@ from .callback._checkpoint import _chg_ckpt_file_name_if_same_exist
 from ..common.tensor import Tensor
 from ..nn.metrics import get_metrics
 from .._checkparam import check_input_data, check_output_data, Validator
 from .callback import _InternalCallbackParam, RunContext, _CallbackManager, Callback
 from .callback import _InternalCallbackParam, RunContext, _CallbackManager, Callback, History
 from .. import context
 from ..parallel._utils import _get_parallel_mode, _get_device_num, _get_global_rank, \
    _get_parameter_broadcast, _device_number_check, _parameter_broadcast_check, _parallel_predict_check
@@ -940,6 +940,9 @@ class Model:
        if isinstance(self._eval_network, nn.GraphCell) and dataset_sink_mode:
            raise ValueError("Sink mode is currently not supported when evaluating with a GraphCell.")

        if callbacks and (isinstance(callbacks, History) or any(isinstance(cb, History) for cb in callbacks)):
            logger.warning("History callback is recommended to be used in training process.")

        cb_params = _InternalCallbackParam()
        cb_params.eval_network = self._eval_network
        cb_params.valid_dataset = valid_dataset
--- a/tests/ut/python/utils/test_callback.py
+++ b/tests/ut/python/utils/test_callback.py
@@ -29,7 +29,7 @@ from mindspore.common.tensor import Tensor
 from mindspore.nn import TrainOneStepCell, WithLossCell
 from mindspore.nn.optim import Momentum
 from mindspore.train.callback import ModelCheckpoint, RunContext, LossMonitor, _InternalCallbackParam, \
    _CallbackManager, Callback, CheckpointConfig, _set_cur_net, _checkpoint_cb_for_save_op
    _CallbackManager, Callback, CheckpointConfig, _set_cur_net, _checkpoint_cb_for_save_op, History, LambdaCallback
 from mindspore.train.callback._checkpoint import _chg_ckpt_file_name_if_same_exist


@@ -492,3 +492,58 @@ def test_step_end_save_graph():
        os.remove('./test_files/test-graph.meta')
    ckpoint_cb.step_end(run_context)
    assert not os.path.exists('./test_files/test-graph.meta')


 def test_history():
    """
    Feature: callback.
    Description: Test history object saves epoch and history properties.
    Expectation: the epoch number, the averaged network output and the metrics are each recorded once.
    """
    cb_params = _InternalCallbackParam()
    cb_params.cur_epoch_num = 4
    cb_params.epoch_num = 4
    cb_params.cur_step_num = 2
    cb_params.batch_num = 2
    cb_params.net_outputs = Tensor(2.0)
    cb_params.metrics = {'mae': 6.343789100646973, 'mse': 59.03999710083008}

    run_context = RunContext(cb_params)
    history_cb = History()
    callbacks = [history_cb]
    with _CallbackManager(callbacks) as callbacklist:
        callbacklist.begin(run_context)
        callbacklist.epoch_begin(run_context)
        callbacklist.step_begin(run_context)
        callbacklist.step_end(run_context)
        callbacklist.epoch_end(run_context)
        callbacklist.end(run_context)
    print(history_cb.epoch)
    print(history_cb.history)

    # Exactly one epoch end was signalled, so one entry per key is expected.
    assert history_cb.epoch == [4]
    assert set(history_cb.history) == {"net_output", "mae", "mse"}
    assert history_cb.history["net_output"] == [2.0]
    assert history_cb.history["mae"] == [6.343789100646973]
    assert history_cb.history["mse"] == [59.03999710083008]


 def test_lambda():
    """
    Feature: callback.
    Description: Test lambda callback.
    Expectation: the `epoch_end` function is invoked exactly once with the run context.
    """
    cb_params = _InternalCallbackParam()
    cb_params.cur_epoch_num = 4
    cb_params.epoch_num = 4
    cb_params.cur_step_num = 2
    cb_params.batch_num = 2
    cb_params.net_outputs = Tensor(2.0)

    run_context = RunContext(cb_params)
    # Collect what the hook sees so the invocation can be verified afterwards.
    seen_outputs = []
    lambda_cb = LambdaCallback(
        epoch_end=lambda ctx: seen_outputs.append(ctx.original_args().net_outputs))

    callbacks = [lambda_cb]
    with _CallbackManager(callbacks) as callbacklist:
        callbacklist.begin(run_context)
        callbacklist.epoch_begin(run_context)
        callbacklist.step_begin(run_context)
        callbacklist.step_end(run_context)
        callbacklist.epoch_end(run_context)
        callbacklist.end(run_context)

    for output in seen_outputs:
        print("loss result: ", output)
    # Only `epoch_end` was customised and it was triggered a single time.
    assert len(seen_outputs) == 1
    assert float(seen_outputs[0].asnumpy()) == 2.0