Huawei_Technology
/
mindspore

# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import numpy as np
import pytest

import mindspore.common.dtype as mstype
from mindspore.common.tensor import Tensor
import mindspore.dataset as ds
from mindspore.dataset.engine.iterators import ITERATORS_LIST, _cleanup

DATA_DIR = ["../data/dataset/testTFTestAllTypes/test.data"]
SCHEMA_DIR = "../data/dataset/testTFTestAllTypes/datasetSchema.json"
COLUMNS = ["col_1d", "col_2d", "col_3d", "col_binary", "col_float",
           "col_sint16", "col_sint32", "col_sint64"]


def check(project_columns):
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=COLUMNS, shuffle=False)
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=project_columns, shuffle=False)

    for data_actual, data_expected in zip(data1.create_tuple_iterator(project_columns, num_epochs=1, output_numpy=True),
                                          data2.create_tuple_iterator(num_epochs=1, output_numpy=True)):
        assert len(data_actual) == len(data_expected)
        assert all([np.array_equal(d1, d2) for d1, d2 in zip(data_actual, data_expected)])


def test_iterator_create_tuple_numpy():
    """
    Test creating tuple iterator with output NumPy
    """
    check(COLUMNS)
    check(COLUMNS[0:1])
    check(COLUMNS[0:2])
    check(COLUMNS[0:7])
    check(COLUMNS[7:8])
    check(COLUMNS[0:2:8])

def test_iterator_create_dict_mstensor():
    """
    Test creating dict iterator with output MSTensor
    """
    def generator():
        for i in range(64):
            yield (np.array([i], dtype=np.float32),)

    # apply dataset operations
    data1 = ds.GeneratorDataset(generator, ["data"])

    i = 0
    for item in data1.create_dict_iterator(num_epochs=1):
        golden = np.array([i], dtype=np.float32)
        np.testing.assert_array_equal(item["data"].asnumpy(), golden)
        assert isinstance(item["data"], Tensor)
        assert item["data"].dtype == mstype.float32
        i += 1
    assert i == 64

def test_iterator_create_tuple_mstensor():
    """
    Test creating tuple iterator with output MSTensor
    """
    def generator():
        for i in range(64):
            yield (np.array([i], dtype=np.float32),)

    # apply dataset operations
    data1 = ds.GeneratorDataset(generator, ["data"])

    i = 0
    for item in data1.create_tuple_iterator(num_epochs=1):
        golden = np.array([i], dtype=np.float32)
        np.testing.assert_array_equal(item[0].asnumpy(), golden)
        assert isinstance(item[0], Tensor)
        assert item[0].dtype == mstype.float32
        i += 1
    assert i == 64


def test_iterator_weak_ref():
    ITERATORS_LIST.clear()
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR)
    itr1 = data.create_tuple_iterator(num_epochs=1)
    itr2 = data.create_tuple_iterator(num_epochs=1)
    itr3 = data.create_tuple_iterator(num_epochs=1)

    assert len(ITERATORS_LIST) == 3
    assert sum(itr() is not None for itr in ITERATORS_LIST) == 3

    del itr1
    assert len(ITERATORS_LIST) == 3
    assert sum(itr() is not None for itr in ITERATORS_LIST) == 2

    del itr2
    assert len(ITERATORS_LIST) == 3
    assert sum(itr() is not None for itr in ITERATORS_LIST) == 1

    del itr3
    assert len(ITERATORS_LIST) == 3
    assert sum(itr() is not None for itr in ITERATORS_LIST) == 0

    itr1 = data.create_tuple_iterator(num_epochs=1)
    itr2 = data.create_tuple_iterator(num_epochs=1)
    itr3 = data.create_tuple_iterator(num_epochs=1)

    _cleanup()
    with pytest.raises(AttributeError) as info:
        itr2.__next__()
    assert "object has no attribute 'depipeline'" in str(info.value)

    del itr1
    assert len(ITERATORS_LIST) == 6
    assert sum(itr() is not None for itr in ITERATORS_LIST) == 2

    _cleanup()

def test_iterator_exception():
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR)
    try:
        _ = data.create_dict_iterator(output_numpy="123")
        assert False
    except TypeError as e:
        assert "Argument output_numpy with value 123 is not of type" in str(e)

    try:
        _ = data.create_dict_iterator(output_numpy=123)
        assert False
    except TypeError as e:
        assert "Argument output_numpy with value 123 is not of type" in str(e)

    try:
        _ = data.create_tuple_iterator(output_numpy="123")
        assert False
    except TypeError as e:
        assert "Argument output_numpy with value 123 is not of type" in str(e)

    try:
        _ = data.create_tuple_iterator(output_numpy=123)
        assert False
    except TypeError as e:
        assert "Argument output_numpy with value 123 is not of type" in str(e)


class MyDict(dict):
    def __getattr__(self, key):
        return self[key]

    def __setattr__(self, key, value):
        self[key] = value

    def __call__(self, t):
        return t


def test_tree_copy():
    """
    Testing copying the tree with a pyfunc that cannot be pickled
    """

    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=COLUMNS)
    data1 = data.map(operations=[MyDict()])

    itr = data1.create_tuple_iterator(num_epochs=1)

    assert id(data1) != id(itr.dataset)
    assert id(data) != id(itr.dataset.children[0])
    assert id(data1.operations[0]) == id(itr.dataset.operations[0])

    itr.release()


if __name__ == '__main__':
    test_iterator_create_tuple_numpy()
    test_iterator_weak_ref()
    test_iterator_exception()
    test_tree_copy()