|
|
@@ -273,6 +273,7 @@ def test_sequential_sampler():
        result.append(item["label"])
        num_iter += 1

    assert num_iter == 44
    logger.info("Result: {}".format(result))
    assert result == golden
@@ -383,6 +384,7 @@ def test_weighted_random_sampler():
    logger.info("Number of data in data1: {}".format(num_iter))
    assert num_iter == 11


def test_weighted_random_sampler_exception():
    """
    Test error cases for WeightedRandomSampler
|
@@ -413,6 +415,250 @@ def test_weighted_random_sampler_exception():
        weights = [0, 0, 0, 0, 0]
        ds.WeightedRandomSampler(weights)
|
def test_chained_sampler_01():
    logger.info("Test Case Chained Sampler - Random and Sequential, with repeat")

    # Create chained sampler, random and sequential
    sampler = ds.RandomSampler()
    child_sampler = ds.SequentialSampler()
    sampler.add_child(child_sampler)
    # Create ImageFolderDataset with sampler
    data1 = ds.ImageFolderDataset(DATA_DIR, sampler=sampler)

    data1 = data1.repeat(count=3)

    # Verify dataset size
    data1_size = data1.get_dataset_size()
    logger.info("dataset size is: {}".format(data1_size))
    assert data1_size == 132

    # Verify number of iterations
    num_iter = 0
    for item in data1.create_dict_iterator(num_epochs=1):  # each data is a dictionary
        # in this example, each dictionary has keys "image" and "label"
        logger.info("image is {}".format(item["image"]))
        logger.info("label is {}".format(item["label"]))
        num_iter += 1

    logger.info("Number of data in data1: {}".format(num_iter))
    assert num_iter == 132
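
# Illustrative sketch (not part of the original test): the two asserted counts above follow
# from the ImageFolder test data holding 44 images (see the shard note in
# test_chained_sampler_04); repeat(count=3) simply replays every sampled row three times.
def _sketch_rows_after_repeat(num_images=44, repeat_count=3):
    # repeat() multiplies the per-epoch row count by the repeat count
    return num_images * repeat_count  # 44 * 3 == 132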
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_chained_sampler_02():
    logger.info("Test Case Chained Sampler - Random and Sequential, with batch then repeat")

    # Create chained sampler, random and sequential
    sampler = ds.RandomSampler()
    child_sampler = ds.SequentialSampler()
    sampler.add_child(child_sampler)
    # Create ImageFolderDataset with sampler
    data1 = ds.ImageFolderDataset(DATA_DIR, sampler=sampler)

    data1 = data1.batch(batch_size=5, drop_remainder=True)
    data1 = data1.repeat(count=2)

    # Verify dataset size
    data1_size = data1.get_dataset_size()
    logger.info("dataset size is: {}".format(data1_size))
    assert data1_size == 16

    # Verify number of iterations
    num_iter = 0
    for item in data1.create_dict_iterator(num_epochs=1):  # each data is a dictionary
        # in this example, each dictionary has keys "image" and "label"
        logger.info("image is {}".format(item["image"]))
        logger.info("label is {}".format(item["label"]))
        num_iter += 1

    logger.info("Number of data in data1: {}".format(num_iter))
    assert num_iter == 16
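
# Illustrative sketch (not part of the original test): because batch() runs before
# repeat() here and drop_remainder=True, the incomplete last batch is dropped first and
# the batched stream is then repeated, hence floor(44 / 5) * 2 == 8 * 2 == 16 rows.
def _sketch_batch_then_repeat(num_images=44, batch_size=5, repeat_count=2):
    # drop_remainder=True keeps only full batches
    return (num_images // batch_size) * repeat_count  # 8 * 2 == 16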
|
|
def test_chained_sampler_03():
    logger.info("Test Case Chained Sampler - Random and Sequential, with repeat then batch")

    # Create chained sampler, random and sequential
    sampler = ds.RandomSampler()
    child_sampler = ds.SequentialSampler()
    sampler.add_child(child_sampler)
    # Create ImageFolderDataset with sampler
    data1 = ds.ImageFolderDataset(DATA_DIR, sampler=sampler)

    data1 = data1.repeat(count=2)
    data1 = data1.batch(batch_size=5, drop_remainder=False)

    # Verify dataset size
    data1_size = data1.get_dataset_size()
    logger.info("dataset size is: {}".format(data1_size))
    assert data1_size == 18

    # Verify number of iterations
    num_iter = 0
    for item in data1.create_dict_iterator(num_epochs=1):  # each data is a dictionary
        # in this example, each dictionary has keys "image" and "label"
        logger.info("image is {}".format(item["image"]))
        logger.info("label is {}".format(item["label"]))
        num_iter += 1

    logger.info("Number of data in data1: {}".format(num_iter))
    assert num_iter == 18
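
# Illustrative sketch (not part of the original test): with repeat() before batch() and
# drop_remainder=False, the 44 * 2 = 88 repeated rows are batched and the trailing
# partial batch is kept, hence ceil(88 / 5) == 18 rows.
def _sketch_repeat_then_batch(num_images=44, repeat_count=2, batch_size=5):
    total = num_images * repeat_count
    # ceiling division keeps the partial final batch
    return -(-total // batch_size)  # ceil(88 / 5) == 18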
|
|
def test_chained_sampler_04():
    logger.info("Test Case Chained Sampler - Distributed and Random, with batch then repeat")

    # Create chained sampler, distributed and random
    sampler = ds.DistributedSampler(num_shards=4, shard_id=3)
    child_sampler = ds.RandomSampler()
    sampler.add_child(child_sampler)
    # Create ImageFolderDataset with sampler
    data1 = ds.ImageFolderDataset(DATA_DIR, sampler=sampler)

    data1 = data1.batch(batch_size=5, drop_remainder=True)
    data1 = data1.repeat(count=3)

    # Verify dataset size
    data1_size = data1.get_dataset_size()
    logger.info("dataset size is: {}".format(data1_size))
    assert data1_size == 24

    # Verify number of iterations
    num_iter = 0
    for item in data1.create_dict_iterator(num_epochs=1):  # each data is a dictionary
        # in this example, each dictionary has keys "image" and "label"
        logger.info("image is {}".format(item["image"]))
        logger.info("label is {}".format(item["label"]))
        num_iter += 1

    logger.info("Number of data in data1: {}".format(num_iter))
    # Note: Each of the 4 shards has 44/4=11 samples
    # Note: Number of iterations is (11/5 = 2) * 3 = 6
    assert num_iter == 6
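
# Illustrative sketch (not part of the original test): the iteration count asserted above
# chains three effects, as the notes say: 4-way sharding leaves 44 / 4 = 11 samples on this
# shard, drop_remainder batching keeps floor(11 / 5) = 2 batches, and repeat(count=3)
# triples that.
def _sketch_shard_batch_repeat(num_images=44, num_shards=4, batch_size=5, repeat_count=3):
    per_shard = num_images // num_shards    # 11 samples on shard_id=3
    full_batches = per_shard // batch_size  # 2 full batches, remainder dropped
    return full_batches * repeat_count      # 2 * 3 == 6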
|
|
def skip_test_chained_sampler_05():
    logger.info("Test Case Chained Sampler - PKSampler and WeightedRandom")

    # Create chained sampler, PKSampler and WeightedRandom
    sampler = ds.PKSampler(num_val=3)  # Number of elements per class is 3 (and there are 4 classes)
    weights = [1.0, 0.1, 0.02, 0.3, 0.4, 0.05, 1.2, 0.13, 0.14, 0.015, 0.16, 0.5]
    child_sampler = ds.WeightedRandomSampler(weights, num_samples=12)
    sampler.add_child(child_sampler)
    # Create ImageFolderDataset with sampler
    data1 = ds.ImageFolderDataset(DATA_DIR, sampler=sampler)

    # Verify dataset size
    data1_size = data1.get_dataset_size()
    logger.info("dataset size is: {}".format(data1_size))
    assert data1_size == 12

    # Verify number of iterations
    num_iter = 0
    for item in data1.create_dict_iterator(num_epochs=1):  # each data is a dictionary
        # in this example, each dictionary has keys "image" and "label"
        logger.info("image is {}".format(item["image"]))
        logger.info("label is {}".format(item["label"]))
        num_iter += 1

    logger.info("Number of data in data1: {}".format(num_iter))
    # Note: PKSampler produces 4x3=12 samples
    # Note: Child WeightedRandomSampler produces 12 samples
    assert num_iter == 12
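
# Illustrative sketch (not part of the original test): PKSampler(num_val=3) draws 3
# elements from each of the 4 classes in the test data, so the parent sampler here and
# the child WeightedRandomSampler(num_samples=12) both yield 12 samples.
def _sketch_pk_sample_count(num_classes=4, num_val=3):
    return num_classes * num_val  # 4 * 3 == 12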
|
|
def test_chained_sampler_06():
    logger.info("Test Case Chained Sampler - WeightedRandom and PKSampler")

    # Create chained sampler, WeightedRandom and PKSampler
    weights = [1.0, 0.1, 0.02, 0.3, 0.4, 0.05, 1.2, 0.13, 0.14, 0.015, 0.16, 0.5]
    sampler = ds.WeightedRandomSampler(weights=weights, num_samples=12)
    child_sampler = ds.PKSampler(num_val=3)  # Number of elements per class is 3 (and there are 4 classes)
    sampler.add_child(child_sampler)
    # Create ImageFolderDataset with sampler
    data1 = ds.ImageFolderDataset(DATA_DIR, sampler=sampler)

    # Verify dataset size
    data1_size = data1.get_dataset_size()
    logger.info("dataset size is: {}".format(data1_size))
    assert data1_size == 12

    # Verify number of iterations
    num_iter = 0
    for item in data1.create_dict_iterator(num_epochs=1):  # each data is a dictionary
        # in this example, each dictionary has keys "image" and "label"
        logger.info("image is {}".format(item["image"]))
        logger.info("label is {}".format(item["label"]))
        num_iter += 1

    logger.info("Number of data in data1: {}".format(num_iter))
    # Note: WeightedRandomSampler produces 12 samples
    # Note: Child PKSampler produces 12 samples
    assert num_iter == 12
|
|
def test_chained_sampler_07():
    logger.info("Test Case Chained Sampler - SubsetRandom and Distributed, 2 shards")

    # Create chained sampler, subset random and distributed
    indices = [0, 1, 2, 3, 4, 5, 12, 13, 14, 15, 16, 11]
    sampler = ds.SubsetRandomSampler(indices, num_samples=12)
    child_sampler = ds.DistributedSampler(num_shards=2, shard_id=1)
    sampler.add_child(child_sampler)
    # Create ImageFolderDataset with sampler
    data1 = ds.ImageFolderDataset(DATA_DIR, sampler=sampler)

    # Verify dataset size
    data1_size = data1.get_dataset_size()
    logger.info("dataset size is: {}".format(data1_size))
    assert data1_size == 12

    # Verify number of iterations
    num_iter = 0
    for item in data1.create_dict_iterator(num_epochs=1):  # each data is a dictionary
        # in this example, each dictionary has keys "image" and "label"
        logger.info("image is {}".format(item["image"]))
        logger.info("label is {}".format(item["label"]))
        num_iter += 1

    logger.info("Number of data in data1: {}".format(num_iter))
    # Note: SubsetRandomSampler produces 12 samples
    # Note: Each of 2 shards has 6 samples
    # FIXME: Uncomment the following assert when code issue is resolved; at runtime, number of samples is 12 not 6
    # assert num_iter == 6
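
# Illustrative sketch (not part of the original test): the commented-out assert above
# expects the child DistributedSampler to split the 12 subset indices across 2 shards,
# giving 6 samples for this shard; the FIXME records that the runtime currently yields
# all 12.
def _sketch_subset_rows_per_shard(num_indices=12, num_shards=2):
    return num_indices // num_shards  # 12 // 2 == 6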
|
|
def skip_test_chained_sampler_08():
    logger.info("Test Case Chained Sampler - SubsetRandom and Distributed, 4 shards")

    # Create chained sampler, subset random and distributed
    indices = [0, 1, 2, 3, 4, 5, 12, 13, 14, 15, 16, 11]
    sampler = ds.SubsetRandomSampler(indices, num_samples=12)
    child_sampler = ds.DistributedSampler(num_shards=4, shard_id=1)
    sampler.add_child(child_sampler)
    # Create ImageFolderDataset with sampler
    data1 = ds.ImageFolderDataset(DATA_DIR, sampler=sampler)

    # Verify dataset size
    data1_size = data1.get_dataset_size()
    logger.info("dataset size is: {}".format(data1_size))
    assert data1_size == 3

    # Verify number of iterations
    num_iter = 0
    for item in data1.create_dict_iterator(num_epochs=1):  # each data is a dictionary
        # in this example, each dictionary has keys "image" and "label"
        logger.info("image is {}".format(item["image"]))
        logger.info("label is {}".format(item["label"]))
        num_iter += 1

    logger.info("Number of data in data1: {}".format(num_iter))
    # Note: SubsetRandomSampler returns 12 samples
    # Note: Each of 4 shards has 3 samples
    assert num_iter == 3
|
|
def test_imagefolder_rename():
    logger.info("Test Case rename")
    # define parameters
|
@@ -498,6 +744,30 @@ if __name__ == '__main__':
    test_weighted_random_sampler_exception()
    logger.info('test_weighted_random_sampler_exception Ended.\n')

    test_chained_sampler_01()
    logger.info('test_chained_sampler_01 Ended.\n')

    test_chained_sampler_02()
    logger.info('test_chained_sampler_02 Ended.\n')

    test_chained_sampler_03()
    logger.info('test_chained_sampler_03 Ended.\n')

    test_chained_sampler_04()
    logger.info('test_chained_sampler_04 Ended.\n')

    # test_chained_sampler_05()
    # logger.info('test_chained_sampler_05 Ended.\n')

    test_chained_sampler_06()
    logger.info('test_chained_sampler_06 Ended.\n')

    test_chained_sampler_07()
    logger.info('test_chained_sampler_07 Ended.\n')

    # test_chained_sampler_08()
    # logger.info('test_chained_sampler_08 Ended.\n')

    test_imagefolder_numshards()
    logger.info('test_imagefolder_numshards Ended.\n')