|
|
|
@@ -4421,23 +4421,7 @@ class CelebADataset(MappableDataset): |
|
|
|
The generated dataset has two columns ['image', 'attr']. |
|
|
|
The type of the image tensor is uint8. The attr tensor is uint32 and one-hot encoded.
|
|
|
|
|
|
|
Args: |
|
|
|
dataset_dir (str): Path to the root directory that contains the dataset. |
|
|
|
num_parallel_workers (int, optional): Number of workers to read the data (default=value set in the config). |
|
|
|
shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None). |
|
|
|
dataset_type (str): One of 'all', 'train', 'valid' or 'test'.
|
|
|
sampler (Sampler, optional): Object used to choose samples from the dataset (default=None). |
|
|
|
decode (bool, optional): Decode the images after reading (default=False).
|
|
|
extensions (list[str], optional): List of file extensions to be |
|
|
|
included in the dataset (default=None). |
|
|
|
num_samples (int, optional): The number of images to be included in the dataset. |
|
|
|
(default=None, all images). |
|
|
|
num_shards (int, optional): Number of shards that the dataset should be divided |
|
|
|
into (default=None). |
|
|
|
shard_id (int, optional): The shard ID within num_shards (default=None). This |
|
|
|
argument should be specified only when num_shards is also specified. |
|
|
|
|
|
|
|
Citation of CelebA dataset. |
|
|
|
Citation of CelebA dataset. |
|
|
|
|
|
|
|
.. code-block:: |
|
|
|
|
|
|
|
@@ -4455,9 +4439,9 @@ class CelebADataset(MappableDataset): |
|
|
|
bibsource = {dblp computer science bibliography, https://dblp.org}, |
|
|
|
howpublished = {http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html}, |
|
|
|
description = {CelebFaces Attributes Dataset (CelebA) is a large-scale face attributes dataset |
|
|
|
with more than 200K celebrity images, each with 40 attribute annotations. The |
|
|
|
images in this dataset cover large pose variations and background clutter. CelebA |
|
|
|
has large diversities, large quantities, and rich annotations, including |
|
|
|
with more than 200K celebrity images, each with 40 attribute annotations. |
|
|
|
The images in this dataset cover large pose variations and background clutter. |
|
|
|
CelebA has large diversities, large quantities, and rich annotations, including |
|
|
|
* 10,177 number of identities, |
|
|
|
* 202,599 number of face images, and |
|
|
|
* 5 landmark locations, 40 binary attributes annotations per image. |
|
|
|
@@ -4465,6 +4449,22 @@ class CelebADataset(MappableDataset): |
|
|
|
vision tasks: face attribute recognition, face detection, landmark (or facial part) |
|
|
|
localization, and face editing & synthesis.} |
|
|
|
} |
|
|
|
|
|
|
|
Args: |
|
|
|
dataset_dir (str): Path to the root directory that contains the dataset. |
|
|
|
num_parallel_workers (int, optional): Number of workers to read the data (default=value set in the config). |
|
|
|
shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None). |
|
|
|
dataset_type (str): One of 'all', 'train', 'valid' or 'test'.
|
|
|
sampler (Sampler, optional): Object used to choose samples from the dataset (default=None). |
|
|
|
decode (bool, optional): Decode the images after reading (default=False).
|
|
|
extensions (list[str], optional): List of file extensions to be |
|
|
|
included in the dataset (default=None). |
|
|
|
num_samples (int, optional): The number of images to be included in the dataset. |
|
|
|
(default=None, all images). |
|
|
|
num_shards (int, optional): Number of shards that the dataset should be divided |
|
|
|
into (default=None). |
|
|
|
shard_id (int, optional): The shard ID within num_shards (default=None). This |
|
|
|
argument should be specified only when num_shards is also specified. |
|
|
|
""" |
|
|
|
|
|
|
|
@check_celebadataset |
|
|
|
@@ -4542,6 +4542,24 @@ class CLUEDataset(SourceDataset): |
|
|
|
models, corpus and leaderboard. Here we bring in classification task of CLUE, which are AFQMC, TNEWS, IFLYTEK, |
|
|
|
CMNLI, WSC and CSL. |
|
|
|
|
|
|
|
Citation of CLUE dataset. |
|
|
|
|
|
|
|
.. code-block:: |
|
|
|
|
|
|
|
@article{CLUEbenchmark, |
|
|
|
title = {CLUE: A Chinese Language Understanding Evaluation Benchmark}, |
|
|
|
author = {Liang Xu, Xuanwei Zhang, Lu Li, Hai Hu, Chenjie Cao, Weitang Liu, Junyi Li, Yudong Li, |
|
|
|
Kai Sun, Yechen Xu, Yiming Cui, Cong Yu, Qianqian Dong, Yin Tian, Dian Yu, Bo Shi, Jun Zeng, |
|
|
|
Rongzhao Wang, Weijian Xie, Yanting Li, Yina Patterson, Zuoyu Tian, Yiwen Zhang, He Zhou, |
|
|
|
Shaoweihua Liu, Qipeng Zhao, Cong Yue, Xinrui Zhang, Zhengliang Yang, Zhenzhong Lan}, |
|
|
|
journal = {arXiv preprint arXiv:2004.05986}, |
|
|
|
year = {2020}, |
|
|
|
howpublished = {https://github.com/CLUEbenchmark/CLUE}, |
|
|
|
description = {CLUE, a Chinese Language Understanding Evaluation benchmark. It contains eight different |
|
|
|
tasks, including single-sentence classification, sentence pair classification, and machine |
|
|
|
reading comprehension.} |
|
|
|
} |
|
|
|
|
|
|
|
Args: |
|
|
|
dataset_files (str or list[str]): String or list of files to be read or glob strings to search for a pattern of |
|
|
|
files. The list will be sorted in a lexicographical order. |
|
|
|
@@ -4564,24 +4582,6 @@ class CLUEDataset(SourceDataset): |
|
|
|
shard_id (int, optional): The shard ID within num_shards (default=None). This |
|
|
|
argument should be specified only when num_shards is also specified. |
|
|
|
|
|
|
|
Citation of CLUE dataset. |
|
|
|
|
|
|
|
.. code-block:: |
|
|
|
|
|
|
|
@article{CLUEbenchmark, |
|
|
|
title = {CLUE: A Chinese Language Understanding Evaluation Benchmark}, |
|
|
|
author = {Liang Xu, Xuanwei Zhang, Lu Li, Hai Hu, Chenjie Cao, Weitang Liu, Junyi Li, Yudong Li, |
|
|
|
Kai Sun, Yechen Xu, Yiming Cui, Cong Yu, Qianqian Dong, Yin Tian, Dian Yu, Bo Shi, Jun Zeng, |
|
|
|
Rongzhao Wang, Weijian Xie, Yanting Li, Yina Patterson, Zuoyu Tian, Yiwen Zhang, He Zhou, |
|
|
|
Shaoweihua Liu, Qipeng Zhao, Cong Yue, Xinrui Zhang, Zhengliang Yang, Zhenzhong Lan}, |
|
|
|
journal = {arXiv preprint arXiv:2004.05986}, |
|
|
|
year = {2020}, |
|
|
|
howpublished = {https://github.com/CLUEbenchmark/CLUE}, |
|
|
|
description = {CLUE, a Chinese Language Understanding Evaluation benchmark. It contains eight different |
|
|
|
tasks, including single-sentence classification, sentence pair classification, and machine |
|
|
|
reading comprehension.} |
|
|
|
} |
|
|
|
|
|
|
|
Examples: |
|
|
|
>>> import mindspore.dataset as ds |
|
|
|
>>> dataset_files = ["/path/to/1", "/path/to/2"] # contains 1 or multiple text files |
|
|
|
|