import numpy as np

import uctc.nn as nn

# Seed NumPy's global RNG at import time so that every random draw made
# through np.random (including parameter_data below) is reproducible.
np.random.seed(42)


def parameter_data(*shape):
    """Return a randomly initialised float32 array of the given 2-D shape.

    Values are drawn uniformly from [-limit, limit) with
    limit = sqrt(3 / mean(shape)), i.e. sqrt(6 / (rows + cols)).

    Args:
        *shape: exactly two positive Python ints, (rows, cols).

    Returns:
        A np.float32 ndarray of shape (rows, cols).

    Raises:
        AssertionError: if shape does not consist of exactly two positive
            ints.
    """
    assert len(shape) == 2, (
        "Shape must have 2 dimensions, instead has {}".format(len(shape)))
    assert all(isinstance(dim, int) and dim > 0 for dim in shape), (
        "Shape must consist of positive integers, got {!r}".format(shape))
    limit = np.sqrt(3.0 / np.mean(shape))
    data = np.random.uniform(
        low=-limit, high=limit, size=shape).astype(np.float32)
    return data


class Dataset(object):
    """Pair of row-aligned 2-D floating-point arrays served in batches.

    Attributes are stored as given (no copy is made):
        x: 2-D floating ndarray.
        y: 2-D floating ndarray with the same number of rows as x.
    """

    def __init__(self, x, y):
        """Validate and store the two arrays.

        Raises:
            AssertionError: if either argument is not a 2-D floating-point
                ndarray, or if their first dimensions differ.
        """
        assert isinstance(x, np.ndarray)
        assert isinstance(y, np.ndarray)
        assert np.issubdtype(x.dtype, np.floating)
        assert np.issubdtype(y.dtype, np.floating)
        assert x.ndim == 2
        assert y.ndim == 2
        assert x.shape[0] == y.shape[0]
        self.x = x
        self.y = y

    def iterate_once(self, batch_size):
        """Yield one pass over the data as (nn.Constant, nn.Constant) pairs.

        Each pair wraps consecutive row slices x[i:i + batch_size] and
        y[i:i + batch_size]. Because this is a generator, the argument
        checks below only run when the first item is requested.

        Args:
            batch_size: positive int that evenly divides the number of rows.

        Raises:
            AssertionError: if batch_size is not a positive int or does not
                divide the dataset size.
        """
        assert isinstance(batch_size, int) and batch_size > 0, (
            f"Batch size should be a positive integer, got {batch_size}")
        assert self.x.shape[0] % batch_size == 0, (
            f"Dataset size {self.x.shape[0]} is not divisible by batch size {batch_size}")
        index = 0
        while index < self.x.shape[0]:
            x = self.x[index:index + batch_size]
            y = self.y[index:index + batch_size]
            yield nn.Constant(x), nn.Constant(y)
            index += batch_size

    def iterate_forever(self, batch_size):
        """Yield batches endlessly by repeating iterate_once passes.

        Args:
            batch_size: forwarded unchanged to iterate_once.

        Raises:
            AssertionError: propagated from iterate_once for a bad
                batch_size.
            ValueError: if a full pass yields no batches (e.g. the dataset
                has zero rows). Without this guard the loop would spin
                forever without ever yielding, hanging the caller.
        """
        while True:
            produced_any = False
            for batch in self.iterate_once(batch_size):
                produced_any = True
                yield batch
            if not produced_any:
                raise ValueError(
                    "Cannot iterate forever over a dataset that yields "
                    "no batches")

    def get_validation_accuracy(self):
        """Always raise: this base class has no validation data.

        NOTE(review): presumably overridden by dataset subclasses that do
        carry validation data -- confirm against the rest of the project.

        Raises:
            NotImplementedError: unconditionally.
        """
        raise NotImplementedError(
            "No validation data is available for this dataset. "
            "In this assignment, only the Digit Classification and Language "
            "Identification datasets have validation data.")