import numpy as np
import time
import os
import collections

import matplotlib.pyplot as plt
import uctc.nn as nn 
from utils import parameter_data, Dataset

# Set to True to have DigitClassificationDataset open a matplotlib figure and
# redraw it with validation results while training; when False, progress is
# only printed to stdout.
use_graphics = False

class DigitClassificationModel(object):
    """
    A model for handwritten digit classification using the MNIST dataset.

    Each handwritten digit is a 28x28 pixel grayscale image, which is flattened
    into a 784-dimensional vector for the purposes of this model. Each entry in
    the vector is a floating point number between 0 and 1.

    The goal is to sort each digit into one of 10 classes (number 0 through 9).

    The network has two hidden layers (200 and 100 units, each followed by a
    ReLU) and a linear output layer producing 10 scores.
    """
    def __init__(self):
        """
        Sets the hyperparameters and creates the trainable parameters.
        """
        # Layer widths: input -> hidden 1 -> hidden 2 -> output.
        self.input_features = 784
        self.h1 = 200
        self.h2 = 100
        self.output_features = 10
        # Optimisation settings read by train().
        self.lr = 0.01
        self.batch_size = 100
        # One weight matrix and one single-row bias per layer. The creation
        # order is kept fixed in case parameter_data draws from a shared
        # random stream.
        self.w1 = nn.Parameter(parameter_data(self.input_features, self.h1))
        self.b1 = nn.Parameter(parameter_data(1, self.h1))
        self.w2 = nn.Parameter(parameter_data(self.h1, self.h2))
        self.b2 = nn.Parameter(parameter_data(1, self.h2))
        self.w3 = nn.Parameter(parameter_data(self.h2, self.output_features))
        self.b3 = nn.Parameter(parameter_data(1, self.output_features))

    def run(self, x):
        """
        Runs the model for a batch of examples.

        The model predicts a node with shape (batch_size x 10) containing
        scores. Higher scores correspond to greater probability of the image
        belonging to a particular class.

        Inputs:
            x: a node with shape (batch_size x 784)
        Output:
            A node with shape (batch_size x 10) containing predicted scores
                (also called logits)
        """
        hidden1 = nn.ReLU(nn.AddBias(nn.Linear(x, self.w1), self.b1))
        hidden2 = nn.ReLU(nn.AddBias(nn.Linear(hidden1, self.w2), self.b2))
        # No activation on the last layer: the raw scores are what
        # get_loss() hands to SoftmaxLoss.
        return nn.AddBias(nn.Linear(hidden2, self.w3), self.b3)

    def get_loss(self, x, y):
        """
        Computes the loss for a batch of examples.

        The correct labels `y` are represented as a node with shape
        (batch_size x 10). Each row is a one-hot vector encoding the correct
        digit class (0-9).

        Inputs:
            x: a node with shape (batch_size x 784)
            y: a node with shape (batch_size x 10)
        Returns: a loss node
        """
        return nn.SoftmaxLoss(self.run(x), y)

    def train(self, dataset):
        """
        Trains the model.

        Makes repeated full passes over `dataset`, updating every parameter
        after each batch, and prints the validation accuracy after each pass.
        Training stops once that accuracy exceeds 0.95. There is no cap on
        the number of passes, so this does not return if the threshold is
        never reached.

        Inputs:
            dataset: an object providing iterate_once(batch_size), yielding
                (x, y) batches, and get_validation_accuracy()
        """
        # A single ordered list keeps each gradient paired with its parameter.
        params = [self.w1, self.b1, self.w2, self.b2, self.w3, self.b3]
        while True:
            for x, y in dataset.iterate_once(self.batch_size):
                loss = self.get_loss(x, y)
                grads = nn.gradients(loss, params)
                # NOTE(review): lr is passed unchanged as the second argument
                # of Parameter.update; whether that call steps along or
                # against the gradient is defined by uctc.nn - confirm.
                for param, grad in zip(params, grads):
                    param.update(grad, self.lr)
            accuracy = dataset.get_validation_accuracy()
            print(accuracy)
            if accuracy > 0.95:
                break

def get_data_path(filename):
    """
    Locates a data file relative to the directory containing this module.

    The following locations are tried in order and the first one that exists
    is returned:
        1. <module dir>/../data/<filename>
        2. <module dir>/data/<filename>
        3. <module dir>/<filename>

    Inputs:
        filename: name of the data file to look for
    Returns: the first candidate path that exists
    Raises:
        FileNotFoundError: if the file is in none of the locations above.
            FileNotFoundError is a subclass of Exception, so callers that
            catch Exception keep working.
    """
    here = os.path.dirname(__file__)
    candidates = (
        os.path.join(here, os.pardir, "data", filename),
        os.path.join(here, "data", filename),
        os.path.join(here, filename),
    )
    for path in candidates:
        if os.path.exists(path):
            return path
    raise FileNotFoundError(
        "Could not find data file: {}".format(filename))

class DigitClassificationDataset(Dataset):
    """
    MNIST training data with a held-out validation ("dev") split and optional
    live plotting of validation results.

    The 60000 training images are passed to the base Dataset together with
    one-hot labels. The 10000 test images are split in two: even-indexed
    examples become the dev set used for validation accuracy, odd-indexed
    examples are stored as `test_images` / `test_labels`.

    While iterating, validation accuracy is printed at most about once per
    second; if the module-level `use_graphics` flag is true the matplotlib
    figure created in the constructor is refreshed as well.
    """
    def __init__(self, model: DigitClassificationModel):
        """
        Loads mnist.npz, builds the splits and, if enabled, the plot.

        Inputs:
            model: the model whose run() method is used to score the dev set
        """
        mnist_path = get_data_path("mnist.npz")

        with np.load(mnist_path) as data:
            train_images = data["train_images"]
            train_labels = data["train_labels"]
            test_images = data["test_images"]
            test_labels = data["test_labels"]
            # Sanity checks on the bundled archive.
            assert len(train_images) == len(train_labels) == 60000
            assert len(test_images) == len(test_labels) == 10000
            # Copies are taken inside the `with` so the arrays remain usable
            # after the archive is closed.
            self.dev_images = np.array(test_images[0::2], copy=True)
            self.dev_labels = np.array(test_labels[0::2], copy=True)
            self.test_images = np.array(test_images[1::2], copy=True)
            self.test_labels = np.array(test_labels[1::2], copy=True)

        train_labels_one_hot = np.zeros((len(train_images), 10))
        train_labels_one_hot[range(len(train_images)), train_labels] = 1

        super().__init__(train_images, train_labels_one_hot)

        self.model = model
        self.epoch = 0

        if use_graphics:
            self._setup_plot()
        self.last_update = time.time()

    def _setup_plot(self):
        """
        Creates the figure: one row of image thumbnails per digit label.

        Stores `width`, `samples`, `fig`, `images`, `texts` and `status` on
        the instance; these attributes exist only if `use_graphics` was true
        when the dataset was constructed.
        """
        width = 20  # Width of each row expressed as a multiple of image width
        samples = 100  # Number of images to display per label
        fig = plt.figure()
        ax = {}
        images = collections.defaultdict(list)
        texts = collections.defaultdict(list)
        # Row 9 is created first so every other row can share its x axis.
        for i in reversed(range(10)):
            ax[i] = plt.subplot2grid((30, 1), (3 * i, 0), 2, 1,
                                     sharex=ax.get(9))
            plt.setp(ax[i].get_xticklabels(), visible=i == 9)
            ax[i].set_yticks([])
            ax[i].text(-0.03, 0.5, i, transform=ax[i].transAxes,
                       va="center")
            ax[i].set_xlim(0, 28 * width)
            ax[i].set_ylim(0, 28)
            for j in range(samples):
                images[i].append(ax[i].imshow(
                    np.zeros((28, 28)), vmin=0, vmax=1, cmap="Greens",
                    alpha=0.3))
                texts[i].append(ax[i].text(
                    0, 0, "", ha="center", va="top", fontsize="smaller"))
        ax[9].set_xticks(np.linspace(0, 28 * width, 11))
        ax[9].set_xticklabels(
            ["{:.1f}".format(num) for num in np.linspace(0, 1, 11)])
        ax[9].tick_params(axis="x", pad=16)
        ax[9].set_xlabel("Probability of Correct Label")
        status = ax[0].text(
            0.5, 1.5, "", transform=ax[0].transAxes, ha="center",
            va="bottom")
        plt.show(block=False)

        self.width = width
        self.samples = samples
        self.fig = fig
        self.images = images
        self.texts = texts
        self.status = status

    def _dev_predictions(self):
        """
        Runs the model on the dev images.

        Returns: a pair (dev_logits, dev_predicted) where dev_logits is the
            tensor returned by the model output's .tensor() and dev_predicted
            is a NumPy array built from its argmax along axis 1 (presumably
            one class index per dev image - the comparison with dev_labels
            relies on this).
        """
        dev_logits = self.model.run(nn.Constant(self.dev_images)).tensor()
        dev_predicted = np.array(nn.argmax(dev_logits, axis=1).data())
        return dev_logits, dev_predicted

    def _refresh_plot(self, status_text, dev_logits, dev_predicted):
        """
        Redraws the figure from the current dev-set predictions.

        For each digit label, `self.samples` examples are picked at evenly
        spaced ranks from that label's dev images ordered by the probability
        assigned to the correct label. Each thumbnail is placed horizontally
        in proportion to that probability. Correct predictions are drawn in
        green; wrong ones in red with the predicted digit as text.
        """
        # Probabilities are only needed for plotting, so they are computed
        # here rather than on every status refresh.
        dev_probs = np.array(
            nn.exp(nn.log_softmax(dev_logits)).data()).reshape(
                len(self.dev_images), -1)
        self.status.set_text(status_text)
        for label in range(10):
            is_label = self.dev_labels == label
            predicted = dev_predicted[is_label]
            probs = dev_probs[is_label][:, label]
            linspace = np.linspace(
                0, len(probs) - 1, self.samples).astype(int)
            indices = probs.argsort()[linspace]
            for j, (prob, image) in enumerate(zip(
                    probs[indices],
                    self.dev_images[is_label][indices])):
                self.images[label][j].set_data(image.reshape((28, 28)))
                left = prob * (self.width - 1) * 28
                if predicted[indices[j]] == label:
                    self.images[label][j].set_cmap("Greens")
                    self.texts[label][j].set_text("")
                else:
                    self.images[label][j].set_cmap("Reds")
                    self.texts[label][j].set_text(predicted[indices[j]])
                    self.texts[label][j].set_x(left + 14)
                self.images[label][j].set_extent([left, left + 28, 0, 28])
        self.fig.canvas.draw_idle()
        self.fig.canvas.start_event_loop(1e-3)

    def iterate_once(self, batch_size):
        """
        Yields (x, y) training batches for one pass and reports progress.

        Increments `self.epoch`, then yields every batch produced by the base
        class. After a batch has been consumed, if more than one second has
        passed since the last report, the dev accuracy is printed (and the
        plot refreshed when `use_graphics` is true).

        Inputs:
            batch_size: forwarded to the base class iterate_once
        """
        self.epoch += 1

        for batch_index, (x, y) in enumerate(
                super().iterate_once(batch_size)):
            yield x, y

            if time.time() - self.last_update > 1:
                dev_logits, dev_predicted = self._dev_predictions()
                dev_accuracy = np.mean(dev_predicted == self.dev_labels)
                status_text = (
                    "epoch: {:d}, batch: {:d}/{:d}, validation accuracy: "
                    "{:.2%}".format(
                        self.epoch, batch_index,
                        len(self.x) // batch_size, dev_accuracy))
                print(status_text)
                if use_graphics:
                    self._refresh_plot(
                        status_text, dev_logits, dev_predicted)
                self.last_update = time.time()

    def get_validation_accuracy(self):
        """
        Returns the fraction of dev images whose predicted class equals the
        dev label.
        """
        _, dev_predicted = self._dev_predictions()
        dev_accuracy = np.mean(dev_predicted == self.dev_labels)
        return dev_accuracy

# Script body: build the model, wrap the MNIST data around it (the dataset
# calls model.run to compute validation accuracy), then train until
# DigitClassificationModel.train returns.
# NOTE: these statements are not behind an `if __name__ == "__main__"` guard,
# so importing this module also loads the data and starts training.
model = DigitClassificationModel()
dataset = DigitClassificationDataset(model)
model.train(dataset)